From: Michael Tremer Date: Sat, 17 May 2008 21:58:21 +0000 (+0200) Subject: Imported the beginning of the rowie-spezial and possible 2.2 release. X-Git-Url: http://git.ipfire.org/?p=people%2Fteissler%2Fipfire-2.x.git;a=commitdiff_plain;h=4ce3790866ee506a29c7cdba3490f42051d2e8b7 Imported the beginning of the rowie-spezial and possible 2.2 release. --- diff --git a/config/kernel/kernel.config.i586 b/config/kernel/kernel.config.i586 index 1bf00c448..2d62a0dcb 100644 --- a/config/kernel/kernel.config.i586 +++ b/config/kernel/kernel.config.i586 @@ -1,16 +1,22 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16.57-ipfire -# Wed Mar 26 10:02:36 2008 +# Linux kernel version: 2.6.20.21 +# Sat May 17 15:55:08 2008 # CONFIG_X86_32=y +CONFIG_GENERIC_TIME=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y CONFIG_SEMAPHORE_SLEEPERS=y CONFIG_X86=y CONFIG_MMU=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y +CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_HWEIGHT=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_DMI=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # # Code maturity level options @@ -26,17 +32,22 @@ CONFIG_LOCALVERSION="" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SWAP=y CONFIG_SYSVIPC=y +# CONFIG_IPC_NS is not set CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y +# CONFIG_TASKSTATS is not set +# CONFIG_UTS_NS is not set CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y # CONFIG_IKCONFIG is not set +CONFIG_SYSFS_DEPRECATED=y +# CONFIG_RELAY is not set CONFIG_INITRAMFS_SOURCE="" -CONFIG_UID16=y -CONFIG_VM86=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set CONFIG_HOTPLUG=y @@ -47,11 +58,9 @@ CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y CONFIG_SHMEM=y -CONFIG_CC_ALIGN_FUNCTIONS=0 -CONFIG_CC_ALIGN_LABELS=0 -CONFIG_CC_ALIGN_LOOPS=0 -CONFIG_CC_ALIGN_JUMPS=0 CONFIG_SLAB=y 
+CONFIG_VM_EVENT_COUNTERS=y +CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 # CONFIG_SLOB is not set @@ -62,7 +71,6 @@ CONFIG_BASE_SMALL=0 CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_OBSOLETE_MODPARM=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_KMOD=y @@ -70,7 +78,10 @@ CONFIG_KMOD=y # # Block layer # +CONFIG_BLOCK=y # CONFIG_LBD is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set # # IO Schedulers @@ -88,6 +99,7 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" # # Processor type and features # +# CONFIG_SMP is not set CONFIG_X86_PC=y # CONFIG_X86_ELAN is not set # CONFIG_X86_VOYAGER is not set @@ -97,6 +109,7 @@ CONFIG_X86_PC=y # CONFIG_X86_VISWS is not set # CONFIG_X86_GENERICARCH is not set # CONFIG_X86_ES7000 is not set +# CONFIG_PARAVIRT is not set # CONFIG_M386 is not set # CONFIG_M486 is not set CONFIG_M586=y @@ -106,6 +119,7 @@ CONFIG_M586=y # CONFIG_MPENTIUMII is not set # CONFIG_MPENTIUMIII is not set # CONFIG_MPENTIUMM is not set +# CONFIG_MCORE2 is not set # CONFIG_MPENTIUM4 is not set # CONFIG_MK6 is not set # CONFIG_MK7 is not set @@ -124,6 +138,8 @@ CONFIG_X86_CMPXCHG=y CONFIG_X86_XADD=y CONFIG_X86_L1_CACHE_SHIFT=7 CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_X86_PPRO_FENCE=y CONFIG_X86_F00F_BUG=y @@ -136,13 +152,13 @@ CONFIG_X86_ALIGNMENT_16=y CONFIG_X86_INTEL_USERCOPY=y CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y -# CONFIG_SMP is not set CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set # CONFIG_X86_UP_APIC is not set CONFIG_X86_MCE=y CONFIG_X86_MCE_NONFATAL=m +CONFIG_VM86=y # CONFIG_TOSHIBA is not set # CONFIG_I8K is not set CONFIG_X86_REBOOTFIXUPS=y @@ -159,15 +175,12 @@ CONFIG_X86_CPUID=y # CONFIG_NOHIGHMEM is not set CONFIG_HIGHMEM4G=y # CONFIG_HIGHMEM64G is not set -CONFIG_VMSPLIT_3G=y -# 
CONFIG_VMSPLIT_3G_OPT is not set -# CONFIG_VMSPLIT_2G is not set -# CONFIG_VMSPLIT_1G is not set CONFIG_PAGE_OFFSET=0xC0000000 CONFIG_HIGHMEM=y CONFIG_ARCH_FLATMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set @@ -176,20 +189,23 @@ CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_SPARSEMEM_STATIC=y CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set # CONFIG_HIGHPTE is not set CONFIG_MATH_EMULATION=y # CONFIG_MTRR is not set # CONFIG_EFI is not set -# CONFIG_REGPARM is not set CONFIG_SECCOMP=y CONFIG_HZ_100=y # CONFIG_HZ_250 is not set +# CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=100 # CONFIG_KEXEC is not set # CONFIG_CRASH_DUMP is not set CONFIG_PHYSICAL_START=0x100000 -CONFIG_DOUBLEFAULT=y +# CONFIG_RELOCATABLE is not set +CONFIG_PHYSICAL_ALIGN=0x100000 +CONFIG_COMPAT_VDSO=y CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # @@ -198,6 +214,7 @@ CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_PM=y CONFIG_PM_LEGACY=y # CONFIG_PM_DEBUG is not set +# CONFIG_PM_SYSFS_DEPRECATED is not set # CONFIG_SOFTWARE_SUSPEND is not set # @@ -213,6 +230,7 @@ CONFIG_ACPI_BUTTON=m CONFIG_ACPI_VIDEO=m # CONFIG_ACPI_HOTKEY is not set CONFIG_ACPI_FAN=m +# CONFIG_ACPI_DOCK is not set CONFIG_ACPI_PROCESSOR=m CONFIG_ACPI_THERMAL=m # CONFIG_ACPI_ASUS is not set @@ -226,6 +244,7 @@ CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y # CONFIG_ACPI_CONTAINER is not set +# CONFIG_ACPI_SBS is not set # # APM (Advanced Power Management) BIOS Support @@ -273,6 +292,7 @@ CONFIG_X86_SPEEDSTEP_SMI=m CONFIG_X86_P4_CLOCKMOD=m CONFIG_X86_CPUFREQ_NFORCE2=m CONFIG_X86_LONGRUN=m +# CONFIG_X86_LONGHAUL is not set # # shared options @@ -293,7 +313,7 @@ CONFIG_PCI_BIOS=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y CONFIG_PCIEPORTBUS=y -CONFIG_PCI_LEGACY_PROC=y +CONFIG_PCIEAER=y CONFIG_ISA_DMA_API=y CONFIG_ISA=y CONFIG_EISA=y @@ 
-358,6 +378,7 @@ CONFIG_PACKET_MMAP=y CONFIG_UNIX=y CONFIG_XFRM=y CONFIG_XFRM_USER=m +# CONFIG_XFRM_SUB_POLICY is not set CONFIG_NET_KEY=m CONFIG_INET=y CONFIG_IP_MULTICAST=y @@ -366,7 +387,6 @@ CONFIG_ASK_IP_FIB_HASH=y # CONFIG_IP_FIB_TRIE is not set CONFIG_IP_FIB_HASH=y CONFIG_IP_MULTIPLE_TABLES=y -# CONFIG_IP_ROUTE_FWMARK is not set CONFIG_IP_ROUTE_MULTIPATH=y # CONFIG_IP_ROUTE_MULTIPATH_CACHED is not set CONFIG_IP_ROUTE_VERBOSE=y @@ -383,14 +403,14 @@ CONFIG_IPSEC_NAT_TRAVERSAL=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_TUNNEL=m CONFIG_INET_TUNNEL=m +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y CONFIG_INET_DIAG=m CONFIG_INET_TCP_DIAG=m CONFIG_TCP_CONG_ADVANCED=y - -# -# TCP congestion control -# CONFIG_TCP_CONG_BIC=m CONFIG_TCP_CONG_CUBIC=m CONFIG_TCP_CONG_WESTWOOD=m @@ -399,12 +419,25 @@ CONFIG_TCP_CONG_HSTCP=m CONFIG_TCP_CONG_HYBLA=m CONFIG_TCP_CONG_VEGAS=m CONFIG_TCP_CONG_SCALABLE=m +# CONFIG_TCP_CONG_LP is not set +# CONFIG_TCP_CONG_VENO is not set +# CONFIG_DEFAULT_BIC is not set +# CONFIG_DEFAULT_CUBIC is not set +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_WESTWOOD is not set +CONFIG_DEFAULT_RENO=y +CONFIG_DEFAULT_TCP_CONG="reno" +# CONFIG_TCP_MD5SIG is not set # # IP: Virtual Server Configuration # # CONFIG_IP_VS is not set # CONFIG_IPV6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set +# CONFIG_NETWORK_SECMARK is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set CONFIG_BRIDGE_NETFILTER=y @@ -415,95 +448,60 @@ CONFIG_BRIDGE_NETFILTER=y CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NETFILTER_NETLINK_LOG=m +# CONFIG_NF_CONNTRACK_ENABLED is not set CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m 
-CONFIG_NETFILTER_XT_TARGET_NOTRACK=m +# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m +# CONFIG_NETFILTER_XT_MATCH_DSCP is not set +# CONFIG_NETFILTER_XT_MATCH_ESP is not set CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m CONFIG_NETFILTER_XT_MATCH_MAC=m CONFIG_NETFILTER_XT_MATCH_MARK=m +# CONFIG_NETFILTER_XT_MATCH_POLICY is not set +# CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set CONFIG_NETFILTER_XT_MATCH_REALM=m CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_STATE=m +# CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set CONFIG_NETFILTER_XT_MATCH_STRING=m CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +# CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set # # IP: Netfilter Configuration # -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_CT_ACCT=y -CONFIG_IP_NF_CONNTRACK_MARK=y -CONFIG_IP_NF_CONNTRACK_EVENTS=y -CONFIG_IP_NF_CONNTRACK_NETLINK=m -CONFIG_IP_NF_CT_PROTO_SCTP=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_NETBIOS_NS=m -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_AMANDA=m -CONFIG_IP_NF_PPTP=m -CONFIG_IP_NF_H323=m CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_IPRANGE=m -CONFIG_IP_NF_MATCH_LAYER7=m -# CONFIG_IP_NF_MATCH_LAYER7_DEBUG is not set -CONFIG_IP_NF_MATCH_MULTIPORT=m CONFIG_IP_NF_MATCH_TOS=m CONFIG_IP_NF_MATCH_RECENT=m CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m +# CONFIG_IP_NF_MATCH_AH is not set CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_MATCH_OWNER=m CONFIG_IP_NF_MATCH_ADDRTYPE=m -CONFIG_IP_NF_MATCH_HASHLIMIT=m -CONFIG_IP_NF_MATCH_POLICY=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_LOG=m CONFIG_IP_NF_TARGET_ULOG=m 
CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_SAME=m -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_NAT_AMANDA=m -CONFIG_IP_NF_NAT_PPTP=m -CONFIG_IP_NF_NAT_H323=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_IMQ=m CONFIG_IP_NF_TARGET_TOS=m CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP_NF_NAT_MMS=m -CONFIG_IP_NF_MMS=m -CONFIG_IP_NF_NAT_SIP=m -CONFIG_IP_NF_SIP=m # # Bridge: Netfilter Configuration @@ -552,12 +550,12 @@ CONFIG_ATM_BR2684=m CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m # CONFIG_DECNET is not set +CONFIG_LLC=m # CONFIG_LLC2 is not set # CONFIG_IPX is not set # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set @@ -565,6 +563,7 @@ CONFIG_VLAN_8021Q=m # QoS and/or fair queueing # CONFIG_NET_SCHED=y +CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_CLK_JIFFIES=y # CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set # CONFIG_NET_SCH_CLK_CPU is not set @@ -627,7 +626,12 @@ CONFIG_NET_ESTIMATOR=y CONFIG_IEEE80211=m # CONFIG_IEEE80211_DEBUG is not set CONFIG_IEEE80211_CRYPT_WEP=m -# CONFIG_IEEE80211_CRYPT_CCMP is not set +CONFIG_IEEE80211_CRYPT_CCMP=m +CONFIG_IEEE80211_CRYPT_TKIP=m +CONFIG_IEEE80211_SOFTMAC=m +# CONFIG_IEEE80211_SOFTMAC_DEBUG is not set +CONFIG_WIRELESS_EXT=y +CONFIG_FIB_RULES=y CONFIG_KLIPS=m # @@ -637,6 +641,7 @@ CONFIG_KLIPS_ESP=y CONFIG_KLIPS_AH=y CONFIG_KLIPS_AUTH_HMAC_MD5=y CONFIG_KLIPS_AUTH_HMAC_SHA1=y +CONFIG_KLIPS_ALG=y CONFIG_KLIPS_ENC_CRYPTOAPI=y CONFIG_KLIPS_ENC_1DES=y CONFIG_KLIPS_ENC_3DES=y @@ -655,6 +660,7 @@ CONFIG_KLIPS_DEBUG=y # CONFIG_STANDALONE is not set # 
CONFIG_PREVENT_FIRMWARE_BUILD is not set CONFIG_FW_LOADER=m +# CONFIG_SYS_HYPERVISOR is not set # # Connector - unified userspace <-> kernelspace linker @@ -669,18 +675,19 @@ CONFIG_MTD=m CONFIG_MTD_CONCAT=m CONFIG_MTD_PARTITIONS=y # CONFIG_MTD_REDBOOT_PARTS is not set -CONFIG_MTD_CMDLINE_PARTS=y # # User Modules And Translation Layers # CONFIG_MTD_CHAR=m +CONFIG_MTD_BLKDEVS=m CONFIG_MTD_BLOCK=m # CONFIG_MTD_BLOCK_RO is not set # CONFIG_FTL is not set # CONFIG_NFTL is not set # CONFIG_INFTL is not set # CONFIG_RFD_FTL is not set +# CONFIG_SSFDC is not set # # RAM/ROM/Flash chip drivers @@ -716,7 +723,6 @@ CONFIG_MTD_CFI_I2=y # CONFIG_MTD_SLRAM is not set # CONFIG_MTD_PHRAM is not set # CONFIG_MTD_MTDRAM is not set -# CONFIG_MTD_BLKMTD is not set # CONFIG_MTD_BLOCK2MTD is not set # @@ -730,6 +736,7 @@ CONFIG_MTD_CFI_I2=y # NAND Flash Device Drivers # # CONFIG_MTD_NAND is not set +# CONFIG_MTD_NAND_CAFE is not set # # OneNAND Flash Device Drivers @@ -746,6 +753,7 @@ CONFIG_PARPORT_PC=m # CONFIG_PARPORT_PC_SUPERIO is not set CONFIG_PARPORT_PC_PCMCIA=m # CONFIG_PARPORT_GSC is not set +# CONFIG_PARPORT_AX88796 is not set # CONFIG_PARPORT_1284 is not set # @@ -782,10 +790,18 @@ CONFIG_BLK_DEV_UB=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 CONFIG_BLK_DEV_INITRD=y # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set +# +# Misc devices +# +# CONFIG_IBM_ASM is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set + # # ATA/ATAPI/MFM/RLL support # @@ -833,10 +849,11 @@ CONFIG_BLK_DEV_TRIFLEX=m CONFIG_BLK_DEV_CY82C693=m CONFIG_BLK_DEV_CS5520=m CONFIG_BLK_DEV_CS5530=m -# CONFIG_BLK_DEV_CS5535 is not set +CONFIG_BLK_DEV_CS5535=m CONFIG_BLK_DEV_HPT34X=m # CONFIG_HPT34X_AUTODMA is not set CONFIG_BLK_DEV_HPT366=m +CONFIG_BLK_DEV_JMICRON=m CONFIG_BLK_DEV_SC1200=m CONFIG_BLK_DEV_PIIX=m CONFIG_BLK_DEV_IT821X=m @@ -872,6 +889,8 @@ CONFIG_IDEDMA_AUTO=y # CONFIG_RAID_ATTRS=y CONFIG_SCSI=m 
+# CONFIG_SCSI_TGT is not set +CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y # @@ -891,14 +910,17 @@ CONFIG_CHR_DEV_SCH=m CONFIG_SCSI_MULTI_LUN=y # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set # -# SCSI Transport Attributes +# SCSI Transports # CONFIG_SCSI_SPI_ATTRS=m CONFIG_SCSI_FC_ATTRS=m -# CONFIG_SCSI_ISCSI_ATTRS is not set +CONFIG_SCSI_ISCSI_ATTRS=m CONFIG_SCSI_SAS_ATTRS=m +CONFIG_SCSI_SAS_LIBSAS=m +CONFIG_SCSI_SAS_LIBSAS_DEBUG=y # # SCSI low-level drivers @@ -929,6 +951,8 @@ CONFIG_AIC79XX_RESET_DELAY_MS=15000 # CONFIG_AIC79XX_DEBUG_ENABLE is not set CONFIG_AIC79XX_DEBUG_MASK=0 # CONFIG_AIC79XX_REG_PRETTY_PRINT is not set +CONFIG_SCSI_AIC94XX=m +# CONFIG_AIC94XX_DEBUG is not set CONFIG_SCSI_DPT_I2O=m CONFIG_SCSI_ADVANSYS=m CONFIG_SCSI_IN2000=m @@ -938,24 +962,7 @@ CONFIG_MEGARAID_MM=m CONFIG_MEGARAID_MAILBOX=m CONFIG_MEGARAID_LEGACY=m CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_SATA=m -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -# CONFIG_SCSI_SATA_MV is not set -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_PDC_ADMA=m CONFIG_SCSI_HPTIOP=m -CONFIG_SCSI_SATA_QSTOR=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIL24=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_SATA_INTEL_COMBINED=y CONFIG_SCSI_BUSLOGIC=m # CONFIG_SCSI_OMIT_FLASHPOINT is not set CONFIG_SCSI_DMX3191D=m @@ -980,25 +987,27 @@ CONFIG_SCSI_INIA100=m # CONFIG_SCSI_IMM is not set CONFIG_SCSI_NCR53C406A=m CONFIG_SCSI_NCR_D700=m +CONFIG_SCSI_STEX=m CONFIG_SCSI_SYM53C8XX_2=m CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +CONFIG_SCSI_SYM53C8XX_MMIO=y # CONFIG_SCSI_IPR is not set CONFIG_SCSI_NCR_Q720=m CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 CONFIG_SCSI_NCR53C8XX_SYNC=20 # 
CONFIG_SCSI_NCR53C8XX_PROFILE is not set -# CONFIG_SCSI_MCA_53C9X is not set +CONFIG_SCSI_MCA_53C9X=m CONFIG_SCSI_PAS16=m CONFIG_SCSI_PSI240I=m -# CONFIG_SCSI_QLOGIC_FAS is not set -# CONFIG_SCSI_QLOGIC_FC is not set +CONFIG_SCSI_QLOGIC_FAS=m CONFIG_SCSI_QLOGIC_1280=m -# CONFIG_SCSI_QLA_FC is not set -# CONFIG_SCSI_LPFC is not set +CONFIG_SCSI_QLA_FC=m +CONFIG_SCSI_QLA_ISCSI=m +CONFIG_SCSI_LPFC=m +CONFIG_SCSI_SEAGATE=m CONFIG_SCSI_SIM710=m CONFIG_53C700_IO_MAPPED=y CONFIG_SCSI_SYM53C416=m @@ -1012,6 +1021,7 @@ CONFIG_SCSI_U14_34F_MAX_TAGS=8 CONFIG_SCSI_ULTRASTOR=m CONFIG_SCSI_NSP32=m # CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_SRP is not set # # PCMCIA SCSI adapter support @@ -1022,6 +1032,68 @@ CONFIG_PCMCIA_NINJA_SCSI=m CONFIG_PCMCIA_QLOGIC=m CONFIG_PCMCIA_SYM53C500=m +# +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +CONFIG_ATA=m +# CONFIG_ATA_NONSTANDARD is not set +CONFIG_SATA_AHCI=m +CONFIG_SATA_SVW=m +CONFIG_ATA_PIIX=m +CONFIG_SATA_MV=m +CONFIG_SATA_NV=m +CONFIG_PDC_ADMA=m +CONFIG_SATA_QSTOR=m +CONFIG_SATA_PROMISE=m +CONFIG_SATA_SX4=m +CONFIG_SATA_SIL=m +CONFIG_SATA_SIL24=m +CONFIG_SATA_SIS=m +CONFIG_SATA_ULI=m +CONFIG_SATA_VIA=m +CONFIG_SATA_VITESSE=m +CONFIG_SATA_INTEL_COMBINED=y +CONFIG_PATA_ALI=m +CONFIG_PATA_AMD=m +CONFIG_PATA_ARTOP=m +CONFIG_PATA_ATIIXP=m +CONFIG_PATA_CMD64X=m +CONFIG_PATA_CS5520=m +CONFIG_PATA_CS5530=m +CONFIG_PATA_CS5535=m +CONFIG_PATA_CYPRESS=m +CONFIG_PATA_EFAR=m +CONFIG_ATA_GENERIC=m +CONFIG_PATA_HPT366=m +CONFIG_PATA_HPT37X=m +CONFIG_PATA_HPT3X2N=m +CONFIG_PATA_HPT3X3=m +CONFIG_PATA_ISAPNP=m +CONFIG_PATA_IT821X=m +CONFIG_PATA_JMICRON=m +CONFIG_PATA_LEGACY=m +CONFIG_PATA_TRIFLEX=m +CONFIG_PATA_MARVELL=m +CONFIG_PATA_MPIIX=m +CONFIG_PATA_OLDPIIX=m +CONFIG_PATA_NETCELL=m +CONFIG_PATA_NS87410=m +CONFIG_PATA_OPTI=m +CONFIG_PATA_OPTIDMA=m +CONFIG_PATA_PCMCIA=m +CONFIG_PATA_PDC_OLD=m +CONFIG_PATA_QDI=m +CONFIG_PATA_RADISYS=m +CONFIG_PATA_RZ1000=m +CONFIG_PATA_SC1200=m +CONFIG_PATA_SERVERWORKS=m +CONFIG_PATA_PDC2027X=m 
+CONFIG_PATA_SIL680=m +CONFIG_PATA_SIS=m +CONFIG_PATA_VIA=m +CONFIG_PATA_WINBOND=m +CONFIG_PATA_WINBOND_VLB=m + # # Old CD-ROM drivers (not SCSI, not IDE) # @@ -1036,8 +1108,8 @@ CONFIG_MD_LINEAR=m CONFIG_MD_RAID0=m CONFIG_MD_RAID1=m # CONFIG_MD_RAID10 is not set -CONFIG_MD_RAID5=m -# CONFIG_MD_RAID6 is not set +CONFIG_MD_RAID456=m +CONFIG_MD_RAID5_RESHAPE=y CONFIG_MD_MULTIPATH=m # CONFIG_MD_FAULTY is not set # CONFIG_BLK_DEV_DM is not set @@ -1075,7 +1147,7 @@ CONFIG_IEEE1394_OHCI1394=m # # Protocol Drivers # -# CONFIG_IEEE1394_VIDEO1394 is not set +CONFIG_IEEE1394_VIDEO1394=m CONFIG_IEEE1394_SBP2=m # CONFIG_IEEE1394_SBP2_PHYS_DMA is not set CONFIG_IEEE1394_ETH1394=m @@ -1087,6 +1159,11 @@ CONFIG_IEEE1394_ETH1394=m # # CONFIG_I2O is not set +# +# Macintosh device drivers +# +# CONFIG_MAC_EMUMOUSEBTN is not set + # # Network device support # @@ -1122,6 +1199,10 @@ CONFIG_DAVICOM_PHY=m CONFIG_QSEMI_PHY=m CONFIG_LXT_PHY=m CONFIG_CICADA_PHY=m +CONFIG_VITESSE_PHY=m +CONFIG_SMSC_PHY=m +CONFIG_BROADCOM_PHY=m +# CONFIG_FIXED_PHY is not set # # Ethernet (10 or 100Mbit) @@ -1150,7 +1231,7 @@ CONFIG_ULTRA=m CONFIG_ULTRA32=m CONFIG_SMC9194=m CONFIG_NET_VENDOR_RACAL=y -# CONFIG_NI5010 is not set +CONFIG_NI5010=m CONFIG_NI52=m CONFIG_NI65=m @@ -1188,6 +1269,7 @@ CONFIG_NE2_MCA=m CONFIG_IBMLANA=m CONFIG_NET_PCI=y CONFIG_PCNET32=m +# CONFIG_PCNET32_NAPI is not set CONFIG_AMD8111_ETH=m CONFIG_AMD8111E_NAPI=y CONFIG_ADAPTEC_STARFIRE=m @@ -1196,6 +1278,7 @@ CONFIG_AC3200=m CONFIG_APRICOT=m CONFIG_B44=m CONFIG_FORCEDETH=m +# CONFIG_FORCEDETH_NAPI is not set CONFIG_CS89x0=m CONFIG_DGRS=m CONFIG_EEPRO100=m @@ -1219,6 +1302,7 @@ CONFIG_SUNDANCE_MMIO=y CONFIG_TLAN=m CONFIG_VIA_RHINE=m CONFIG_VIA_RHINE_MMIO=y +# CONFIG_VIA_RHINE_NAPI is not set CONFIG_NET_POCKET=y CONFIG_ATP=m CONFIG_DE600=m @@ -1246,15 +1330,20 @@ CONFIG_SK98LIN=m CONFIG_VIA_VELOCITY=m CONFIG_TIGON3=m CONFIG_BNX2=m +CONFIG_QLA3XXX=m # # Ethernet (10000 Mbit) # CONFIG_CHELSIO_T1=m +CONFIG_CHELSIO_T1_1G=y 
+CONFIG_CHELSIO_T1_NAPI=y CONFIG_IXGB=m # CONFIG_IXGB_NAPI is not set CONFIG_S2IO=m # CONFIG_S2IO_NAPI is not set +CONFIG_MYRI10GE=m +CONFIG_NETXEN_NIC=m # # Token Ring devices @@ -1264,7 +1353,67 @@ CONFIG_S2IO=m # # Wireless LAN (non-hamradio) # -# CONFIG_NET_RADIO is not set +CONFIG_NET_RADIO=y +CONFIG_NET_WIRELESS_RTNETLINK=y + +# +# Obsolete Wireless cards support (pre-802.11) +# +CONFIG_STRIP=m +CONFIG_ARLAN=m +CONFIG_WAVELAN=m +CONFIG_PCMCIA_WAVELAN=m +CONFIG_PCMCIA_NETWAVE=m + +# +# Wireless 802.11 Frequency Hopping cards support +# +CONFIG_PCMCIA_RAYCS=m + +# +# Wireless 802.11b ISA/PCI cards support +# +CONFIG_IPW2100=m +CONFIG_IPW2100_MONITOR=y +# CONFIG_IPW2100_DEBUG is not set +CONFIG_IPW2200=m +CONFIG_IPW2200_MONITOR=y +CONFIG_IPW2200_RADIOTAP=y +CONFIG_IPW2200_PROMISCUOUS=y +CONFIG_IPW2200_QOS=y +# CONFIG_IPW2200_DEBUG is not set +CONFIG_AIRO=m +CONFIG_HERMES=m +CONFIG_PLX_HERMES=m +CONFIG_TMD_HERMES=m +CONFIG_NORTEL_HERMES=m +CONFIG_PCI_HERMES=m +CONFIG_ATMEL=m +CONFIG_PCI_ATMEL=m + +# +# Wireless 802.11b Pcmcia/Cardbus cards support +# +CONFIG_PCMCIA_HERMES=m +CONFIG_PCMCIA_SPECTRUM=m +CONFIG_AIRO_CS=m +CONFIG_PCMCIA_ATMEL=m +CONFIG_PCMCIA_WL3501=m + +# +# Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support +# +CONFIG_PRISM54=m +CONFIG_USB_ZD1201=m +CONFIG_HOSTAP=m +CONFIG_HOSTAP_FIRMWARE=y +# CONFIG_HOSTAP_FIRMWARE_NVRAM is not set +CONFIG_HOSTAP_PLX=m +CONFIG_HOSTAP_PCI=m +CONFIG_HOSTAP_CS=m +# CONFIG_BCM43XX is not set +# CONFIG_ZD1211RW is not set +CONFIG_NET_WIRELESS=y # # PCMCIA network device support @@ -1341,6 +1490,7 @@ CONFIG_PPPOE=m CONFIG_PPPOATM=m CONFIG_SLIP=m CONFIG_SLIP_COMPRESSED=y +CONFIG_SLHC=m CONFIG_SLIP_SMART=y CONFIG_SLIP_MODE_SLIP6=y # CONFIG_NET_FC is not set @@ -1410,6 +1560,7 @@ CONFIG_MISDN_L1OIP=y # Input device support # CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set # # Userland interfaces @@ -1432,6 +1583,7 @@ CONFIG_KEYBOARD_SUNKBD=m CONFIG_KEYBOARD_LKKBD=m CONFIG_KEYBOARD_XTKBD=m CONFIG_KEYBOARD_NEWTON=m +# 
CONFIG_KEYBOARD_STOWAWAY is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_INPUT_JOYSTICK is not set # CONFIG_INPUT_TOUCHSCREEN is not set @@ -1459,25 +1611,28 @@ CONFIG_SERIO_LIBPS2=y CONFIG_VT=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set # CONFIG_SERIAL_NONSTANDARD is not set # # Serial drivers # CONFIG_SERIAL_8250=m +CONFIG_SERIAL_8250_PCI=m +CONFIG_SERIAL_8250_PNP=m CONFIG_SERIAL_8250_CS=m -# CONFIG_SERIAL_8250_ACPI is not set CONFIG_SERIAL_8250_NR_UARTS=4 CONFIG_SERIAL_8250_RUNTIME_UARTS=4 CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y -CONFIG_SERIAL_8250_SHARE_IRQ=y -CONFIG_SERIAL_8250_DETECT_IRQ=y -CONFIG_SERIAL_8250_RSA=y # CONFIG_SERIAL_8250_FOURPORT is not set # CONFIG_SERIAL_8250_ACCENT is not set # CONFIG_SERIAL_8250_BOCA is not set +# CONFIG_SERIAL_8250_EXAR_ST16C554 is not set # CONFIG_SERIAL_8250_HUB6 is not set +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y # CONFIG_SERIAL_8250_MCA is not set # @@ -1524,11 +1679,15 @@ CONFIG_IBMASR=m CONFIG_WAFER_WDT=m CONFIG_I6300ESB_WDT=m CONFIG_I8XX_TCO=m +# CONFIG_ITCO_WDT is not set CONFIG_SC1200_WDT=m +# CONFIG_PC87413_WDT is not set CONFIG_60XX_WDT=m CONFIG_SBC8360_WDT=m CONFIG_CPU5_WDT=m +# CONFIG_SMSC37B787_WDT is not set CONFIG_W83627HF_WDT=m +# CONFIG_W83697HF_WDT is not set CONFIG_W83877F_WDT=m CONFIG_W83977F_WDT=m CONFIG_MACHZ_WDT=m @@ -1553,18 +1712,27 @@ CONFIG_WDT_501_PCI=y # CONFIG_USBPCWATCHDOG=m CONFIG_HW_RANDOM=m +CONFIG_HW_RANDOM_INTEL=m +CONFIG_HW_RANDOM_AMD=m +CONFIG_HW_RANDOM_GEODE=m +CONFIG_HW_RANDOM_VIA=m # CONFIG_NVRAM is not set CONFIG_RTC=y # CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set # CONFIG_SONYPI is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -# CONFIG_AGP is not set +CONFIG_AGP=m +# CONFIG_AGP_ALI is not set +# CONFIG_AGP_ATI is not set +# CONFIG_AGP_AMD is not set +# CONFIG_AGP_AMD64 is not set 
+CONFIG_AGP_INTEL=m +# CONFIG_AGP_NVIDIA is not set +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_SWORKS is not set +# CONFIG_AGP_VIA is not set +# CONFIG_AGP_EFFICEON is not set # CONFIG_DRM is not set # @@ -1574,6 +1742,8 @@ CONFIG_SYNCLINK_CS=m # CONFIG_CARDMAN_4000 is not set # CONFIG_CARDMAN_4040 is not set CONFIG_MWAVE=m +# CONFIG_PC8736x_GPIO is not set +# CONFIG_NSC_GPIO is not set CONFIG_CS5535_GPIO=m # CONFIG_RAW_DRIVER is not set CONFIG_HPET=y @@ -1613,6 +1783,7 @@ CONFIG_I2C_ALGOBIT=m # CONFIG_I2C_I810 is not set # CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_OCORES is not set # CONFIG_I2C_PARPORT is not set # CONFIG_I2C_PARPORT_LIGHT is not set # CONFIG_I2C_PROSAVAGE is not set @@ -1636,9 +1807,7 @@ CONFIG_I2C_ALGOBIT=m # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set -# CONFIG_SENSORS_RTC8564 is not set # CONFIG_SENSORS_MAX6875 is not set -# CONFIG_RTC_X1205_I2C is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -1654,19 +1823,28 @@ CONFIG_I2C_ALGOBIT=m # Dallas's 1-wire bus # CONFIG_W1=m -CONFIG_W1_MATROX=m -CONFIG_W1_DS9490=m -CONFIG_W1_DS9490_BRIDGE=m -CONFIG_W1_THERM=m -CONFIG_W1_SMEM=m -CONFIG_W1_DS2433=m -CONFIG_W1_DS2433_CRC=y +CONFIG_W1_CON=y + +# +# 1-wire Bus Masters +# +# CONFIG_W1_MASTER_MATROX is not set +# CONFIG_W1_MASTER_DS2490 is not set +# CONFIG_W1_MASTER_DS2482 is not set + +# +# 1-wire Slaves +# +# CONFIG_W1_SLAVE_THERM is not set +# CONFIG_W1_SLAVE_SMEM is not set +# CONFIG_W1_SLAVE_DS2433 is not set # # Hardware Monitoring support # CONFIG_HWMON=m # CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_ABITUGURU is not set # CONFIG_SENSORS_ADM1021 is not set # CONFIG_SENSORS_ADM1025 is not set # CONFIG_SENSORS_ADM1026 is not set @@ -1700,43 +1878,42 @@ CONFIG_SENSORS_PC87427=m # CONFIG_SENSORS_SMSC47M192 is not set # CONFIG_SENSORS_SMSC47B397 is not set # CONFIG_SENSORS_VIA686A is not 
set +# CONFIG_SENSORS_VT1211 is not set # CONFIG_SENSORS_VT8231 is not set # CONFIG_SENSORS_W83781D is not set # CONFIG_SENSORS_W83791D is not set # CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set # CONFIG_SENSORS_W83L785TS is not set # CONFIG_SENSORS_W83627HF is not set # CONFIG_SENSORS_W83627EHF is not set CONFIG_SENSORS_HDAPS=m # CONFIG_HWMON_DEBUG_CHIP is not set -# -# Misc devices -# -# CONFIG_IBM_ASM is not set - -# -# Multimedia Capabilities Port drivers -# - # # Multimedia devices # CONFIG_VIDEO_DEV=m +CONFIG_VIDEO_V4L1=y +CONFIG_VIDEO_V4L1_COMPAT=y +CONFIG_VIDEO_V4L2=y # -# Video For Linux +# Video Capture Adapters # # -# Video Adapters +# Video Capture Adapters # # CONFIG_VIDEO_ADV_DEBUG is not set +CONFIG_VIDEO_HELPER_CHIPS_AUTO=y +# CONFIG_VIDEO_VIVI is not set # CONFIG_VIDEO_BT848 is not set # CONFIG_VIDEO_PMS is not set # CONFIG_VIDEO_BWQCAM is not set # CONFIG_VIDEO_CQCAM is not set # CONFIG_VIDEO_CPIA is not set +# CONFIG_VIDEO_CPIA2 is not set # CONFIG_VIDEO_SAA5246A is not set # CONFIG_VIDEO_SAA5249 is not set # CONFIG_TUNER_3036 is not set @@ -1748,11 +1925,27 @@ CONFIG_VIDEO_DEV=m # CONFIG_VIDEO_HEXIUM_ORION is not set # CONFIG_VIDEO_HEXIUM_GEMINI is not set # CONFIG_VIDEO_CX88 is not set -CONFIG_VIDEO_CX88_VP3054=m +# CONFIG_VIDEO_CAFE_CCIC is not set + +# +# V4L USB devices +# +# CONFIG_VIDEO_PVRUSB2 is not set # CONFIG_VIDEO_EM28XX is not set +# CONFIG_VIDEO_USBVISION is not set +# CONFIG_USB_VICAM is not set +# CONFIG_USB_IBMCAM is not set +# CONFIG_USB_KONICAWC is not set +# CONFIG_USB_QUICKCAM_MESSENGER is not set +# CONFIG_USB_ET61X251 is not set # CONFIG_VIDEO_OVCAMCHIP is not set -# CONFIG_VIDEO_AUDIO_DECODER is not set -# CONFIG_VIDEO_DECODER is not set +# CONFIG_USB_W9968CF is not set +# CONFIG_USB_OV511 is not set +# CONFIG_USB_SE401 is not set +# CONFIG_USB_SN9C102 is not set +# CONFIG_USB_STV680 is not set +# CONFIG_USB_ZC0301 is not set +# CONFIG_USB_PWC is not set # # Radio Adapters @@ -1771,160 +1964,91 @@ 
CONFIG_VIDEO_CX88_VP3054=m # CONFIG_RADIO_TRUST is not set # CONFIG_RADIO_TYPHOON is not set # CONFIG_RADIO_ZOLTRIX is not set +# CONFIG_USB_DSBR is not set # # Digital Video Broadcasting Devices # CONFIG_DVB=y -CONFIG_DVB_CORE=m - -# -# Supported SAA7146 based PCI Adapters -# -CONFIG_DVB_AV7110=m -# CONFIG_DVB_AV7110_FIRMWARE is not set -CONFIG_DVB_AV7110_OSD=y -CONFIG_DVB_BUDGET=m -CONFIG_DVB_BUDGET_CI=m -CONFIG_DVB_BUDGET_AV=m -CONFIG_DVB_BUDGET_PATCH=m - -# -# Supported USB Adapters -# -CONFIG_DVB_USB=m -# CONFIG_DVB_USB_DEBUG is not set -CONFIG_DVB_USB_A800=m -CONFIG_DVB_USB_DIBUSB_MB=m -# CONFIG_DVB_USB_DIBUSB_MB_FAULTY is not set -CONFIG_DVB_USB_DIBUSB_MC=m -CONFIG_DVB_USB_UMT_010=m -CONFIG_DVB_USB_CXUSB=m -CONFIG_DVB_USB_DIGITV=m -CONFIG_DVB_USB_VP7045=m -CONFIG_DVB_USB_VP702X=m -CONFIG_DVB_USB_NOVA_T_USB2=m -CONFIG_DVB_USB_DTT200U=m -CONFIG_DVB_TTUSB_BUDGET=m -CONFIG_DVB_TTUSB_DEC=m -CONFIG_DVB_CINERGYT2=m -# CONFIG_DVB_CINERGYT2_TUNING is not set - -# -# Supported FlexCopII (B2C2) Adapters -# -CONFIG_DVB_B2C2_FLEXCOP=m -CONFIG_DVB_B2C2_FLEXCOP_PCI=m -CONFIG_DVB_B2C2_FLEXCOP_USB=m -# CONFIG_DVB_B2C2_FLEXCOP_DEBUG is not set - -# -# Supported BT878 Adapters -# - -# -# Supported Pluto2 Adapters -# -CONFIG_DVB_PLUTO2=m - -# -# Supported DVB Frontends -# - -# -# Customise DVB Frontends -# - -# -# DVB-S (satellite) frontends -# -CONFIG_DVB_STV0299=m -CONFIG_DVB_CX24110=m -CONFIG_DVB_CX24123=m -CONFIG_DVB_TDA8083=m -CONFIG_DVB_MT312=m -CONFIG_DVB_VES1X93=m -CONFIG_DVB_S5H1420=m - -# -# DVB-T (terrestrial) frontends -# -CONFIG_DVB_SP8870=m -CONFIG_DVB_SP887X=m -CONFIG_DVB_CX22700=m -CONFIG_DVB_CX22702=m -CONFIG_DVB_L64781=m -CONFIG_DVB_TDA1004X=m -CONFIG_DVB_NXT6000=m -CONFIG_DVB_MT352=m -CONFIG_DVB_DIB3000MB=m -CONFIG_DVB_DIB3000MC=m - -# -# DVB-C (cable) frontends -# -CONFIG_DVB_VES1820=m -CONFIG_DVB_TDA10021=m -CONFIG_DVB_STV0297=m - -# -# ATSC (North American/Korean Terresterial DTV) frontends -# -CONFIG_DVB_NXT200X=m -CONFIG_DVB_OR51211=m 
-CONFIG_DVB_OR51132=m -CONFIG_DVB_BCM3510=m -CONFIG_DVB_LGDT330X=m -CONFIG_VIDEO_SAA7146=m -CONFIG_VIDEO_SAA7146_VV=m -CONFIG_VIDEO_VIDEOBUF=m -CONFIG_VIDEO_BUF=m +# CONFIG_DVB_CORE is not set +# CONFIG_USB_DABUSB is not set # # Graphics support # +CONFIG_FIRMWARE_EDID=y CONFIG_FB=y +CONFIG_FB_DDC=m CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y # CONFIG_FB_MACMODES is not set -# CONFIG_FB_MODE_HELPERS is not set -# CONFIG_FB_TILEBLITTING is not set -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ARC is not set -# CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set +# CONFIG_FB_BACKLIGHT is not set +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_CIRRUS=m +CONFIG_FB_PM2=m +CONFIG_FB_PM2_FIFO_DISCONNECT=y +CONFIG_FB_CYBER2000=m +CONFIG_FB_ARC=m +CONFIG_FB_ASILIANT=y +CONFIG_FB_IMSTT=y # CONFIG_FB_VGA16 is not set CONFIG_FB_VESA=y -CONFIG_VIDEO_SELECT=y # CONFIG_FB_HGA is not set # CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set -# CONFIG_FB_I810 is not set -# CONFIG_FB_INTEL is not set -# CONFIG_FB_MATROX is not set -# CONFIG_FB_RADEON_OLD is not set -# CONFIG_FB_RADEON is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_SAVAGE is not set -# CONFIG_FB_SIS is not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VOODOO1 is not set -# CONFIG_FB_CYBLA is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_GEODE is not set +CONFIG_FB_NVIDIA=m +CONFIG_FB_NVIDIA_I2C=y +CONFIG_FB_RIVA=m +CONFIG_FB_RIVA_I2C=y +# CONFIG_FB_RIVA_DEBUG is not set +CONFIG_FB_I810=m +CONFIG_FB_I810_GTF=y +CONFIG_FB_I810_I2C=y +CONFIG_FB_INTEL=m +# CONFIG_FB_INTEL_DEBUG is not set +CONFIG_FB_INTEL_I2C=y +CONFIG_FB_MATROX=m +CONFIG_FB_MATROX_MILLENIUM=y +CONFIG_FB_MATROX_MYSTIQUE=y +CONFIG_FB_MATROX_G=y +CONFIG_FB_MATROX_I2C=m +CONFIG_FB_MATROX_MAVEN=m 
+CONFIG_FB_MATROX_MULTIHEAD=y +CONFIG_FB_RADEON=m +CONFIG_FB_RADEON_I2C=y +# CONFIG_FB_RADEON_DEBUG is not set +CONFIG_FB_ATY128=m +CONFIG_FB_ATY=m +CONFIG_FB_ATY_CT=y +# CONFIG_FB_ATY_GENERIC_LCD is not set +CONFIG_FB_ATY_GX=y +CONFIG_FB_SAVAGE=m +CONFIG_FB_SAVAGE_I2C=y +CONFIG_FB_SAVAGE_ACCEL=y +CONFIG_FB_SIS=m +CONFIG_FB_SIS_300=y +CONFIG_FB_SIS_315=y +CONFIG_FB_NEOMAGIC=m +CONFIG_FB_KYRO=m +CONFIG_FB_3DFX=m +CONFIG_FB_3DFX_ACCEL=y +CONFIG_FB_VOODOO1=m +CONFIG_FB_CYBLA=m +CONFIG_FB_TRIDENT=m +CONFIG_FB_TRIDENT_ACCEL=y +CONFIG_FB_GEODE=y +CONFIG_FB_GEODE_GX=m +# CONFIG_FB_GEODE_GX_SET_FBSIZE is not set +CONFIG_FB_GEODE_GX1=m # CONFIG_FB_VIRTUAL is not set # # Console display driver support # CONFIG_VGA_CONSOLE=y +# CONFIG_VGACON_SOFT_SCROLLBACK is not set +CONFIG_VIDEO_SELECT=y # CONFIG_MDA_CONSOLE is not set CONFIG_DUMMY_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE=y @@ -1960,11 +2084,13 @@ CONFIG_SND_SEQUENCER=m CONFIG_SND_OSSEMUL=y CONFIG_SND_MIXER_OSS=m CONFIG_SND_PCM_OSS=m +CONFIG_SND_PCM_OSS_PLUGINS=y # CONFIG_SND_SEQUENCER_OSS is not set CONFIG_SND_RTCTIMER=m CONFIG_SND_SEQ_RTCTIMER_DEFAULT=y # CONFIG_SND_DYNAMIC_MINORS is not set # CONFIG_SND_SUPPORT_OLD_API is not set +CONFIG_SND_VERBOSE_PROCFS=y # CONFIG_SND_VERBOSE_PRINTK is not set # CONFIG_SND_DEBUG is not set @@ -1976,10 +2102,10 @@ CONFIG_SND_OPL3_LIB=m CONFIG_SND_OPL4_LIB=m CONFIG_SND_VX_LIB=m CONFIG_SND_AC97_CODEC=m -CONFIG_SND_AC97_BUS=m CONFIG_SND_DUMMY=m # CONFIG_SND_VIRMIDI is not set CONFIG_SND_MTPAV=m +CONFIG_SND_MTS64=m CONFIG_SND_SERIAL_U16550=m CONFIG_SND_MPU401=m @@ -1988,6 +2114,7 @@ CONFIG_SND_MPU401=m # CONFIG_SND_AD1848_LIB=m CONFIG_SND_CS4231_LIB=m +# CONFIG_SND_ADLIB is not set CONFIG_SND_AD1816A=m CONFIG_SND_AD1848=m CONFIG_SND_ALS100=m @@ -2010,6 +2137,7 @@ CONFIG_SND_OPL3SA2=m CONFIG_SND_OPTI92X_AD1848=m CONFIG_SND_OPTI92X_CS4231=m CONFIG_SND_OPTI93X=m +CONFIG_SND_MIRO=m CONFIG_SND_SB8=m CONFIG_SND_SB16=m CONFIG_SND_SBAWE=m @@ -2022,6 +2150,7 @@ CONFIG_SND_WAVEFRONT=m # PCI devices # 
CONFIG_SND_AD1889=m +CONFIG_SND_ALS300=m CONFIG_SND_ALS4000=m CONFIG_SND_ALI5451=m CONFIG_SND_ATIIXP=m @@ -2031,12 +2160,12 @@ CONFIG_SND_AU8820=m CONFIG_SND_AU8830=m CONFIG_SND_AZT3328=m CONFIG_SND_BT87X=m -# CONFIG_SND_BT87X_OVERCLOCK is not set +CONFIG_SND_BT87X_OVERCLOCK=y CONFIG_SND_CA0106=m CONFIG_SND_CMIPCI=m CONFIG_SND_CS4281=m CONFIG_SND_CS46XX=m -# CONFIG_SND_CS46XX_NEW_DSP is not set +CONFIG_SND_CS46XX_NEW_DSP=y CONFIG_SND_CS5535AUDIO=m CONFIG_SND_DARLA20=m CONFIG_SND_GINA20=m @@ -2057,7 +2186,8 @@ CONFIG_SND_ENS1371=m CONFIG_SND_ES1938=m CONFIG_SND_ES1968=m CONFIG_SND_FM801=m -# CONFIG_SND_FM801_TEA575X_BOOL is not set +CONFIG_SND_FM801_TEA575X_BOOL=y +CONFIG_SND_FM801_TEA575X=m CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDSP=m CONFIG_SND_HDSPM=m @@ -2070,6 +2200,7 @@ CONFIG_SND_MAESTRO3=m CONFIG_SND_MIXART=m CONFIG_SND_NM256=m CONFIG_SND_PCXHR=m +CONFIG_SND_RIPTIDE=m CONFIG_SND_RME32=m CONFIG_SND_RME96=m CONFIG_SND_RME9652=m @@ -2079,6 +2210,7 @@ CONFIG_SND_VIA82XX=m CONFIG_SND_VIA82XX_MODEM=m CONFIG_SND_VX222=m CONFIG_SND_YMFPCI=m +# CONFIG_SND_AC97_POWER_SAVE is not set # # USB devices @@ -2096,12 +2228,19 @@ CONFIG_SND_PDAUDIOCF=m # Open Sound System # # CONFIG_SOUND_PRIME is not set +CONFIG_AC97_BUS=m + +# +# HID Devices +# +CONFIG_HID=y # # USB support # CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y CONFIG_USB=m # CONFIG_USB_DEBUG is not set @@ -2120,6 +2259,7 @@ CONFIG_USB_SUSPEND=y CONFIG_USB_EHCI_HCD=m # CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_EHCI_TT_NEWSCHED is not set CONFIG_USB_ISP116X_HCD=m CONFIG_USB_OHCI_HCD=m # CONFIG_USB_OHCI_BIG_ENDIAN is not set @@ -2131,7 +2271,6 @@ CONFIG_USB_SL811_HCD=m # # USB Device Class drivers # -# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set CONFIG_USB_ACM=m CONFIG_USB_PRINTER=m @@ -2153,13 +2292,13 @@ CONFIG_USB_STORAGE_SDDR09=y CONFIG_USB_STORAGE_SDDR55=y CONFIG_USB_STORAGE_JUMPSHOT=y CONFIG_USB_STORAGE_ALAUDA=y +# 
CONFIG_USB_STORAGE_KARMA is not set # CONFIG_USB_LIBUSUAL is not set # # USB Input Devices # CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y # CONFIG_USB_HIDINPUT_POWERBOOK is not set # CONFIG_HID_FF is not set CONFIG_USB_HIDDEV=y @@ -2174,9 +2313,7 @@ CONFIG_USB_HIDDEV=y # CONFIG_USB_ACECAD is not set # CONFIG_USB_KBTAB is not set # CONFIG_USB_POWERMATE is not set -# CONFIG_USB_MTOUCH is not set -# CONFIG_USB_ITMTOUCH is not set -# CONFIG_USB_EGALAX is not set +# CONFIG_USB_TOUCHSCREEN is not set # CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set @@ -2190,21 +2327,6 @@ CONFIG_USB_HIDDEV=y # CONFIG_USB_MDC800 is not set # CONFIG_USB_MICROTEK is not set -# -# USB Multimedia devices -# -# CONFIG_USB_DABUSB is not set -# CONFIG_USB_VICAM is not set -# CONFIG_USB_DSBR is not set -# CONFIG_USB_ET61X251 is not set -# CONFIG_USB_IBMCAM is not set -# CONFIG_USB_KONICAWC is not set -# CONFIG_USB_OV511 is not set -# CONFIG_USB_SE401 is not set -# CONFIG_USB_SN9C102 is not set -# CONFIG_USB_STV680 is not set -# CONFIG_USB_PWC is not set - # # USB Network Adapters # @@ -2212,12 +2334,14 @@ CONFIG_USB_CATC=m CONFIG_USB_KAWETH=m CONFIG_USB_PEGASUS=m CONFIG_USB_RTL8150=m +CONFIG_USB_USBNET_MII=m CONFIG_USB_USBNET=m CONFIG_USB_NET_AX8817X=m CONFIG_USB_NET_CDCETHER=m CONFIG_USB_NET_GL620A=m CONFIG_USB_NET_NET1080=m CONFIG_USB_NET_PLUSB=m +# CONFIG_USB_NET_MCS7830 is not set CONFIG_USB_NET_RNDIS_HOST=m CONFIG_USB_NET_CDC_SUBSET=m CONFIG_USB_ALI_M5632=y @@ -2243,17 +2367,21 @@ CONFIG_USB_NET_ZAURUS=m # CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m +# CONFIG_USB_ADUTUX is not set # CONFIG_USB_AUERSWALD is not set # CONFIG_USB_RIO500 is not set # CONFIG_USB_LEGOTOWER is not set CONFIG_USB_LCD=m CONFIG_USB_LED=m +# CONFIG_USB_CYPRESS_CY7C63 is not set # CONFIG_USB_CYTHERM is not set -# CONFIG_USB_PHIDGETKIT is not set -# CONFIG_USB_PHIDGETSERVO is not set +# CONFIG_USB_PHIDGET is not set # CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_FTDI_ELAN is not set +# 
CONFIG_USB_APPLEDISPLAY is not set # CONFIG_USB_SISUSBVGA is not set # CONFIG_USB_LD is not set +# CONFIG_USB_TRANCEVIBRATOR is not set # CONFIG_USB_TEST is not set # @@ -2276,7 +2404,22 @@ CONFIG_USB_XUSBATM=m CONFIG_MMC=m # CONFIG_MMC_DEBUG is not set CONFIG_MMC_BLOCK=m +# CONFIG_MMC_SDHCI is not set CONFIG_MMC_WBSD=m +# CONFIG_MMC_TIFM_SD is not set + +# +# LED devices +# +# CONFIG_NEW_LEDS is not set + +# +# LED drivers +# + +# +# LED Triggers +# # # InfiniBand support @@ -2288,6 +2431,29 @@ CONFIG_MMC_WBSD=m # # CONFIG_EDAC is not set +# +# Real Time Clock +# +# CONFIG_RTC_CLASS is not set + +# +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices +# + +# +# Virtualization +# +# CONFIG_KVM is not set + # # File systems # @@ -2300,6 +2466,7 @@ CONFIG_EXT3_FS=m CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y +# CONFIG_EXT4DEV_FS is not set CONFIG_JBD=m # CONFIG_JBD_DEBUG is not set CONFIG_FS_MBCACHE=m @@ -2318,15 +2485,16 @@ CONFIG_JFS_SECURITY=y CONFIG_JFS_STATISTICS=y CONFIG_FS_POSIX_ACL=y CONFIG_XFS_FS=m -CONFIG_XFS_EXPORT=y CONFIG_XFS_QUOTA=y CONFIG_XFS_SECURITY=y CONFIG_XFS_POSIX_ACL=y # CONFIG_XFS_RT is not set +# CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y # CONFIG_QUOTA is not set CONFIG_QUOTACTL=y CONFIG_DNOTIFY=y @@ -2358,12 +2526,13 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # CONFIG_PROC_FS=y # CONFIG_PROC_KCORE is not set +CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set # CONFIG_HUGETLBFS is not set # CONFIG_HUGETLB_PAGE is not set CONFIG_RAMFS=y -# CONFIG_RELAYFS_FS is not set # CONFIG_CONFIGFS_FS is not set # @@ -2376,11 +2545,11 @@ CONFIG_RAMFS=y # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set CONFIG_JFFS2_FS=m CONFIG_JFFS2_FS_DEBUG=0 CONFIG_JFFS2_FS_WRITEBUFFER=y 
# CONFIG_JFFS2_SUMMARY is not set +# CONFIG_JFFS2_FS_XATTR is not set CONFIG_JFFS2_COMPRESSION_OPTIONS=y CONFIG_JFFS2_ZLIB=y CONFIG_JFFS2_RTIME=y @@ -2392,7 +2561,6 @@ CONFIG_JFFS2_CMODE_PRIORITY=y CONFIG_SQUASHFS=y # CONFIG_SQUASHFS_EMBEDDED is not set CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 -# CONFIG_SQUASHFS_VMALLOC is not set # CONFIG_VXFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set @@ -2425,8 +2593,10 @@ CONFIG_SMB_FS=m CONFIG_CIFS=m CONFIG_CIFS_STATS=y CONFIG_CIFS_STATS2=y +# CONFIG_CIFS_WEAK_PW_HASH is not set CONFIG_CIFS_XATTR=y # CONFIG_CIFS_POSIX is not set +# CONFIG_CIFS_DEBUG2 is not set # CONFIG_CIFS_EXPERIMENTAL is not set # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set @@ -2483,6 +2653,11 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_KOI8_U is not set CONFIG_NLS_UTF8=y +# +# Distributed Lock Manager +# +# CONFIG_DLM is not set + # # Instrumentation Support # @@ -2492,12 +2667,18 @@ CONFIG_NLS_UTF8=y # # Kernel hacking # +CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_MUST_CHECK=y # CONFIG_MAGIC_SYSRQ is not set +CONFIG_UNUSED_SYMBOLS=y +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set # CONFIG_DEBUG_KERNEL is not set CONFIG_LOG_BUF_SHIFT=14 CONFIG_DEBUG_BUGVERBOSE=y CONFIG_EARLY_PRINTK=y +CONFIG_DOUBLEFAULT=y # # Security options @@ -2509,7 +2690,12 @@ CONFIG_EARLY_PRINTK=y # Cryptographic options # CONFIG_CRYPTO=y +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set # CONFIG_CRYPTO_NULL is not set # CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=m @@ -2518,9 +2704,15 @@ CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m # CONFIG_CRYPTO_WP512 is not set # CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_GF128MUL is not set +CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_CBC=m +# CONFIG_CRYPTO_LRW is not set CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_TWOFISH=m 
+CONFIG_CRYPTO_TWOFISH_COMMON=m +CONFIG_CRYPTO_TWOFISH_586=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_AES_586=m @@ -2531,7 +2723,7 @@ CONFIG_CRYPTO_ARC4=m # CONFIG_CRYPTO_KHAZAD is not set # CONFIG_CRYPTO_ANUBIS is not set CONFIG_CRYPTO_DEFLATE=y -# CONFIG_CRYPTO_MICHAEL_MIC is not set +CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_CRC32C=m # CONFIG_CRYPTO_TEST is not set @@ -2539,21 +2731,27 @@ CONFIG_CRYPTO_CRC32C=m # Hardware crypto devices # CONFIG_CRYPTO_DEV_PADLOCK=m -CONFIG_CRYPTO_DEV_PADLOCK_AES=y +CONFIG_CRYPTO_DEV_PADLOCK_AES=m +CONFIG_CRYPTO_DEV_PADLOCK_SHA=m +CONFIG_CRYPTO_DEV_GEODE=m # # Library routines # +CONFIG_BITREVERSE=y CONFIG_CRC_CCITT=m CONFIG_CRC16=m CONFIG_CRC32=y CONFIG_LIBCRC32C=m +CONFIG_AUDIT_GENERIC=y CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y CONFIG_TEXTSEARCH=y CONFIG_TEXTSEARCH_KMP=m CONFIG_TEXTSEARCH_BM=m CONFIG_TEXTSEARCH_FSM=m +CONFIG_PLIST=y +CONFIG_IOMAP_COPY=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_X86_BIOS_REBOOT=y diff --git a/config/kernel/kernel.config.i586.smp b/config/kernel/kernel.config.i586.smp index 9b6362345..4f47ddfa6 100644 --- a/config/kernel/kernel.config.i586.smp +++ b/config/kernel/kernel.config.i586.smp @@ -1,16 +1,22 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16.57-ipfire -# Wed Mar 26 10:05:08 2008 +# Linux kernel version: 2.6.20.21 +# Sat May 17 15:57:15 2008 # CONFIG_X86_32=y +CONFIG_GENERIC_TIME=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y CONFIG_SEMAPHORE_SLEEPERS=y CONFIG_X86=y CONFIG_MMU=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y +CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_HWEIGHT=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_DMI=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # # Code maturity level options @@ -26,18 +32,23 @@ CONFIG_LOCALVERSION="" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SWAP=y CONFIG_SYSVIPC=y +# CONFIG_IPC_NS is not set CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set 
-CONFIG_SYSCTL=y +# CONFIG_TASKSTATS is not set +# CONFIG_UTS_NS is not set CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y # CONFIG_IKCONFIG is not set # CONFIG_CPUSETS is not set +CONFIG_SYSFS_DEPRECATED=y +# CONFIG_RELAY is not set CONFIG_INITRAMFS_SOURCE="" -CONFIG_UID16=y -CONFIG_VM86=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set CONFIG_HOTPLUG=y @@ -48,11 +59,9 @@ CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y CONFIG_SHMEM=y -CONFIG_CC_ALIGN_FUNCTIONS=0 -CONFIG_CC_ALIGN_LABELS=0 -CONFIG_CC_ALIGN_LOOPS=0 -CONFIG_CC_ALIGN_JUMPS=0 CONFIG_SLAB=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 # CONFIG_SLOB is not set @@ -63,7 +72,6 @@ CONFIG_BASE_SMALL=0 CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_OBSOLETE_MODPARM=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_KMOD=y @@ -72,7 +80,10 @@ CONFIG_STOP_MACHINE=y # # Block layer # +CONFIG_BLOCK=y # CONFIG_LBD is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set # # IO Schedulers @@ -90,6 +101,7 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" # # Processor type and features # +CONFIG_SMP=y CONFIG_X86_PC=y # CONFIG_X86_ELAN is not set # CONFIG_X86_VOYAGER is not set @@ -99,6 +111,7 @@ CONFIG_X86_PC=y # CONFIG_X86_VISWS is not set # CONFIG_X86_GENERICARCH is not set # CONFIG_X86_ES7000 is not set +# CONFIG_PARAVIRT is not set # CONFIG_M386 is not set # CONFIG_M486 is not set CONFIG_M586=y @@ -108,6 +121,7 @@ CONFIG_M586=y # CONFIG_MPENTIUMII is not set # CONFIG_MPENTIUMIII is not set # CONFIG_MPENTIUMM is not set +# CONFIG_MCORE2 is not set # CONFIG_MPENTIUM4 is not set # CONFIG_MK6 is not set # CONFIG_MK7 is not set @@ -126,6 +140,8 @@ CONFIG_X86_CMPXCHG=y CONFIG_X86_XADD=y CONFIG_X86_L1_CACHE_SHIFT=7 CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not 
set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_X86_PPRO_FENCE=y CONFIG_X86_F00F_BUG=y @@ -138,9 +154,9 @@ CONFIG_X86_ALIGNMENT_16=y CONFIG_X86_INTEL_USERCOPY=y CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y -CONFIG_SMP=y CONFIG_NR_CPUS=8 -CONFIG_SCHED_SMT=y +# CONFIG_SCHED_SMT is not set +CONFIG_SCHED_MC=y CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set @@ -150,6 +166,7 @@ CONFIG_X86_IO_APIC=y CONFIG_X86_MCE=y CONFIG_X86_MCE_NONFATAL=m # CONFIG_X86_MCE_P4THERMAL is not set +CONFIG_VM86=y # CONFIG_TOSHIBA is not set # CONFIG_I8K is not set CONFIG_X86_REBOOTFIXUPS=y @@ -166,15 +183,12 @@ CONFIG_X86_CPUID=y # CONFIG_NOHIGHMEM is not set CONFIG_HIGHMEM4G=y # CONFIG_HIGHMEM64G is not set -CONFIG_VMSPLIT_3G=y -# CONFIG_VMSPLIT_3G_OPT is not set -# CONFIG_VMSPLIT_2G is not set -# CONFIG_VMSPLIT_1G is not set CONFIG_PAGE_OFFSET=0xC0000000 CONFIG_HIGHMEM=y CONFIG_ARCH_FLATMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set @@ -183,22 +197,25 @@ CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_SPARSEMEM_STATIC=y CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set # CONFIG_HIGHPTE is not set CONFIG_MATH_EMULATION=y # CONFIG_MTRR is not set # CONFIG_EFI is not set CONFIG_IRQBALANCE=y -# CONFIG_REGPARM is not set CONFIG_SECCOMP=y CONFIG_HZ_100=y # CONFIG_HZ_250 is not set +# CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=100 # CONFIG_KEXEC is not set # CONFIG_CRASH_DUMP is not set CONFIG_PHYSICAL_START=0x100000 +# CONFIG_RELOCATABLE is not set +CONFIG_PHYSICAL_ALIGN=0x100000 # CONFIG_HOTPLUG_CPU is not set -CONFIG_DOUBLEFAULT=y +CONFIG_COMPAT_VDSO=y CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # @@ -207,6 +224,7 @@ CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_PM=y CONFIG_PM_LEGACY=y # CONFIG_PM_DEBUG is not set +# 
CONFIG_PM_SYSFS_DEPRECATED is not set # # ACPI (Advanced Configuration and Power Interface) Support @@ -218,6 +236,7 @@ CONFIG_ACPI_BUTTON=m CONFIG_ACPI_VIDEO=m # CONFIG_ACPI_HOTKEY is not set CONFIG_ACPI_FAN=m +# CONFIG_ACPI_DOCK is not set CONFIG_ACPI_PROCESSOR=m CONFIG_ACPI_THERMAL=m # CONFIG_ACPI_ASUS is not set @@ -231,6 +250,7 @@ CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y # CONFIG_ACPI_CONTAINER is not set +# CONFIG_ACPI_SBS is not set # # APM (Advanced Power Management) BIOS Support @@ -278,6 +298,7 @@ CONFIG_X86_SPEEDSTEP_SMI=m CONFIG_X86_P4_CLOCKMOD=m CONFIG_X86_CPUFREQ_NFORCE2=m CONFIG_X86_LONGRUN=m +# CONFIG_X86_LONGHAUL is not set # # shared options @@ -298,8 +319,9 @@ CONFIG_PCI_BIOS=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y CONFIG_PCIEPORTBUS=y +CONFIG_PCIEAER=y # CONFIG_PCI_MSI is not set -CONFIG_PCI_LEGACY_PROC=y +CONFIG_HT_IRQ=y CONFIG_ISA_DMA_API=y CONFIG_ISA=y CONFIG_EISA=y @@ -364,6 +386,7 @@ CONFIG_PACKET_MMAP=y CONFIG_UNIX=y CONFIG_XFRM=y CONFIG_XFRM_USER=m +# CONFIG_XFRM_SUB_POLICY is not set CONFIG_NET_KEY=m CONFIG_INET=y CONFIG_IP_MULTICAST=y @@ -372,7 +395,6 @@ CONFIG_ASK_IP_FIB_HASH=y # CONFIG_IP_FIB_TRIE is not set CONFIG_IP_FIB_HASH=y CONFIG_IP_MULTIPLE_TABLES=y -# CONFIG_IP_ROUTE_FWMARK is not set CONFIG_IP_ROUTE_MULTIPATH=y # CONFIG_IP_ROUTE_MULTIPATH_CACHED is not set CONFIG_IP_ROUTE_VERBOSE=y @@ -389,14 +411,14 @@ CONFIG_IPSEC_NAT_TRAVERSAL=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_TUNNEL=m CONFIG_INET_TUNNEL=m +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y CONFIG_INET_DIAG=m CONFIG_INET_TCP_DIAG=m CONFIG_TCP_CONG_ADVANCED=y - -# -# TCP congestion control -# CONFIG_TCP_CONG_BIC=m CONFIG_TCP_CONG_CUBIC=m CONFIG_TCP_CONG_WESTWOOD=m @@ -405,12 +427,25 @@ CONFIG_TCP_CONG_HSTCP=m CONFIG_TCP_CONG_HYBLA=m CONFIG_TCP_CONG_VEGAS=m CONFIG_TCP_CONG_SCALABLE=m +# CONFIG_TCP_CONG_LP is not set +# CONFIG_TCP_CONG_VENO is not set +# 
CONFIG_DEFAULT_BIC is not set +# CONFIG_DEFAULT_CUBIC is not set +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_WESTWOOD is not set +CONFIG_DEFAULT_RENO=y +CONFIG_DEFAULT_TCP_CONG="reno" +# CONFIG_TCP_MD5SIG is not set # # IP: Virtual Server Configuration # # CONFIG_IP_VS is not set # CONFIG_IPV6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set +# CONFIG_NETWORK_SECMARK is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set CONFIG_BRIDGE_NETFILTER=y @@ -421,95 +456,60 @@ CONFIG_BRIDGE_NETFILTER=y CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NETFILTER_NETLINK_LOG=m +# CONFIG_NF_CONNTRACK_ENABLED is not set CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m +# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m +# CONFIG_NETFILTER_XT_MATCH_DSCP is not set +# CONFIG_NETFILTER_XT_MATCH_ESP is not set CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m CONFIG_NETFILTER_XT_MATCH_MAC=m CONFIG_NETFILTER_XT_MATCH_MARK=m +# CONFIG_NETFILTER_XT_MATCH_POLICY is not set +# CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set CONFIG_NETFILTER_XT_MATCH_REALM=m CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_STATE=m +# CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set CONFIG_NETFILTER_XT_MATCH_STRING=m CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +# CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set # # IP: Netfilter Configuration # 
-CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_CT_ACCT=y -CONFIG_IP_NF_CONNTRACK_MARK=y -CONFIG_IP_NF_CONNTRACK_EVENTS=y -CONFIG_IP_NF_CONNTRACK_NETLINK=m -CONFIG_IP_NF_CT_PROTO_SCTP=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_NETBIOS_NS=m -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_AMANDA=m -CONFIG_IP_NF_PPTP=m -CONFIG_IP_NF_H323=m CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_IPRANGE=m -CONFIG_IP_NF_MATCH_LAYER7=m -# CONFIG_IP_NF_MATCH_LAYER7_DEBUG is not set -CONFIG_IP_NF_MATCH_MULTIPORT=m CONFIG_IP_NF_MATCH_TOS=m CONFIG_IP_NF_MATCH_RECENT=m CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m +# CONFIG_IP_NF_MATCH_AH is not set CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_MATCH_OWNER=m CONFIG_IP_NF_MATCH_ADDRTYPE=m -CONFIG_IP_NF_MATCH_HASHLIMIT=m -CONFIG_IP_NF_MATCH_POLICY=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_LOG=m CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_SAME=m -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_NAT_AMANDA=m -CONFIG_IP_NF_NAT_PPTP=m -CONFIG_IP_NF_NAT_H323=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_IMQ=m CONFIG_IP_NF_TARGET_TOS=m CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP_NF_NAT_MMS=m -CONFIG_IP_NF_MMS=m -CONFIG_IP_NF_NAT_SIP=m -CONFIG_IP_NF_SIP=m # # Bridge: Netfilter Configuration @@ -558,12 +558,12 @@ CONFIG_ATM_BR2684=m CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m # CONFIG_DECNET is not set +CONFIG_LLC=m # CONFIG_LLC2 is not set # CONFIG_IPX is not set # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set # 
CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set @@ -571,6 +571,7 @@ CONFIG_VLAN_8021Q=m # QoS and/or fair queueing # CONFIG_NET_SCHED=y +CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_CLK_JIFFIES=y # CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set # CONFIG_NET_SCH_CLK_CPU is not set @@ -633,7 +634,12 @@ CONFIG_NET_ESTIMATOR=y CONFIG_IEEE80211=m # CONFIG_IEEE80211_DEBUG is not set CONFIG_IEEE80211_CRYPT_WEP=m -# CONFIG_IEEE80211_CRYPT_CCMP is not set +CONFIG_IEEE80211_CRYPT_CCMP=m +CONFIG_IEEE80211_CRYPT_TKIP=m +CONFIG_IEEE80211_SOFTMAC=m +# CONFIG_IEEE80211_SOFTMAC_DEBUG is not set +CONFIG_WIRELESS_EXT=y +CONFIG_FIB_RULES=y CONFIG_KLIPS=m # @@ -643,6 +649,7 @@ CONFIG_KLIPS_ESP=y CONFIG_KLIPS_AH=y CONFIG_KLIPS_AUTH_HMAC_MD5=y CONFIG_KLIPS_AUTH_HMAC_SHA1=y +CONFIG_KLIPS_ALG=y CONFIG_KLIPS_ENC_CRYPTOAPI=y CONFIG_KLIPS_ENC_1DES=y CONFIG_KLIPS_ENC_3DES=y @@ -661,6 +668,7 @@ CONFIG_KLIPS_DEBUG=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set CONFIG_FW_LOADER=m +# CONFIG_SYS_HYPERVISOR is not set # # Connector - unified userspace <-> kernelspace linker @@ -675,18 +683,19 @@ CONFIG_MTD=m CONFIG_MTD_CONCAT=m CONFIG_MTD_PARTITIONS=y # CONFIG_MTD_REDBOOT_PARTS is not set -CONFIG_MTD_CMDLINE_PARTS=y # # User Modules And Translation Layers # CONFIG_MTD_CHAR=m +CONFIG_MTD_BLKDEVS=m CONFIG_MTD_BLOCK=m # CONFIG_MTD_BLOCK_RO is not set # CONFIG_FTL is not set # CONFIG_NFTL is not set # CONFIG_INFTL is not set # CONFIG_RFD_FTL is not set +# CONFIG_SSFDC is not set # # RAM/ROM/Flash chip drivers @@ -722,7 +731,6 @@ CONFIG_MTD_CFI_I2=y # CONFIG_MTD_SLRAM is not set # CONFIG_MTD_PHRAM is not set # CONFIG_MTD_MTDRAM is not set -# CONFIG_MTD_BLKMTD is not set # CONFIG_MTD_BLOCK2MTD is not set # @@ -736,6 +744,7 @@ CONFIG_MTD_CFI_I2=y # NAND Flash Device Drivers # # CONFIG_MTD_NAND is not set +# CONFIG_MTD_NAND_CAFE is not set # # OneNAND Flash Device Drivers @@ -752,6 +761,7 @@ CONFIG_PARPORT_PC=m # CONFIG_PARPORT_PC_SUPERIO is not set CONFIG_PARPORT_PC_PCMCIA=m # 
CONFIG_PARPORT_GSC is not set +# CONFIG_PARPORT_AX88796 is not set # CONFIG_PARPORT_1284 is not set # @@ -788,10 +798,18 @@ CONFIG_BLK_DEV_UB=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 CONFIG_BLK_DEV_INITRD=y # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set +# +# Misc devices +# +# CONFIG_IBM_ASM is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set + # # ATA/ATAPI/MFM/RLL support # @@ -839,10 +857,11 @@ CONFIG_BLK_DEV_TRIFLEX=m CONFIG_BLK_DEV_CY82C693=m CONFIG_BLK_DEV_CS5520=m CONFIG_BLK_DEV_CS5530=m -# CONFIG_BLK_DEV_CS5535 is not set +CONFIG_BLK_DEV_CS5535=m CONFIG_BLK_DEV_HPT34X=m # CONFIG_HPT34X_AUTODMA is not set CONFIG_BLK_DEV_HPT366=m +CONFIG_BLK_DEV_JMICRON=m CONFIG_BLK_DEV_SC1200=m CONFIG_BLK_DEV_PIIX=m CONFIG_BLK_DEV_IT821X=m @@ -878,6 +897,8 @@ CONFIG_IDEDMA_AUTO=y # CONFIG_RAID_ATTRS=y CONFIG_SCSI=m +# CONFIG_SCSI_TGT is not set +CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y # @@ -897,14 +918,17 @@ CONFIG_CHR_DEV_SCH=m CONFIG_SCSI_MULTI_LUN=y # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set # -# SCSI Transport Attributes +# SCSI Transports # CONFIG_SCSI_SPI_ATTRS=m CONFIG_SCSI_FC_ATTRS=m -# CONFIG_SCSI_ISCSI_ATTRS is not set +CONFIG_SCSI_ISCSI_ATTRS=m CONFIG_SCSI_SAS_ATTRS=m +CONFIG_SCSI_SAS_LIBSAS=m +CONFIG_SCSI_SAS_LIBSAS_DEBUG=y # # SCSI low-level drivers @@ -935,6 +959,8 @@ CONFIG_AIC79XX_RESET_DELAY_MS=15000 # CONFIG_AIC79XX_DEBUG_ENABLE is not set CONFIG_AIC79XX_DEBUG_MASK=0 # CONFIG_AIC79XX_REG_PRETTY_PRINT is not set +CONFIG_SCSI_AIC94XX=m +# CONFIG_AIC94XX_DEBUG is not set CONFIG_SCSI_DPT_I2O=m CONFIG_SCSI_ADVANSYS=m CONFIG_SCSI_IN2000=m @@ -944,24 +970,7 @@ CONFIG_MEGARAID_MM=m CONFIG_MEGARAID_MAILBOX=m CONFIG_MEGARAID_LEGACY=m CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_SATA=m -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -# CONFIG_SCSI_SATA_MV is not set 
-CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_PDC_ADMA=m CONFIG_SCSI_HPTIOP=m -CONFIG_SCSI_SATA_QSTOR=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIL24=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_SATA_INTEL_COMBINED=y CONFIG_SCSI_BUSLOGIC=m # CONFIG_SCSI_OMIT_FLASHPOINT is not set CONFIG_SCSI_DMX3191D=m @@ -986,11 +995,12 @@ CONFIG_SCSI_INIA100=m # CONFIG_SCSI_IMM is not set CONFIG_SCSI_NCR53C406A=m CONFIG_SCSI_NCR_D700=m +CONFIG_SCSI_STEX=m CONFIG_SCSI_SYM53C8XX_2=m CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +CONFIG_SCSI_SYM53C8XX_MMIO=y # CONFIG_SCSI_IPR is not set CONFIG_SCSI_NCR_Q720=m CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 @@ -999,11 +1009,12 @@ CONFIG_SCSI_NCR53C8XX_SYNC=20 # CONFIG_SCSI_NCR53C8XX_PROFILE is not set CONFIG_SCSI_PAS16=m CONFIG_SCSI_PSI240I=m -# CONFIG_SCSI_QLOGIC_FAS is not set -# CONFIG_SCSI_QLOGIC_FC is not set +CONFIG_SCSI_QLOGIC_FAS=m CONFIG_SCSI_QLOGIC_1280=m -# CONFIG_SCSI_QLA_FC is not set -# CONFIG_SCSI_LPFC is not set +CONFIG_SCSI_QLA_FC=m +CONFIG_SCSI_QLA_ISCSI=m +CONFIG_SCSI_LPFC=m +CONFIG_SCSI_SEAGATE=m CONFIG_SCSI_SIM710=m CONFIG_53C700_IO_MAPPED=y CONFIG_SCSI_SYM53C416=m @@ -1017,6 +1028,7 @@ CONFIG_SCSI_U14_34F_MAX_TAGS=8 CONFIG_SCSI_ULTRASTOR=m CONFIG_SCSI_NSP32=m # CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_SRP is not set # # PCMCIA SCSI adapter support @@ -1027,6 +1039,68 @@ CONFIG_PCMCIA_NINJA_SCSI=m CONFIG_PCMCIA_QLOGIC=m CONFIG_PCMCIA_SYM53C500=m +# +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +CONFIG_ATA=m +# CONFIG_ATA_NONSTANDARD is not set +CONFIG_SATA_AHCI=m +CONFIG_SATA_SVW=m +CONFIG_ATA_PIIX=m +CONFIG_SATA_MV=m +CONFIG_SATA_NV=m +CONFIG_PDC_ADMA=m +CONFIG_SATA_QSTOR=m +CONFIG_SATA_PROMISE=m +CONFIG_SATA_SX4=m +CONFIG_SATA_SIL=m +CONFIG_SATA_SIL24=m +CONFIG_SATA_SIS=m 
+CONFIG_SATA_ULI=m +CONFIG_SATA_VIA=m +CONFIG_SATA_VITESSE=m +CONFIG_SATA_INTEL_COMBINED=y +CONFIG_PATA_ALI=m +CONFIG_PATA_AMD=m +CONFIG_PATA_ARTOP=m +CONFIG_PATA_ATIIXP=m +CONFIG_PATA_CMD64X=m +CONFIG_PATA_CS5520=m +CONFIG_PATA_CS5530=m +CONFIG_PATA_CS5535=m +CONFIG_PATA_CYPRESS=m +CONFIG_PATA_EFAR=m +CONFIG_ATA_GENERIC=m +CONFIG_PATA_HPT366=m +CONFIG_PATA_HPT37X=m +CONFIG_PATA_HPT3X2N=m +CONFIG_PATA_HPT3X3=m +CONFIG_PATA_ISAPNP=m +CONFIG_PATA_IT821X=m +CONFIG_PATA_JMICRON=m +CONFIG_PATA_LEGACY=m +CONFIG_PATA_TRIFLEX=m +CONFIG_PATA_MARVELL=m +CONFIG_PATA_MPIIX=m +CONFIG_PATA_OLDPIIX=m +CONFIG_PATA_NETCELL=m +CONFIG_PATA_NS87410=m +CONFIG_PATA_OPTI=m +CONFIG_PATA_OPTIDMA=m +CONFIG_PATA_PCMCIA=m +CONFIG_PATA_PDC_OLD=m +CONFIG_PATA_QDI=m +CONFIG_PATA_RADISYS=m +CONFIG_PATA_RZ1000=m +CONFIG_PATA_SC1200=m +CONFIG_PATA_SERVERWORKS=m +CONFIG_PATA_PDC2027X=m +CONFIG_PATA_SIL680=m +CONFIG_PATA_SIS=m +CONFIG_PATA_VIA=m +CONFIG_PATA_WINBOND=m +CONFIG_PATA_WINBOND_VLB=m + # # Old CD-ROM drivers (not SCSI, not IDE) # @@ -1041,8 +1115,8 @@ CONFIG_MD_LINEAR=m CONFIG_MD_RAID0=m CONFIG_MD_RAID1=m # CONFIG_MD_RAID10 is not set -CONFIG_MD_RAID5=m -# CONFIG_MD_RAID6 is not set +CONFIG_MD_RAID456=m +CONFIG_MD_RAID5_RESHAPE=y CONFIG_MD_MULTIPATH=m # CONFIG_MD_FAULTY is not set # CONFIG_BLK_DEV_DM is not set @@ -1080,7 +1154,7 @@ CONFIG_IEEE1394_OHCI1394=m # # Protocol Drivers # -# CONFIG_IEEE1394_VIDEO1394 is not set +CONFIG_IEEE1394_VIDEO1394=m CONFIG_IEEE1394_SBP2=m # CONFIG_IEEE1394_SBP2_PHYS_DMA is not set CONFIG_IEEE1394_ETH1394=m @@ -1092,6 +1166,11 @@ CONFIG_IEEE1394_ETH1394=m # # CONFIG_I2O is not set +# +# Macintosh device drivers +# +# CONFIG_MAC_EMUMOUSEBTN is not set + # # Network device support # @@ -1127,6 +1206,10 @@ CONFIG_DAVICOM_PHY=m CONFIG_QSEMI_PHY=m CONFIG_LXT_PHY=m CONFIG_CICADA_PHY=m +CONFIG_VITESSE_PHY=m +CONFIG_SMSC_PHY=m +CONFIG_BROADCOM_PHY=m +# CONFIG_FIXED_PHY is not set # # Ethernet (10 or 100Mbit) @@ -1191,6 +1274,7 @@ CONFIG_NE2_MCA=m CONFIG_IBMLANA=m 
CONFIG_NET_PCI=y CONFIG_PCNET32=m +# CONFIG_PCNET32_NAPI is not set CONFIG_AMD8111_ETH=m CONFIG_AMD8111E_NAPI=y CONFIG_ADAPTEC_STARFIRE=m @@ -1199,6 +1283,7 @@ CONFIG_AC3200=m CONFIG_APRICOT=m CONFIG_B44=m CONFIG_FORCEDETH=m +# CONFIG_FORCEDETH_NAPI is not set CONFIG_CS89x0=m CONFIG_DGRS=m CONFIG_EEPRO100=m @@ -1222,6 +1307,7 @@ CONFIG_SUNDANCE_MMIO=y CONFIG_TLAN=m CONFIG_VIA_RHINE=m CONFIG_VIA_RHINE_MMIO=y +# CONFIG_VIA_RHINE_NAPI is not set CONFIG_NET_POCKET=y CONFIG_ATP=m CONFIG_DE600=m @@ -1249,15 +1335,20 @@ CONFIG_SK98LIN=m CONFIG_VIA_VELOCITY=m CONFIG_TIGON3=m CONFIG_BNX2=m +CONFIG_QLA3XXX=m # # Ethernet (10000 Mbit) # CONFIG_CHELSIO_T1=m +CONFIG_CHELSIO_T1_1G=y +CONFIG_CHELSIO_T1_NAPI=y CONFIG_IXGB=m # CONFIG_IXGB_NAPI is not set CONFIG_S2IO=m # CONFIG_S2IO_NAPI is not set +CONFIG_MYRI10GE=m +CONFIG_NETXEN_NIC=m # # Token Ring devices @@ -1267,7 +1358,67 @@ CONFIG_S2IO=m # # Wireless LAN (non-hamradio) # -# CONFIG_NET_RADIO is not set +CONFIG_NET_RADIO=y +CONFIG_NET_WIRELESS_RTNETLINK=y + +# +# Obsolete Wireless cards support (pre-802.11) +# +CONFIG_STRIP=m +CONFIG_ARLAN=m +CONFIG_WAVELAN=m +CONFIG_PCMCIA_WAVELAN=m +CONFIG_PCMCIA_NETWAVE=m + +# +# Wireless 802.11 Frequency Hopping cards support +# +CONFIG_PCMCIA_RAYCS=m + +# +# Wireless 802.11b ISA/PCI cards support +# +CONFIG_IPW2100=m +CONFIG_IPW2100_MONITOR=y +# CONFIG_IPW2100_DEBUG is not set +CONFIG_IPW2200=m +CONFIG_IPW2200_MONITOR=y +CONFIG_IPW2200_RADIOTAP=y +CONFIG_IPW2200_PROMISCUOUS=y +CONFIG_IPW2200_QOS=y +# CONFIG_IPW2200_DEBUG is not set +CONFIG_AIRO=m +CONFIG_HERMES=m +CONFIG_PLX_HERMES=m +CONFIG_TMD_HERMES=m +CONFIG_NORTEL_HERMES=m +CONFIG_PCI_HERMES=m +CONFIG_ATMEL=m +CONFIG_PCI_ATMEL=m + +# +# Wireless 802.11b Pcmcia/Cardbus cards support +# +CONFIG_PCMCIA_HERMES=m +CONFIG_PCMCIA_SPECTRUM=m +CONFIG_AIRO_CS=m +CONFIG_PCMCIA_ATMEL=m +CONFIG_PCMCIA_WL3501=m + +# +# Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support +# +CONFIG_PRISM54=m +CONFIG_USB_ZD1201=m +CONFIG_HOSTAP=m 
+CONFIG_HOSTAP_FIRMWARE=y +# CONFIG_HOSTAP_FIRMWARE_NVRAM is not set +CONFIG_HOSTAP_PLX=m +CONFIG_HOSTAP_PCI=m +CONFIG_HOSTAP_CS=m +# CONFIG_BCM43XX is not set +# CONFIG_ZD1211RW is not set +CONFIG_NET_WIRELESS=y # # PCMCIA network device support @@ -1344,6 +1495,7 @@ CONFIG_PPPOE=m CONFIG_PPPOATM=m CONFIG_SLIP=m CONFIG_SLIP_COMPRESSED=y +CONFIG_SLHC=m CONFIG_SLIP_SMART=y CONFIG_SLIP_MODE_SLIP6=y # CONFIG_NET_FC is not set @@ -1413,6 +1565,7 @@ CONFIG_MISDN_L1OIP=y # Input device support # CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set # # Userland interfaces @@ -1435,6 +1588,7 @@ CONFIG_KEYBOARD_SUNKBD=m CONFIG_KEYBOARD_LKKBD=m CONFIG_KEYBOARD_XTKBD=m CONFIG_KEYBOARD_NEWTON=m +# CONFIG_KEYBOARD_STOWAWAY is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_INPUT_JOYSTICK is not set # CONFIG_INPUT_TOUCHSCREEN is not set @@ -1462,25 +1616,28 @@ CONFIG_SERIO_LIBPS2=y CONFIG_VT=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set # CONFIG_SERIAL_NONSTANDARD is not set # # Serial drivers # CONFIG_SERIAL_8250=m +CONFIG_SERIAL_8250_PCI=m +CONFIG_SERIAL_8250_PNP=m CONFIG_SERIAL_8250_CS=m -# CONFIG_SERIAL_8250_ACPI is not set CONFIG_SERIAL_8250_NR_UARTS=4 CONFIG_SERIAL_8250_RUNTIME_UARTS=4 CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y -CONFIG_SERIAL_8250_SHARE_IRQ=y -CONFIG_SERIAL_8250_DETECT_IRQ=y -CONFIG_SERIAL_8250_RSA=y # CONFIG_SERIAL_8250_FOURPORT is not set # CONFIG_SERIAL_8250_ACCENT is not set # CONFIG_SERIAL_8250_BOCA is not set +# CONFIG_SERIAL_8250_EXAR_ST16C554 is not set # CONFIG_SERIAL_8250_HUB6 is not set +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y # CONFIG_SERIAL_8250_MCA is not set # @@ -1527,11 +1684,15 @@ CONFIG_IBMASR=m CONFIG_WAFER_WDT=m CONFIG_I6300ESB_WDT=m CONFIG_I8XX_TCO=m +# CONFIG_ITCO_WDT is not set CONFIG_SC1200_WDT=m +# CONFIG_PC87413_WDT is not set CONFIG_60XX_WDT=m CONFIG_SBC8360_WDT=m CONFIG_CPU5_WDT=m +# CONFIG_SMSC37B787_WDT is not set 
CONFIG_W83627HF_WDT=m +# CONFIG_W83697HF_WDT is not set CONFIG_W83877F_WDT=m CONFIG_W83977F_WDT=m CONFIG_MACHZ_WDT=m @@ -1556,17 +1717,27 @@ CONFIG_WDT_501_PCI=y # CONFIG_USBPCWATCHDOG=m CONFIG_HW_RANDOM=m +CONFIG_HW_RANDOM_INTEL=m +CONFIG_HW_RANDOM_AMD=m +CONFIG_HW_RANDOM_GEODE=m +CONFIG_HW_RANDOM_VIA=m # CONFIG_NVRAM is not set CONFIG_RTC=y # CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set # CONFIG_SONYPI is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_AGP is not set +CONFIG_AGP=m +# CONFIG_AGP_ALI is not set +# CONFIG_AGP_ATI is not set +# CONFIG_AGP_AMD is not set +# CONFIG_AGP_AMD64 is not set +CONFIG_AGP_INTEL=m +# CONFIG_AGP_NVIDIA is not set +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_SWORKS is not set +# CONFIG_AGP_VIA is not set +# CONFIG_AGP_EFFICEON is not set # CONFIG_DRM is not set # @@ -1576,6 +1747,8 @@ CONFIG_SYNCLINK_CS=m # CONFIG_CARDMAN_4000 is not set # CONFIG_CARDMAN_4040 is not set CONFIG_MWAVE=m +# CONFIG_PC8736x_GPIO is not set +# CONFIG_NSC_GPIO is not set CONFIG_CS5535_GPIO=m # CONFIG_RAW_DRIVER is not set CONFIG_HPET=y @@ -1614,6 +1787,7 @@ CONFIG_I2C_ALGOBIT=m # CONFIG_I2C_I810 is not set # CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_OCORES is not set # CONFIG_I2C_PARPORT is not set # CONFIG_I2C_PARPORT_LIGHT is not set # CONFIG_I2C_PROSAVAGE is not set @@ -1637,9 +1811,7 @@ CONFIG_I2C_ALGOBIT=m # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set -# CONFIG_SENSORS_RTC8564 is not set # CONFIG_SENSORS_MAX6875 is not set -# CONFIG_RTC_X1205_I2C is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -1655,19 +1827,28 @@ CONFIG_I2C_ALGOBIT=m # Dallas's 1-wire bus # CONFIG_W1=m -CONFIG_W1_MATROX=m -CONFIG_W1_DS9490=m -CONFIG_W1_DS9490_BRIDGE=m -CONFIG_W1_THERM=m -CONFIG_W1_SMEM=m -CONFIG_W1_DS2433=m -CONFIG_W1_DS2433_CRC=y +CONFIG_W1_CON=y + +# +# 
1-wire Bus Masters +# +# CONFIG_W1_MASTER_MATROX is not set +# CONFIG_W1_MASTER_DS2490 is not set +# CONFIG_W1_MASTER_DS2482 is not set + +# +# 1-wire Slaves +# +# CONFIG_W1_SLAVE_THERM is not set +# CONFIG_W1_SLAVE_SMEM is not set +# CONFIG_W1_SLAVE_DS2433 is not set # # Hardware Monitoring support # CONFIG_HWMON=m # CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_ABITUGURU is not set # CONFIG_SENSORS_ADM1021 is not set # CONFIG_SENSORS_ADM1025 is not set # CONFIG_SENSORS_ADM1026 is not set @@ -1701,43 +1882,42 @@ CONFIG_SENSORS_PC87427=m # CONFIG_SENSORS_SMSC47M192 is not set # CONFIG_SENSORS_SMSC47B397 is not set # CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT1211 is not set # CONFIG_SENSORS_VT8231 is not set # CONFIG_SENSORS_W83781D is not set # CONFIG_SENSORS_W83791D is not set # CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set # CONFIG_SENSORS_W83L785TS is not set # CONFIG_SENSORS_W83627HF is not set # CONFIG_SENSORS_W83627EHF is not set CONFIG_SENSORS_HDAPS=m # CONFIG_HWMON_DEBUG_CHIP is not set -# -# Misc devices -# -# CONFIG_IBM_ASM is not set - -# -# Multimedia Capabilities Port drivers -# - # # Multimedia devices # CONFIG_VIDEO_DEV=m +CONFIG_VIDEO_V4L1=y +CONFIG_VIDEO_V4L1_COMPAT=y +CONFIG_VIDEO_V4L2=y # -# Video For Linux +# Video Capture Adapters # # -# Video Adapters +# Video Capture Adapters # # CONFIG_VIDEO_ADV_DEBUG is not set +CONFIG_VIDEO_HELPER_CHIPS_AUTO=y +# CONFIG_VIDEO_VIVI is not set # CONFIG_VIDEO_BT848 is not set # CONFIG_VIDEO_PMS is not set # CONFIG_VIDEO_BWQCAM is not set # CONFIG_VIDEO_CQCAM is not set # CONFIG_VIDEO_CPIA is not set +# CONFIG_VIDEO_CPIA2 is not set # CONFIG_VIDEO_SAA5246A is not set # CONFIG_VIDEO_SAA5249 is not set # CONFIG_TUNER_3036 is not set @@ -1749,11 +1929,27 @@ CONFIG_VIDEO_DEV=m # CONFIG_VIDEO_HEXIUM_ORION is not set # CONFIG_VIDEO_HEXIUM_GEMINI is not set # CONFIG_VIDEO_CX88 is not set -CONFIG_VIDEO_CX88_VP3054=m +# CONFIG_VIDEO_CAFE_CCIC is not set + +# +# V4L USB devices +# +# 
CONFIG_VIDEO_PVRUSB2 is not set # CONFIG_VIDEO_EM28XX is not set +# CONFIG_VIDEO_USBVISION is not set +# CONFIG_USB_VICAM is not set +# CONFIG_USB_IBMCAM is not set +# CONFIG_USB_KONICAWC is not set +# CONFIG_USB_QUICKCAM_MESSENGER is not set +# CONFIG_USB_ET61X251 is not set # CONFIG_VIDEO_OVCAMCHIP is not set -# CONFIG_VIDEO_AUDIO_DECODER is not set -# CONFIG_VIDEO_DECODER is not set +# CONFIG_USB_W9968CF is not set +# CONFIG_USB_OV511 is not set +# CONFIG_USB_SE401 is not set +# CONFIG_USB_SN9C102 is not set +# CONFIG_USB_STV680 is not set +# CONFIG_USB_ZC0301 is not set +# CONFIG_USB_PWC is not set # # Radio Adapters @@ -1772,160 +1968,91 @@ CONFIG_VIDEO_CX88_VP3054=m # CONFIG_RADIO_TRUST is not set # CONFIG_RADIO_TYPHOON is not set # CONFIG_RADIO_ZOLTRIX is not set +# CONFIG_USB_DSBR is not set # # Digital Video Broadcasting Devices # CONFIG_DVB=y -CONFIG_DVB_CORE=m - -# -# Supported SAA7146 based PCI Adapters -# -CONFIG_DVB_AV7110=m -# CONFIG_DVB_AV7110_FIRMWARE is not set -CONFIG_DVB_AV7110_OSD=y -CONFIG_DVB_BUDGET=m -CONFIG_DVB_BUDGET_CI=m -CONFIG_DVB_BUDGET_AV=m -CONFIG_DVB_BUDGET_PATCH=m - -# -# Supported USB Adapters -# -CONFIG_DVB_USB=m -# CONFIG_DVB_USB_DEBUG is not set -CONFIG_DVB_USB_A800=m -CONFIG_DVB_USB_DIBUSB_MB=m -# CONFIG_DVB_USB_DIBUSB_MB_FAULTY is not set -CONFIG_DVB_USB_DIBUSB_MC=m -CONFIG_DVB_USB_UMT_010=m -CONFIG_DVB_USB_CXUSB=m -CONFIG_DVB_USB_DIGITV=m -CONFIG_DVB_USB_VP7045=m -CONFIG_DVB_USB_VP702X=m -CONFIG_DVB_USB_NOVA_T_USB2=m -CONFIG_DVB_USB_DTT200U=m -CONFIG_DVB_TTUSB_BUDGET=m -CONFIG_DVB_TTUSB_DEC=m -CONFIG_DVB_CINERGYT2=m -# CONFIG_DVB_CINERGYT2_TUNING is not set - -# -# Supported FlexCopII (B2C2) Adapters -# -CONFIG_DVB_B2C2_FLEXCOP=m -CONFIG_DVB_B2C2_FLEXCOP_PCI=m -CONFIG_DVB_B2C2_FLEXCOP_USB=m -# CONFIG_DVB_B2C2_FLEXCOP_DEBUG is not set - -# -# Supported BT878 Adapters -# - -# -# Supported Pluto2 Adapters -# -CONFIG_DVB_PLUTO2=m - -# -# Supported DVB Frontends -# - -# -# Customise DVB Frontends -# - -# -# DVB-S (satellite) 
frontends -# -CONFIG_DVB_STV0299=m -CONFIG_DVB_CX24110=m -CONFIG_DVB_CX24123=m -CONFIG_DVB_TDA8083=m -CONFIG_DVB_MT312=m -CONFIG_DVB_VES1X93=m -CONFIG_DVB_S5H1420=m - -# -# DVB-T (terrestrial) frontends -# -CONFIG_DVB_SP8870=m -CONFIG_DVB_SP887X=m -CONFIG_DVB_CX22700=m -CONFIG_DVB_CX22702=m -CONFIG_DVB_L64781=m -CONFIG_DVB_TDA1004X=m -CONFIG_DVB_NXT6000=m -CONFIG_DVB_MT352=m -CONFIG_DVB_DIB3000MB=m -CONFIG_DVB_DIB3000MC=m - -# -# DVB-C (cable) frontends -# -CONFIG_DVB_VES1820=m -CONFIG_DVB_TDA10021=m -CONFIG_DVB_STV0297=m - -# -# ATSC (North American/Korean Terresterial DTV) frontends -# -CONFIG_DVB_NXT200X=m -CONFIG_DVB_OR51211=m -CONFIG_DVB_OR51132=m -CONFIG_DVB_BCM3510=m -CONFIG_DVB_LGDT330X=m -CONFIG_VIDEO_SAA7146=m -CONFIG_VIDEO_SAA7146_VV=m -CONFIG_VIDEO_VIDEOBUF=m -CONFIG_VIDEO_BUF=m +# CONFIG_DVB_CORE is not set +# CONFIG_USB_DABUSB is not set # # Graphics support # +CONFIG_FIRMWARE_EDID=y CONFIG_FB=y +CONFIG_FB_DDC=m CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y # CONFIG_FB_MACMODES is not set -# CONFIG_FB_MODE_HELPERS is not set -# CONFIG_FB_TILEBLITTING is not set -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ARC is not set -# CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set +# CONFIG_FB_BACKLIGHT is not set +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_CIRRUS=m +CONFIG_FB_PM2=m +CONFIG_FB_PM2_FIFO_DISCONNECT=y +CONFIG_FB_CYBER2000=m +CONFIG_FB_ARC=m +CONFIG_FB_ASILIANT=y +CONFIG_FB_IMSTT=y # CONFIG_FB_VGA16 is not set CONFIG_FB_VESA=y -CONFIG_VIDEO_SELECT=y # CONFIG_FB_HGA is not set # CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set -# CONFIG_FB_I810 is not set -# CONFIG_FB_INTEL is not set -# CONFIG_FB_MATROX is not set -# CONFIG_FB_RADEON_OLD is not set -# CONFIG_FB_RADEON is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_SAVAGE is not set -# CONFIG_FB_SIS is 
not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VOODOO1 is not set -# CONFIG_FB_CYBLA is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_GEODE is not set +CONFIG_FB_NVIDIA=m +CONFIG_FB_NVIDIA_I2C=y +CONFIG_FB_RIVA=m +CONFIG_FB_RIVA_I2C=y +# CONFIG_FB_RIVA_DEBUG is not set +CONFIG_FB_I810=m +CONFIG_FB_I810_GTF=y +CONFIG_FB_I810_I2C=y +CONFIG_FB_INTEL=m +# CONFIG_FB_INTEL_DEBUG is not set +CONFIG_FB_INTEL_I2C=y +CONFIG_FB_MATROX=m +CONFIG_FB_MATROX_MILLENIUM=y +CONFIG_FB_MATROX_MYSTIQUE=y +CONFIG_FB_MATROX_G=y +CONFIG_FB_MATROX_I2C=m +CONFIG_FB_MATROX_MAVEN=m +CONFIG_FB_MATROX_MULTIHEAD=y +CONFIG_FB_RADEON=m +CONFIG_FB_RADEON_I2C=y +# CONFIG_FB_RADEON_DEBUG is not set +CONFIG_FB_ATY128=m +CONFIG_FB_ATY=m +CONFIG_FB_ATY_CT=y +# CONFIG_FB_ATY_GENERIC_LCD is not set +CONFIG_FB_ATY_GX=y +CONFIG_FB_SAVAGE=m +CONFIG_FB_SAVAGE_I2C=y +CONFIG_FB_SAVAGE_ACCEL=y +CONFIG_FB_SIS=m +CONFIG_FB_SIS_300=y +CONFIG_FB_SIS_315=y +CONFIG_FB_NEOMAGIC=m +CONFIG_FB_KYRO=m +CONFIG_FB_3DFX=m +CONFIG_FB_3DFX_ACCEL=y +CONFIG_FB_VOODOO1=m +CONFIG_FB_CYBLA=m +CONFIG_FB_TRIDENT=m +CONFIG_FB_TRIDENT_ACCEL=y +CONFIG_FB_GEODE=y +CONFIG_FB_GEODE_GX=m +# CONFIG_FB_GEODE_GX_SET_FBSIZE is not set +CONFIG_FB_GEODE_GX1=m # CONFIG_FB_VIRTUAL is not set # # Console display driver support # CONFIG_VGA_CONSOLE=y +# CONFIG_VGACON_SOFT_SCROLLBACK is not set +CONFIG_VIDEO_SELECT=y # CONFIG_MDA_CONSOLE is not set CONFIG_DUMMY_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE=y @@ -1961,11 +2088,13 @@ CONFIG_SND_SEQUENCER=m CONFIG_SND_OSSEMUL=y CONFIG_SND_MIXER_OSS=m CONFIG_SND_PCM_OSS=m +CONFIG_SND_PCM_OSS_PLUGINS=y # CONFIG_SND_SEQUENCER_OSS is not set CONFIG_SND_RTCTIMER=m CONFIG_SND_SEQ_RTCTIMER_DEFAULT=y # CONFIG_SND_DYNAMIC_MINORS is not set # CONFIG_SND_SUPPORT_OLD_API is not set +CONFIG_SND_VERBOSE_PROCFS=y # CONFIG_SND_VERBOSE_PRINTK is not set # CONFIG_SND_DEBUG is not set @@ -1977,10 +2106,10 @@ CONFIG_SND_OPL3_LIB=m CONFIG_SND_OPL4_LIB=m 
CONFIG_SND_VX_LIB=m CONFIG_SND_AC97_CODEC=m -CONFIG_SND_AC97_BUS=m CONFIG_SND_DUMMY=m # CONFIG_SND_VIRMIDI is not set CONFIG_SND_MTPAV=m +CONFIG_SND_MTS64=m CONFIG_SND_SERIAL_U16550=m CONFIG_SND_MPU401=m @@ -1989,6 +2118,7 @@ CONFIG_SND_MPU401=m # CONFIG_SND_AD1848_LIB=m CONFIG_SND_CS4231_LIB=m +# CONFIG_SND_ADLIB is not set CONFIG_SND_AD1816A=m CONFIG_SND_AD1848=m CONFIG_SND_ALS100=m @@ -2011,6 +2141,7 @@ CONFIG_SND_OPL3SA2=m CONFIG_SND_OPTI92X_AD1848=m CONFIG_SND_OPTI92X_CS4231=m CONFIG_SND_OPTI93X=m +CONFIG_SND_MIRO=m CONFIG_SND_SB8=m CONFIG_SND_SB16=m CONFIG_SND_SBAWE=m @@ -2023,6 +2154,7 @@ CONFIG_SND_WAVEFRONT=m # PCI devices # CONFIG_SND_AD1889=m +CONFIG_SND_ALS300=m CONFIG_SND_ALS4000=m CONFIG_SND_ALI5451=m CONFIG_SND_ATIIXP=m @@ -2032,12 +2164,12 @@ CONFIG_SND_AU8820=m CONFIG_SND_AU8830=m CONFIG_SND_AZT3328=m CONFIG_SND_BT87X=m -# CONFIG_SND_BT87X_OVERCLOCK is not set +CONFIG_SND_BT87X_OVERCLOCK=y CONFIG_SND_CA0106=m CONFIG_SND_CMIPCI=m CONFIG_SND_CS4281=m CONFIG_SND_CS46XX=m -# CONFIG_SND_CS46XX_NEW_DSP is not set +CONFIG_SND_CS46XX_NEW_DSP=y CONFIG_SND_CS5535AUDIO=m CONFIG_SND_DARLA20=m CONFIG_SND_GINA20=m @@ -2058,7 +2190,8 @@ CONFIG_SND_ENS1371=m CONFIG_SND_ES1938=m CONFIG_SND_ES1968=m CONFIG_SND_FM801=m -# CONFIG_SND_FM801_TEA575X_BOOL is not set +CONFIG_SND_FM801_TEA575X_BOOL=y +CONFIG_SND_FM801_TEA575X=m CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDSP=m CONFIG_SND_HDSPM=m @@ -2071,6 +2204,7 @@ CONFIG_SND_MAESTRO3=m CONFIG_SND_MIXART=m CONFIG_SND_NM256=m CONFIG_SND_PCXHR=m +CONFIG_SND_RIPTIDE=m CONFIG_SND_RME32=m CONFIG_SND_RME96=m CONFIG_SND_RME9652=m @@ -2080,6 +2214,7 @@ CONFIG_SND_VIA82XX=m CONFIG_SND_VIA82XX_MODEM=m CONFIG_SND_VX222=m CONFIG_SND_YMFPCI=m +# CONFIG_SND_AC97_POWER_SAVE is not set # # USB devices @@ -2097,12 +2232,19 @@ CONFIG_SND_PDAUDIOCF=m # Open Sound System # # CONFIG_SOUND_PRIME is not set +CONFIG_AC97_BUS=m + +# +# HID Devices +# +CONFIG_HID=y # # USB support # CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y 
+CONFIG_USB_ARCH_HAS_EHCI=y CONFIG_USB=m # CONFIG_USB_DEBUG is not set @@ -2121,6 +2263,7 @@ CONFIG_USB_SUSPEND=y CONFIG_USB_EHCI_HCD=m # CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_EHCI_TT_NEWSCHED is not set CONFIG_USB_ISP116X_HCD=m CONFIG_USB_OHCI_HCD=m # CONFIG_USB_OHCI_BIG_ENDIAN is not set @@ -2132,7 +2275,6 @@ CONFIG_USB_SL811_HCD=m # # USB Device Class drivers # -# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set CONFIG_USB_ACM=m CONFIG_USB_PRINTER=m @@ -2154,13 +2296,13 @@ CONFIG_USB_STORAGE_SDDR09=y CONFIG_USB_STORAGE_SDDR55=y CONFIG_USB_STORAGE_JUMPSHOT=y CONFIG_USB_STORAGE_ALAUDA=y +# CONFIG_USB_STORAGE_KARMA is not set # CONFIG_USB_LIBUSUAL is not set # # USB Input Devices # CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y # CONFIG_USB_HIDINPUT_POWERBOOK is not set # CONFIG_HID_FF is not set CONFIG_USB_HIDDEV=y @@ -2175,9 +2317,7 @@ CONFIG_USB_HIDDEV=y # CONFIG_USB_ACECAD is not set # CONFIG_USB_KBTAB is not set # CONFIG_USB_POWERMATE is not set -# CONFIG_USB_MTOUCH is not set -# CONFIG_USB_ITMTOUCH is not set -# CONFIG_USB_EGALAX is not set +# CONFIG_USB_TOUCHSCREEN is not set # CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set @@ -2191,21 +2331,6 @@ CONFIG_USB_HIDDEV=y # CONFIG_USB_MDC800 is not set # CONFIG_USB_MICROTEK is not set -# -# USB Multimedia devices -# -# CONFIG_USB_DABUSB is not set -# CONFIG_USB_VICAM is not set -# CONFIG_USB_DSBR is not set -# CONFIG_USB_ET61X251 is not set -# CONFIG_USB_IBMCAM is not set -# CONFIG_USB_KONICAWC is not set -# CONFIG_USB_OV511 is not set -# CONFIG_USB_SE401 is not set -# CONFIG_USB_SN9C102 is not set -# CONFIG_USB_STV680 is not set -# CONFIG_USB_PWC is not set - # # USB Network Adapters # @@ -2213,12 +2338,14 @@ CONFIG_USB_CATC=m CONFIG_USB_KAWETH=m CONFIG_USB_PEGASUS=m CONFIG_USB_RTL8150=m +CONFIG_USB_USBNET_MII=m CONFIG_USB_USBNET=m CONFIG_USB_NET_AX8817X=m CONFIG_USB_NET_CDCETHER=m CONFIG_USB_NET_GL620A=m 
CONFIG_USB_NET_NET1080=m CONFIG_USB_NET_PLUSB=m +# CONFIG_USB_NET_MCS7830 is not set CONFIG_USB_NET_RNDIS_HOST=m CONFIG_USB_NET_CDC_SUBSET=m CONFIG_USB_ALI_M5632=y @@ -2244,17 +2371,21 @@ CONFIG_USB_NET_ZAURUS=m # CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m +# CONFIG_USB_ADUTUX is not set # CONFIG_USB_AUERSWALD is not set # CONFIG_USB_RIO500 is not set # CONFIG_USB_LEGOTOWER is not set CONFIG_USB_LCD=m CONFIG_USB_LED=m +# CONFIG_USB_CYPRESS_CY7C63 is not set # CONFIG_USB_CYTHERM is not set -# CONFIG_USB_PHIDGETKIT is not set -# CONFIG_USB_PHIDGETSERVO is not set +# CONFIG_USB_PHIDGET is not set # CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_FTDI_ELAN is not set +# CONFIG_USB_APPLEDISPLAY is not set # CONFIG_USB_SISUSBVGA is not set # CONFIG_USB_LD is not set +# CONFIG_USB_TRANCEVIBRATOR is not set # CONFIG_USB_TEST is not set # @@ -2277,7 +2408,22 @@ CONFIG_USB_XUSBATM=m CONFIG_MMC=m # CONFIG_MMC_DEBUG is not set CONFIG_MMC_BLOCK=m +# CONFIG_MMC_SDHCI is not set CONFIG_MMC_WBSD=m +# CONFIG_MMC_TIFM_SD is not set + +# +# LED devices +# +# CONFIG_NEW_LEDS is not set + +# +# LED drivers +# + +# +# LED Triggers +# # # InfiniBand support @@ -2289,6 +2435,29 @@ CONFIG_MMC_WBSD=m # # CONFIG_EDAC is not set +# +# Real Time Clock +# +# CONFIG_RTC_CLASS is not set + +# +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices +# + +# +# Virtualization +# +# CONFIG_KVM is not set + # # File systems # @@ -2301,6 +2470,7 @@ CONFIG_EXT3_FS=m CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y +# CONFIG_EXT4DEV_FS is not set CONFIG_JBD=m # CONFIG_JBD_DEBUG is not set CONFIG_FS_MBCACHE=m @@ -2319,15 +2489,16 @@ CONFIG_JFS_SECURITY=y CONFIG_JFS_STATISTICS=y CONFIG_FS_POSIX_ACL=y CONFIG_XFS_FS=m -CONFIG_XFS_EXPORT=y CONFIG_XFS_QUOTA=y CONFIG_XFS_SECURITY=y CONFIG_XFS_POSIX_ACL=y # CONFIG_XFS_RT is not set +# CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set 
CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y # CONFIG_QUOTA is not set CONFIG_QUOTACTL=y CONFIG_DNOTIFY=y @@ -2359,12 +2530,13 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # CONFIG_PROC_FS=y # CONFIG_PROC_KCORE is not set +CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set # CONFIG_HUGETLBFS is not set # CONFIG_HUGETLB_PAGE is not set CONFIG_RAMFS=y -# CONFIG_RELAYFS_FS is not set # CONFIG_CONFIGFS_FS is not set # @@ -2377,11 +2549,11 @@ CONFIG_RAMFS=y # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set CONFIG_JFFS2_FS=m CONFIG_JFFS2_FS_DEBUG=0 CONFIG_JFFS2_FS_WRITEBUFFER=y # CONFIG_JFFS2_SUMMARY is not set +# CONFIG_JFFS2_FS_XATTR is not set CONFIG_JFFS2_COMPRESSION_OPTIONS=y CONFIG_JFFS2_ZLIB=y CONFIG_JFFS2_RTIME=y @@ -2393,7 +2565,6 @@ CONFIG_JFFS2_CMODE_PRIORITY=y CONFIG_SQUASHFS=y # CONFIG_SQUASHFS_EMBEDDED is not set CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 -# CONFIG_SQUASHFS_VMALLOC is not set # CONFIG_VXFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set @@ -2426,8 +2597,10 @@ CONFIG_SMB_FS=m CONFIG_CIFS=m CONFIG_CIFS_STATS=y CONFIG_CIFS_STATS2=y +# CONFIG_CIFS_WEAK_PW_HASH is not set CONFIG_CIFS_XATTR=y # CONFIG_CIFS_POSIX is not set +# CONFIG_CIFS_DEBUG2 is not set # CONFIG_CIFS_EXPERIMENTAL is not set # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set @@ -2484,6 +2657,11 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_KOI8_U is not set CONFIG_NLS_UTF8=y +# +# Distributed Lock Manager +# +# CONFIG_DLM is not set + # # Instrumentation Support # @@ -2493,14 +2671,20 @@ CONFIG_NLS_UTF8=y # # Kernel hacking # +CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_MUST_CHECK=y # CONFIG_MAGIC_SYSRQ is not set +CONFIG_UNUSED_SYMBOLS=y +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set # CONFIG_DEBUG_KERNEL is not set CONFIG_LOG_BUF_SHIFT=15 CONFIG_DEBUG_BUGVERBOSE=y CONFIG_EARLY_PRINTK=y CONFIG_X86_FIND_SMP_CONFIG=y 
CONFIG_X86_MPPARSE=y +CONFIG_DOUBLEFAULT=y # # Security options @@ -2512,7 +2696,12 @@ CONFIG_X86_MPPARSE=y # Cryptographic options # CONFIG_CRYPTO=y +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set # CONFIG_CRYPTO_NULL is not set # CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=m @@ -2521,9 +2710,15 @@ CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m # CONFIG_CRYPTO_WP512 is not set # CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_GF128MUL is not set +CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_CBC=m +# CONFIG_CRYPTO_LRW is not set CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_TWOFISH_COMMON=m +CONFIG_CRYPTO_TWOFISH_586=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_AES_586=m @@ -2534,7 +2729,7 @@ CONFIG_CRYPTO_ARC4=m # CONFIG_CRYPTO_KHAZAD is not set # CONFIG_CRYPTO_ANUBIS is not set CONFIG_CRYPTO_DEFLATE=y -# CONFIG_CRYPTO_MICHAEL_MIC is not set +CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_CRC32C=m # CONFIG_CRYPTO_TEST is not set @@ -2542,21 +2737,27 @@ CONFIG_CRYPTO_CRC32C=m # Hardware crypto devices # CONFIG_CRYPTO_DEV_PADLOCK=m -CONFIG_CRYPTO_DEV_PADLOCK_AES=y +CONFIG_CRYPTO_DEV_PADLOCK_AES=m +CONFIG_CRYPTO_DEV_PADLOCK_SHA=m +CONFIG_CRYPTO_DEV_GEODE=m # # Library routines # +CONFIG_BITREVERSE=y CONFIG_CRC_CCITT=m CONFIG_CRC16=m CONFIG_CRC32=y CONFIG_LIBCRC32C=m +CONFIG_AUDIT_GENERIC=y CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y CONFIG_TEXTSEARCH=y CONFIG_TEXTSEARCH_KMP=m CONFIG_TEXTSEARCH_BM=m CONFIG_TEXTSEARCH_FSM=m +CONFIG_PLIST=y +CONFIG_IOMAP_COPY=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_PENDING_IRQ=y diff --git a/config/rootfiles/common/klibc b/config/rootfiles/common/klibc index 62a7d5ec6..e85a94a44 100644 --- a/config/rootfiles/common/klibc +++ b/config/rootfiles/common/klibc @@ -1,4 +1,4 @@ -lib/klibc-pFXBYGYRiBAAryaZ7Zw_ISKpcTk.so +lib/klibc-a0tNUCyBK7E4egoFt3zvXR0uOfI.so 
usr/bin/klcc #usr/lib/klibc #usr/lib/klibc/bin @@ -1566,7 +1566,7 @@ usr/lib/klibc/bin/zcat #usr/lib/klibc/lib #usr/lib/klibc/lib/crt0.o #usr/lib/klibc/lib/interp.o -usr/lib/klibc/lib/klibc-pFXBYGYRiBAAryaZ7Zw_ISKpcTk.so +usr/lib/klibc/lib/klibc-a0tNUCyBK7E4egoFt3zvXR0uOfI.so #usr/lib/klibc/lib/libc.a usr/lib/klibc/lib/libc.so #usr/man/man1/klcc.1 diff --git a/config/rootfiles/core/1/files b/config/rootfiles/common/oldcore1 similarity index 100% rename from config/rootfiles/core/1/files rename to config/rootfiles/common/oldcore1 diff --git a/config/rootfiles/core/10/files b/config/rootfiles/common/oldcore10 similarity index 63% rename from config/rootfiles/core/10/files rename to config/rootfiles/common/oldcore10 index f236d4039..771d2c972 100644 --- a/config/rootfiles/core/10/files +++ b/config/rootfiles/common/oldcore10 @@ -1,5 +1,5 @@ etc/collectd.conf -etc/init.d/collectd +etc/rc.d/init.d/collectd usr/local/bin/wirelessctrl srv/web/ipfire/cgi-bin/optionsfw.cgi srv/web/ipfire/cgi-bin/gui.cgi @@ -8,5 +8,3 @@ srv/web/ipfire/cgi-bin/services.cgi srv/web/ipfire/cgi-bin/graphs.cgi var/ipfire/langs var/ipfire/graphs.pl -lib/modules/2.6.16.57-ipfire/kernel/drivers/parport/pt_drv.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/parport/pt_drv.ko diff --git a/config/rootfiles/core/11/files b/config/rootfiles/common/oldcore11 similarity index 100% rename from config/rootfiles/core/11/files rename to config/rootfiles/common/oldcore11 diff --git a/config/rootfiles/common/oldcore12 b/config/rootfiles/common/oldcore12 new file mode 100644 index 000000000..ef20f11e0 --- /dev/null +++ b/config/rootfiles/common/oldcore12 @@ -0,0 +1,11 @@ +etc/udev/dvb.sh +etc/udev/rules.d/10-dvb.rules +srv/web/ipfire/cgi-bin/services.cgi +srv/web/ipfire/cgi-bin/proxy.cgi +srv/web/ipfire/cgi-bin/urlfilter.cgi +srv/web/ipfire/cgi-bin/updatexlrator.cgi +srv/web/ipfire/cgi-bin/pakfire.cgi +srv/web/ipfire/cgi-bin/logs.cgi/log.dat +opt/pakfire/lib/functions.pl +var/ipfire/langs 
+etc/sysconfig/modules diff --git a/config/rootfiles/core/13/files b/config/rootfiles/common/oldcore13 similarity index 100% rename from config/rootfiles/core/13/files rename to config/rootfiles/common/oldcore13 diff --git a/config/rootfiles/common/oldcore14 b/config/rootfiles/common/oldcore14 new file mode 100644 index 000000000..544126cec --- /dev/null +++ b/config/rootfiles/common/oldcore14 @@ -0,0 +1,25 @@ +var/ipfire/langs +etc/squid +usr/lib/squid +usr/sbin/squid +srv/web/ipfire/cgi-bin/updatexlrator.cgi +srv/web/ipfire/html/images/updxl-src-avg.gif +usr/sbin/updxlrator +srv/web/ipfire/html/images/audio-volume-high.png +srv/web/ipfire/html/images/audio-volume-low.png +srv/web/ipfire/html/images/audio-x-generic.png +srv/web/ipfire/html/images/audio-volume-high-red.png +srv/web/ipfire/html/images/audio-volume-low-red.png +srv/web/ipfire/html/images/audio-x-generic-red.png +srv/web/ipfire/html/images/media-flash.png +srv/web/ipfire/html/images/media-playback-start-all.png +srv/web/ipfire/html/images/media-repeat.png +srv/web/ipfire/html/images/media-skip-backward.png +srv/web/ipfire/html/images/media-floppy.png +srv/web/ipfire/html/images/media-playback-start.png +srv/web/ipfire/html/images/media-resume.png +srv/web/ipfire/html/images/media-skip-forward.png +srv/web/ipfire/html/images/media-optical.png +srv/web/ipfire/html/images/media-playback-stop.png +srv/web/ipfire/html/images/media-shuffle.png +usr/share/curl/curl-ca-bundle.crt diff --git a/config/rootfiles/core/2/files b/config/rootfiles/common/oldcore2 similarity index 100% rename from config/rootfiles/core/2/files rename to config/rootfiles/common/oldcore2 diff --git a/config/rootfiles/core/3/files b/config/rootfiles/common/oldcore3 similarity index 100% rename from config/rootfiles/core/3/files rename to config/rootfiles/common/oldcore3 diff --git a/config/rootfiles/core/4/files b/config/rootfiles/common/oldcore4 similarity index 94% rename from config/rootfiles/core/4/files rename to 
config/rootfiles/common/oldcore4 index 586d315b4..dd9e12f23 100644 --- a/config/rootfiles/core/4/files +++ b/config/rootfiles/common/oldcore4 @@ -1,4 +1,4 @@ -etc/init.d/firewall +etc/rc.d/init.d/firewall usr/local/bin/makegraphs var/ipfire/langs var/ipfire/optionsfw/settings diff --git a/config/rootfiles/common/oldcore5 b/config/rootfiles/common/oldcore5 new file mode 100644 index 000000000..bd8bff956 --- /dev/null +++ b/config/rootfiles/common/oldcore5 @@ -0,0 +1,6 @@ +lib/modules/KVER-ipfire/kernel/fs/nfsd/nfsd.ko +lib/modules/KVER-ipfire-smp/kernel/fs/nfsd/nfsd.ko +usr/local/bin/qosctrl +etc/rc.d/init.d/squid +var/ipfire/langs +var/ipfire/graphs.pl diff --git a/config/rootfiles/common/oldcore6 b/config/rootfiles/common/oldcore6 new file mode 100644 index 000000000..71302132c --- /dev/null +++ b/config/rootfiles/common/oldcore6 @@ -0,0 +1,7 @@ +srv/web/ipfire/cgi-bin/outgoingfw.cgi +etc/rc.d/init.d/squid +#bin/ntfs-3g +#lib/libntfs-3g.so +#lib/libntfs-3g.so.21 +#lib/libntfs-3g.so.21.0.0 +#sbin/mount.ntfs-3g diff --git a/config/rootfiles/core/7/files b/config/rootfiles/common/oldcore7 similarity index 78% rename from config/rootfiles/core/7/files rename to config/rootfiles/common/oldcore7 index 0d6d78ddd..3c4d08a6a 100644 --- a/config/rootfiles/core/7/files +++ b/config/rootfiles/common/oldcore7 @@ -1,11 +1,11 @@ etc/collectd.conf -etc/init.d/collectd +etc/rc.d/init.d/collectd etc/rc.d/rc0.d/K50collectd etc/rc.d/rc3.d/S20collectd etc/rc.d/rc6.d/K50collectd etc/rc.d/rc3.d/S19smartenabler -etc/init.d/smartenabler -etc/init.d/networking/red +etc/rc.d/init.d/smartenabler +etc/rc.d/init.d/networking/red srv/web/ipfire/cgi-bin/graphs.cgi srv/web/ipfire/cgi-bin/hardwaregraphs.cgi srv/web/ipfire/cgi-bin/network.cgi @@ -18,5 +18,5 @@ usr/sbin/collectd var/ipfire/graphs.pl var/ipfire/menu.d/20-status.menu var/ipfire/langs -lib/modules/2.6.16.57-ipfire/kernel/drivers/ieee1394/sbp2.ko +lib/modules/KVER-ipfire/kernel/drivers/ieee1394/sbp2.ko 
srv/web/ipfire/cgi-bin/logs.cgi/log.dat diff --git a/config/rootfiles/common/oldcore8 b/config/rootfiles/common/oldcore8 new file mode 100644 index 000000000..eda575727 --- /dev/null +++ b/config/rootfiles/common/oldcore8 @@ -0,0 +1,3 @@ +srv/web/ipfire/cgi-bin/logs.cgi/log.dat +etc/ntp +usr/local/bin/wirelessctrl diff --git a/config/rootfiles/core/9/files b/config/rootfiles/common/oldcore9 similarity index 77% rename from config/rootfiles/core/9/files rename to config/rootfiles/common/oldcore9 index 6756a222a..808cd38b5 100644 --- a/config/rootfiles/core/9/files +++ b/config/rootfiles/common/oldcore9 @@ -1,10 +1,8 @@ -lib/modules/2.6.16.57-ipfire/misc/kqemu.ko -lib/modules/2.6.16.57-ipfire-smp/misc/kqemu.ko sbin/hdparm usr/lib/collectd usr/sbin/collectd usr/sbin/collectdmon -etc/init.d/networking/red +etc/rc.d/init.d/networking/red etc/squid sbin/capiinit usr/bin/capiinfo diff --git a/config/rootfiles/core/1/meta b/config/rootfiles/core/1/meta deleted file mode 100644 index d547fa86f..000000000 --- a/config/rootfiles/core/1/meta +++ /dev/null @@ -1 +0,0 @@ -DEPS="" diff --git a/config/rootfiles/core/1/update.sh b/config/rootfiles/core/1/update.sh deleted file mode 100644 index 465a6deb0..000000000 --- a/config/rootfiles/core/1/update.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -. /opt/pakfire/lib/functions.sh - -extract_files -mv /srv/web/ipfire/html/updatecache /var/ -/etc/init.d/squid restart diff --git a/config/rootfiles/core/10/meta b/config/rootfiles/core/10/meta deleted file mode 100644 index d547fa86f..000000000 --- a/config/rootfiles/core/10/meta +++ /dev/null @@ -1 +0,0 @@ -DEPS="" diff --git a/config/rootfiles/core/10/update.sh b/config/rootfiles/core/10/update.sh deleted file mode 100644 index a5b3c62f0..000000000 --- a/config/rootfiles/core/10/update.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -. 
/opt/pakfire/lib/functions.sh -/usr/local/bin/backupctrl exclude >/dev/null 2>&1 -extract_files -perl -e "require '/var/ipfire/lang.pl'; &Lang::BuildCacheLang" -depmod -a -/etc/init.d/collectd restart diff --git a/config/rootfiles/core/11/meta b/config/rootfiles/core/11/meta deleted file mode 100644 index d547fa86f..000000000 --- a/config/rootfiles/core/11/meta +++ /dev/null @@ -1 +0,0 @@ -DEPS="" diff --git a/config/rootfiles/core/11/update.sh b/config/rootfiles/core/11/update.sh deleted file mode 100644 index 7fcbd6409..000000000 --- a/config/rootfiles/core/11/update.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -. /opt/pakfire/lib/functions.sh -/usr/local/bin/backupctrl exclude >/dev/null 2>&1 -/etc/init.d/squid stop -extract_files -perl -e "require '/var/ipfire/lang.pl'; &Lang::BuildCacheLang" -squidGuard -d -C all -chmod 666 /var/ipfire/urlfilter/blacklists/*/*.db -/etc/init.d/squid start diff --git a/config/rootfiles/core/12/files b/config/rootfiles/core/12/files deleted file mode 100644 index 82759d82e..000000000 --- a/config/rootfiles/core/12/files +++ /dev/null @@ -1,137 +0,0 @@ -lib/modules/2.6.16.57-ipfire/kernel/drivers/i2c/i2c-core.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/i2c/algos/i2c-algo-bit.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/saa7146.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/saa7146_vv.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttpci/budget-ci.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttpci/budget.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttpci/budget-patch.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttpci/ttpci-eeprom.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttpci/dvb-ttpci.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttpci/budget-av.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttpci/budget-core.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttusb-dec/ttusbdecfe.ko 
-lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttusb-dec/ttusb_dec.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-cxusb.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-digitv.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dibusb-common.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-umt-010.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dibusb-mb.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dibusb-mc.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-nova-t-usb2.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-vp7045.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-a800.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dtt200u.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-vp702x.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/pluto2/pluto2.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/cinergyT2/cinergyT2.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/b2c2/b2c2-flexcop-pci.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/b2c2/b2c2-flexcop.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/b2c2/b2c2-flexcop-usb.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/s5h1420.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/dib3000mb.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/mt352.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/cx22702.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/ves1x93.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/or51132.ko 
-lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/tda10021.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/or51211.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/cx24123.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/sp8870.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/nxt6000.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/tda1004x.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/mt312.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/tda8083.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/nxt200x.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/cx24110.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/stv0299.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/l64781.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/dvb-pll.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/ves1820.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/sp887x.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/dib3000mc.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/bcm3510.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/cx22700.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/lgdt330x.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/stv0297.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-core/dvb-core.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/compat_ioctl32.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/videodev.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/v4l1-compat.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/v4l2-common.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/i2c/i2c-core.ko 
-lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/i2c/algos/i2c-algo-bit.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/saa7146.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/saa7146_vv.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttpci/budget-ci.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttpci/budget.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttpci/budget-patch.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttpci/ttpci-eeprom.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttpci/dvb-ttpci.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttpci/budget-av.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttpci/budget-core.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttusb-dec/ttusbdecfe.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttusb-dec/ttusb_dec.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-cxusb.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-digitv.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dibusb-common.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-umt-010.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dibusb-mb.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dibusb-mc.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-nova-t-usb2.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-vp7045.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-a800.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dtt200u.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-vp702x.ko 
-lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/pluto2/pluto2.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/cinergyT2/cinergyT2.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/b2c2/b2c2-flexcop-pci.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/b2c2/b2c2-flexcop.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/b2c2/b2c2-flexcop-usb.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/s5h1420.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/dib3000mb.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/mt352.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/cx22702.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/ves1x93.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/or51132.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/tda10021.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/or51211.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/cx24123.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/sp8870.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/nxt6000.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/tda1004x.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/mt312.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/tda8083.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/nxt200x.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/cx24110.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/stv0299.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/l64781.ko 
-lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/dvb-pll.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/ves1820.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/sp887x.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/dib3000mc.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/bcm3510.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/cx22700.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/lgdt330x.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/stv0297.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-core/dvb-core.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/compat_ioctl32.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/videodev.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/v4l1-compat.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/v4l2-common.ko -etc/udev/dvb.sh -etc/udev/rules.d/10-dvb.rules -srv/web/ipfire/cgi-bin/services.cgi -lib/modules/2.6.16.57-ipfire/kernel/drivers/usb/net/dm9601.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/usb/net/dm9601.ko -srv/web/ipfire/cgi-bin/proxy.cgi -srv/web/ipfire/cgi-bin/urlfilter.cgi -srv/web/ipfire/cgi-bin/updatexlrator.cgi -srv/web/ipfire/cgi-bin/pakfire.cgi -srv/web/ipfire/cgi-bin/logs.cgi/log.dat -opt/pakfire/lib/functions.pl -var/ipfire/langs -etc/sysconfig/modules diff --git a/config/rootfiles/core/12/meta b/config/rootfiles/core/12/meta deleted file mode 100644 index d547fa86f..000000000 --- a/config/rootfiles/core/12/meta +++ /dev/null @@ -1 +0,0 @@ -DEPS="" diff --git a/config/rootfiles/core/12/update.sh b/config/rootfiles/core/12/update.sh deleted file mode 100644 index be912fcf0..000000000 --- a/config/rootfiles/core/12/update.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -. 
/opt/pakfire/lib/functions.sh -/usr/local/bin/backupctrl exclude >/dev/null 2>&1 -extract_files -perl -e "require '/var/ipfire/lang.pl'; &Lang::BuildCacheLang" -depmod -a 2.6.16.57-ipfire -depmod -a 2.6.16.57-ipfire-smp - diff --git a/config/rootfiles/core/13/meta b/config/rootfiles/core/13/meta deleted file mode 100644 index d547fa86f..000000000 --- a/config/rootfiles/core/13/meta +++ /dev/null @@ -1 +0,0 @@ -DEPS="" diff --git a/config/rootfiles/core/13/update.sh b/config/rootfiles/core/13/update.sh deleted file mode 100644 index 35cbba98e..000000000 --- a/config/rootfiles/core/13/update.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -. /opt/pakfire/lib/functions.sh -/usr/local/bin/backupctrl exclude >/dev/null 2>&1 -extract_files -/var/ipfire/urlfilter/bin/prebuild.pl -/etc/init.d/squid restart diff --git a/config/rootfiles/core/14/files b/config/rootfiles/core/14/files deleted file mode 100644 index 5af1e3dc4..000000000 --- a/config/rootfiles/core/14/files +++ /dev/null @@ -1,297 +0,0 @@ -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/ir-common.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/saa7146_vv.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/tea5761.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/mt2131.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/tuner-xc2028.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/xc5000.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/tuner-types.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/qt1010.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/tuner-simple.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/tda8290.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/tda18271.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/tda827x.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/mt2266.ko 
-lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/tda9887.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/tea5767.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/mt20xx.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/tuners/mt2060.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/common/saa7146.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/pluto2/pluto2.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/b2c2/b2c2-flexcop-pci.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/b2c2/b2c2-flexcop.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/b2c2/b2c2-flexcop-usb.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/cinergyT2/cinergyT2.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-au6610.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dibusb-mc.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-cxusb.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-gp8psk.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-opera.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-vp702x.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-vp7045.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-ttusb2.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-digitv.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-umt-010.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-nova-t-usb2.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dibusb-mb.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-af9005.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-m920x.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb.ko 
-lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dtt200u.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-a800.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dibusb-common.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-af9005-remote.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-gl861.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dib0700.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/sp8870.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/dib3000mc.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/dib7000p.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/tda826x.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/isl6405.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/ves1x93.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/or51132.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/isl6421.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/mt352.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/tda10023.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/cx24123.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/drx397xD.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/cx24110.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/s5h1409.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/dib0070.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/dibx000_common.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/cx22700.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/stv0297.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/nxt6000.ko 
-lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/stv0299.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/au8522.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/tua6100.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/s5h1411.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/mt312.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/dvb-pll.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/lgdt330x.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/bcm3510.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/dib3000mb.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/itd1000.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/l64781.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/nxt200x.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/tda10048.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/ves1820.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/dib7000m.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/lnbp21.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/tda10086.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/zl10353.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/s5h1420.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/tda10021.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/cx22702.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/or51211.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/sp887x.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/tda1004x.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/tda8083.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.ko 
-lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttpci/budget-core.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttpci/ttpci-eeprom.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttpci/budget.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttpci/dvb-ttpci.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttpci/budget-ci.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttpci/budget-av.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttpci/budget-patch.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/dvb-core/dvb-core.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttusb-dec/ttusbdecfe.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/ttusb-dec/ttusb_dec.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/bt8xx/dst_ca.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/bt8xx/dvb-bt8xx.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/bt8xx/bt878.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/bt8xx/dst.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/msp3400.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/v4l1-compat.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/videobuf-dvb.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/ir-kbd-i2c.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/cs53l32a.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/btcx-risc.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/cx2341x.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/v4l2-common.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/tda9875.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/videobuf-dma-sg.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/cx88/cx88-dvb.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/cx88/cx8802.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/cx88/cx88xx.ko 
-lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/cx88/cx88-vp3054-i2c.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/cx88/cx8800.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/v4l2-int-device.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/videodev.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/wm8775.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/videobuf-core.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/pvrusb2/pvrusb2.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/compat_ioctl32.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/tvaudio.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/cx25840/cx25840.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/saa7115.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/tda7432.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/bt8xx/bttv.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/tveeprom.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/saa7134 -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/saa7134/saa6752hs.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/saa7134/saa7134-dvb.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/saa7134/saa7134-empress.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/saa7134/saa7134.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/tuner.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/ir-common.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/saa7146_vv.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/tea5761.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/mt2131.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/tuner-xc2028.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/xc5000.ko 
-lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/tuner-types.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/qt1010.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/tuner-simple.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/tda8290.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/tda18271.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/tda827x.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/mt2266.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/tda9887.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/tea5767.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/mt20xx.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/tuners/mt2060.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/common/saa7146.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/pluto2/pluto2.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/b2c2/b2c2-flexcop-pci.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/b2c2/b2c2-flexcop.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/b2c2/b2c2-flexcop-usb.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/cinergyT2/cinergyT2.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-au6610.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dibusb-mc.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-cxusb.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-gp8psk.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-opera.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-vp702x.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-vp7045.ko 
-lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-ttusb2.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-digitv.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-umt-010.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-nova-t-usb2.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dibusb-mb.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-af9005.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-m920x.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dtt200u.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-a800.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dibusb-common.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-af9005-remote.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-gl861.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-usb/dvb-usb-dib0700.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/sp8870.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/dib3000mc.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/dib7000p.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/tda826x.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/isl6405.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/ves1x93.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/or51132.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/isl6421.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/mt352.ko 
-lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/tda10023.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/cx24123.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/drx397xD.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/cx24110.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/s5h1409.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/dib0070.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/dibx000_common.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/cx22700.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/stv0297.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/nxt6000.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/stv0299.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/au8522.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/tua6100.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/s5h1411.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/mt312.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/dvb-pll.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/lgdt330x.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/bcm3510.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/dib3000mb.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/itd1000.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/l64781.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/nxt200x.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/tda10048.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/ves1820.ko 
-lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/dib7000m.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/lnbp21.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/tda10086.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/zl10353.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/s5h1420.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/tda10021.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/cx22702.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/or51211.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/sp887x.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/tda1004x.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/tda8083.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttpci/budget-core.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttpci/ttpci-eeprom.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttpci/budget.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttpci/dvb-ttpci.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttpci/budget-ci.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttpci/budget-av.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttpci/budget-patch.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/dvb-core/dvb-core.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttusb-dec/ttusbdecfe.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/ttusb-dec/ttusb_dec.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/bt8xx/dst_ca.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/bt8xx/dvb-bt8xx.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/bt8xx/bt878.ko 
-lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/bt8xx/dst.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/msp3400.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/v4l1-compat.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/videobuf-dvb.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/ir-kbd-i2c.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/cs53l32a.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/btcx-risc.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/cx2341x.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/v4l2-common.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/tda9875.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/videobuf-dma-sg.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/cx88/cx88-dvb.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/cx88/cx8802.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/cx88/cx88xx.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/cx88/cx88-vp3054-i2c.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/cx88/cx8800.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/v4l2-int-device.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/videodev.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/wm8775.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/videobuf-core.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/pvrusb2/pvrusb2.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/compat_ioctl32.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/tvaudio.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/cx25840/cx25840.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/saa7115.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/tda7432.ko 
-lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/bt8xx/bttv.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/tveeprom.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/saa7134 -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/saa7134/saa6752hs.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/saa7134/saa7134-dvb.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/saa7134/saa7134-empress.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/saa7134/saa7134.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/tuner.ko -var/ipfire/langs -etc/squid -usr/lib/squid -usr/sbin/squid -srv/web/ipfire/cgi-bin/updatexlrator.cgi -srv/web/ipfire/html/images/updxl-src-avg.gif -usr/sbin/updxlrator -srv/web/ipfire/html/images/audio-volume-high.png -srv/web/ipfire/html/images/audio-volume-low.png -srv/web/ipfire/html/images/audio-x-generic.png -srv/web/ipfire/html/images/audio-volume-high-red.png -srv/web/ipfire/html/images/audio-volume-low-red.png -srv/web/ipfire/html/images/audio-x-generic-red.png -srv/web/ipfire/html/images/media-flash.png -srv/web/ipfire/html/images/media-playback-start-all.png -srv/web/ipfire/html/images/media-repeat.png -srv/web/ipfire/html/images/media-skip-backward.png -srv/web/ipfire/html/images/media-floppy.png -srv/web/ipfire/html/images/media-playback-start.png -srv/web/ipfire/html/images/media-resume.png -srv/web/ipfire/html/images/media-skip-forward.png -srv/web/ipfire/html/images/media-optical.png -srv/web/ipfire/html/images/media-playback-stop.png -srv/web/ipfire/html/images/media-shuffle.png -usr/share/curl/curl-ca-bundle.crt diff --git a/config/rootfiles/core/14/meta b/config/rootfiles/core/14/meta deleted file mode 100644 index d547fa86f..000000000 --- a/config/rootfiles/core/14/meta +++ /dev/null @@ -1 +0,0 @@ -DEPS="" diff --git a/config/rootfiles/core/14/update.sh b/config/rootfiles/core/14/update.sh deleted file mode 100644 index 
dafea5700..000000000 --- a/config/rootfiles/core/14/update.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -. /opt/pakfire/lib/functions.sh -/usr/local/bin/backupctrl exclude >/dev/null 2>&1 -rm /lib/modules/2.6.16.57-ipfire/kernel/drivers/media/video/video-buf.ko -rm /lib/modules/2.6.16.57-ipfire/kernel/drivers/media/dvb/frontends/dib3000-common.ko -rm /lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/video/video-buf.ko -rm /lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/media/dvb/frontends/dib3000-common.ko -/etc/init.d/squid stop -extract_files -perl -e "require '/var/ipfire/lang.pl'; &Lang::BuildCacheLang" -/etc/init.d/squid start -depmod -a 2.6.16.57-ipfire -depmod -a 2.6.16.57-ipfire-smp diff --git a/config/rootfiles/core/2/meta b/config/rootfiles/core/2/meta deleted file mode 100644 index d547fa86f..000000000 --- a/config/rootfiles/core/2/meta +++ /dev/null @@ -1 +0,0 @@ -DEPS="" diff --git a/config/rootfiles/core/2/update.sh b/config/rootfiles/core/2/update.sh deleted file mode 100644 index 2b2622a86..000000000 --- a/config/rootfiles/core/2/update.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -. /opt/pakfire/lib/functions.sh -mv /etc/sysconfig/rc.local /etc/sysconfig/rc.local.old -extract_files -if [ -e "/var/ipfire/qos/enable" ]; then - /usr/local/bin/qosctrl stop - /usr/local/bin/qosctrl generate - /usr/local/bin/qosctrl start -fi -/usr/local/bin/outgoingfwctrl restart diff --git a/config/rootfiles/core/3/meta b/config/rootfiles/core/3/meta deleted file mode 100644 index d547fa86f..000000000 --- a/config/rootfiles/core/3/meta +++ /dev/null @@ -1 +0,0 @@ -DEPS="" diff --git a/config/rootfiles/core/3/update.sh b/config/rootfiles/core/3/update.sh deleted file mode 100644 index 0eba4bc92..000000000 --- a/config/rootfiles/core/3/update.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -. 
/opt/pakfire/lib/functions.sh -extract_files diff --git a/config/rootfiles/core/4/meta b/config/rootfiles/core/4/meta deleted file mode 100644 index d547fa86f..000000000 --- a/config/rootfiles/core/4/meta +++ /dev/null @@ -1 +0,0 @@ -DEPS="" diff --git a/config/rootfiles/core/4/update.sh b/config/rootfiles/core/4/update.sh deleted file mode 100644 index 19c61575d..000000000 --- a/config/rootfiles/core/4/update.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -. /opt/pakfire/lib/functions.sh -extract_files -perl -e "require '/var/ipfire/lang.pl'; &Lang::BuildCacheLang" diff --git a/config/rootfiles/core/5/files b/config/rootfiles/core/5/files deleted file mode 100644 index a49abc806..000000000 --- a/config/rootfiles/core/5/files +++ /dev/null @@ -1,6 +0,0 @@ -lib/modules/2.6.16.57-ipfire/kernel/fs/nfsd/nfsd.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/fs/nfsd/nfsd.ko -usr/local/bin/qosctrl -etc/init.d/squid -var/ipfire/langs -var/ipfire/graphs.pl diff --git a/config/rootfiles/core/5/meta b/config/rootfiles/core/5/meta deleted file mode 100644 index d547fa86f..000000000 --- a/config/rootfiles/core/5/meta +++ /dev/null @@ -1 +0,0 @@ -DEPS="" diff --git a/config/rootfiles/core/5/update.sh b/config/rootfiles/core/5/update.sh deleted file mode 100644 index ab25cabd0..000000000 --- a/config/rootfiles/core/5/update.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -. 
/opt/pakfire/lib/functions.sh -extract_files -perl -e "require '/var/ipfire/lang.pl'; &Lang::BuildCacheLang" -depmod -a diff --git a/config/rootfiles/core/6/files b/config/rootfiles/core/6/files deleted file mode 100644 index 6a52d2728..000000000 --- a/config/rootfiles/core/6/files +++ /dev/null @@ -1,19 +0,0 @@ -lib/modules/2.6.16.57-ipfire/kernel/fs/fuse -lib/modules/2.6.16.57-ipfire-smp/kernel/fs/fuse -lib/modules/2.6.16.57-ipfire/kernel/drivers/net/r8169.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/net/r8168.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/usb/net/mcs7830.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/ieee1394/sbp2.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/net/r8169.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/net/r8168.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/usb/net/mcs7830.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/ieee1394/sbp2.ko -/sbin/kudzu -var/ipfire/langs -srv/web/ipfire/cgi-bin/outgoingfw.cgi -etc/init.d/squid -#bin/ntfs-3g -#lib/libntfs-3g.so -#lib/libntfs-3g.so.21 -#lib/libntfs-3g.so.21.0.0 -#sbin/mount.ntfs-3g diff --git a/config/rootfiles/core/6/meta b/config/rootfiles/core/6/meta deleted file mode 100644 index d547fa86f..000000000 --- a/config/rootfiles/core/6/meta +++ /dev/null @@ -1 +0,0 @@ -DEPS="" diff --git a/config/rootfiles/core/6/update.sh b/config/rootfiles/core/6/update.sh deleted file mode 100644 index 621a4cdd0..000000000 --- a/config/rootfiles/core/6/update.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -. 
/opt/pakfire/lib/functions.sh -extract_files -depmod -a -perl -e "require '/var/ipfire/lang.pl'; &Lang::BuildCacheLang" diff --git a/config/rootfiles/core/7/meta b/config/rootfiles/core/7/meta deleted file mode 100644 index d547fa86f..000000000 --- a/config/rootfiles/core/7/meta +++ /dev/null @@ -1 +0,0 @@ -DEPS="" diff --git a/config/rootfiles/core/7/update.sh b/config/rootfiles/core/7/update.sh deleted file mode 100644 index a41f73b4f..000000000 --- a/config/rootfiles/core/7/update.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -. /opt/pakfire/lib/functions.sh -extract_files -depmod -a -GATEWAY=$(cat /var/ipfire/red/remote-ipaddress) -echo "$GATEWAY gateway" >> /etc/hosts -/etc/init.d/collectd start -perl -e "require '/var/ipfire/lang.pl'; &Lang::BuildCacheLang" diff --git a/config/rootfiles/core/8/files b/config/rootfiles/core/8/files deleted file mode 100644 index 989570be0..000000000 --- a/config/rootfiles/core/8/files +++ /dev/null @@ -1,7 +0,0 @@ -lib/modules/2.6.16.57-ipfire/kernel/drivers/net/r8169.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/net/r8168.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/net/r8169.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/net/r8168.ko -srv/web/ipfire/cgi-bin/logs.cgi/log.dat -etc/ntp -usr/local/bin/wirelessctrl diff --git a/config/rootfiles/core/8/meta b/config/rootfiles/core/8/meta deleted file mode 100644 index d547fa86f..000000000 --- a/config/rootfiles/core/8/meta +++ /dev/null @@ -1 +0,0 @@ -DEPS="" diff --git a/config/rootfiles/core/8/update.sh b/config/rootfiles/core/8/update.sh deleted file mode 100644 index c1952ecd8..000000000 --- a/config/rootfiles/core/8/update.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -. 
/opt/pakfire/lib/functions.sh -/usr/local/bin/backupctrl exclude >/dev/null 2>&1 -extract_files -depmod -a - diff --git a/config/rootfiles/core/9/meta b/config/rootfiles/core/9/meta deleted file mode 100644 index d547fa86f..000000000 --- a/config/rootfiles/core/9/meta +++ /dev/null @@ -1 +0,0 @@ -DEPS="" diff --git a/config/rootfiles/core/9/update.sh b/config/rootfiles/core/9/update.sh deleted file mode 100644 index 16cffb3aa..000000000 --- a/config/rootfiles/core/9/update.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -. /opt/pakfire/lib/functions.sh -/usr/local/bin/backupctrl exclude >/dev/null 2>&1 -/etc/init.d/squid stop -/etc/init.d/collectd stop -extract_files -/etc/init.d/squid start -/etc/init.d/collectd start -depmod -a -perl -e "require '/var/ipfire/lang.pl'; &Lang::BuildCacheLang" diff --git a/config/rootfiles/packages/pcmciautils b/config/rootfiles/packages/pcmciautils index da04ffb08..53efe4636 100644 --- a/config/rootfiles/packages/pcmciautils +++ b/config/rootfiles/packages/pcmciautils @@ -7,22 +7,22 @@ sbin/pcmcia-check-broken-cis sbin/pcmcia-socket-startup #usr/share/man/man8/lspcmcia.8 #usr/share/man/man8/pccardctl.8 -lib/modules/2.6.16.57-ipfire/kernel/drivers/char/pcmcia -lib/modules/2.6.16.57-ipfire/kernel/drivers/ide/legacy/ide-cs.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/net/pcmcia -lib/modules/2.6.16.57-ipfire/kernel/drivers/net/tulip/xircom_cb.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/net/tulip/xircom_tulip_cb.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/scsi/pcmcia -lib/modules/2.6.16.57-ipfire/kernel/drivers/parport/parport_cs.ko -lib/modules/2.6.16.57-ipfire/kernel/drivers/pcmcia -lib/modules/2.6.16.57-ipfire/kernel/drivers/serial/serial_cs.ko -lib/modules/2.6.16.57-ipfire/kernel/sound/pcmcia -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/char/pcmcia -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/ide/legacy/ide-cs.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/net/pcmcia 
-lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/net/tulip/xircom_cb.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/scsi/pcmcia -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/parport/parport_cs.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/pcmcia -lib/modules/2.6.16.57-ipfire-smp/kernel/drivers/serial/serial_cs.ko -lib/modules/2.6.16.57-ipfire-smp/kernel/sound/pcmcia +lib/modules/KVER-ipfire/kernel/drivers/char/pcmcia +lib/modules/KVER-ipfire/kernel/drivers/ide/legacy/ide-cs.ko +lib/modules/KVER-ipfire/kernel/drivers/net/pcmcia +lib/modules/KVER-ipfire/kernel/drivers/net/tulip/xircom_cb.ko +lib/modules/KVER-ipfire/kernel/drivers/net/tulip/xircom_tulip_cb.ko +lib/modules/KVER-ipfire/kernel/drivers/scsi/pcmcia +lib/modules/KVER-ipfire/kernel/drivers/parport/parport_cs.ko +lib/modules/KVER-ipfire/kernel/drivers/pcmcia +lib/modules/KVER-ipfire/kernel/drivers/serial/serial_cs.ko +lib/modules/KVER-ipfire/kernel/sound/pcmcia +lib/modules/KVER-ipfire-smp/kernel/drivers/char/pcmcia +lib/modules/KVER-ipfire-smp/kernel/drivers/ide/legacy/ide-cs.ko +lib/modules/KVER-ipfire-smp/kernel/drivers/net/pcmcia +lib/modules/KVER-ipfire-smp/kernel/drivers/net/tulip/xircom_cb.ko +lib/modules/KVER-ipfire-smp/kernel/drivers/scsi/pcmcia +lib/modules/KVER-ipfire-smp/kernel/drivers/parport/parport_cs.ko +lib/modules/KVER-ipfire-smp/kernel/drivers/pcmcia +lib/modules/KVER-ipfire-smp/kernel/drivers/serial/serial_cs.ko +lib/modules/KVER-ipfire-smp/kernel/sound/pcmcia diff --git a/config/v4l-dvb/config b/config/v4l-dvb/config index cc895a8a4..50bffdb80 100644 --- a/config/v4l-dvb/config +++ b/config/v4l-dvb/config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: -# Tue May 13 21:13:59 2008 +# Sat May 17 18:34:26 2008 # CONFIG_INPUT=y CONFIG_USB=m @@ -12,7 +12,7 @@ CONFIG_FW_LOADER=m # CONFIG_of is not set # CONFIG_PLAT_M32700UT is not set CONFIG_NET=y -CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_COPYAREA=m # CONFIG_GENERIC_GPIO is not set # 
CONFIG_SOUND_PRIME is not set CONFIG_SND_AC97_CODEC=m @@ -20,7 +20,7 @@ CONFIG_SND_AC97_CODEC=m # CONFIG_dependencies is not set # CONFIG_SGI_IP22 is not set CONFIG_I2C=m -CONFIG_FB_CFB_IMAGEBLIT=y +CONFIG_FB_CFB_IMAGEBLIT=m # CONFIG_GPIO_PCA953X is not set # CONFIG_STANDALONE is not set CONFIG_HOTPLUG=y @@ -51,7 +51,7 @@ CONFIG_SND_PCM=m CONFIG_EXPERIMENTAL=y # CONFIG_M32R is not set # CONFIG_I2C_ALGO_SGI is not set -CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_FILLRECT=m CONFIG_VIRT_TO_BUS=y # CONFIG_VIDEO_KERNEL_VERSION is not set @@ -91,11 +91,12 @@ CONFIG_MEDIA_TUNER_MT2131=m CONFIG_MEDIA_TUNER_QT1010=m CONFIG_MEDIA_TUNER_XC2028=m CONFIG_MEDIA_TUNER_XC5000=m -# CONFIG_MEDIA_TUNER_MXL5005S is not set +CONFIG_MEDIA_TUNER_MXL5005S=m CONFIG_VIDEO_V4L2=m CONFIG_VIDEO_V4L1=m CONFIG_VIDEOBUF_GEN=m CONFIG_VIDEOBUF_DMA_SG=m +CONFIG_VIDEOBUF_VMALLOC=m CONFIG_VIDEOBUF_DVB=m CONFIG_VIDEO_BTCX=m CONFIG_VIDEO_IR_I2C=m @@ -103,67 +104,123 @@ CONFIG_VIDEO_IR=m CONFIG_VIDEO_TVEEPROM=m CONFIG_VIDEO_TUNER=m CONFIG_VIDEO_CAPTURE_DRIVERS=y -CONFIG_VIDEO_ADV_DEBUG=n +# CONFIG_VIDEO_ADV_DEBUG is not set CONFIG_VIDEO_HELPER_CHIPS_AUTO=y CONFIG_VIDEO_TVAUDIO=m CONFIG_VIDEO_TDA7432=m +CONFIG_VIDEO_TDA9840=m CONFIG_VIDEO_TDA9875=m +CONFIG_VIDEO_TEA6415C=m +CONFIG_VIDEO_TEA6420=m CONFIG_VIDEO_MSP3400=m +CONFIG_VIDEO_CS5345=m CONFIG_VIDEO_CS53L32A=m +CONFIG_VIDEO_M52790=m CONFIG_VIDEO_WM8775=m +CONFIG_VIDEO_WM8739=m +CONFIG_VIDEO_VP27SMPX=m +CONFIG_VIDEO_BT819=m +CONFIG_VIDEO_BT856=m +CONFIG_VIDEO_KS0127=m +CONFIG_VIDEO_OV7670=m +CONFIG_VIDEO_SAA7110=m +CONFIG_VIDEO_SAA7111=m +CONFIG_VIDEO_SAA7114=m CONFIG_VIDEO_SAA711X=m +CONFIG_VIDEO_SAA717X=m +CONFIG_VIDEO_TVP5150=m +CONFIG_VIDEO_VPX3220=m CONFIG_VIDEO_CX25840=m CONFIG_VIDEO_CX2341X=m +CONFIG_VIDEO_SAA7127=m +CONFIG_VIDEO_SAA7185=m +CONFIG_VIDEO_ADV7170=m +CONFIG_VIDEO_ADV7175=m +CONFIG_VIDEO_UPD64031A=m +CONFIG_VIDEO_UPD64083=m # CONFIG_VIDEO_VIVI is not set CONFIG_VIDEO_BT848=m CONFIG_VIDEO_BT848_DVB=y -# CONFIG_VIDEO_SAA6588 is not set 
-# CONFIG_VIDEO_PMS is not set -# CONFIG_VIDEO_BWQCAM is not set -# CONFIG_VIDEO_CQCAM is not set -# CONFIG_VIDEO_CPIA is not set -# CONFIG_VIDEO_CPIA2 is not set -# CONFIG_VIDEO_SAA5246A is not set -# CONFIG_VIDEO_SAA5249 is not set -# CONFIG_TUNER_3036 is not set -# CONFIG_VIDEO_STRADIS is not set -# CONFIG_VIDEO_ZORAN is not set +CONFIG_VIDEO_SAA6588=m +CONFIG_VIDEO_PMS=m +CONFIG_VIDEO_BWQCAM=m +CONFIG_VIDEO_CQCAM=m +CONFIG_VIDEO_CPIA=m +CONFIG_VIDEO_CPIA_USB=m +CONFIG_VIDEO_CPIA2=m +CONFIG_VIDEO_SAA5246A=m +CONFIG_VIDEO_SAA5249=m +CONFIG_TUNER_3036=m +CONFIG_VIDEO_STRADIS=m +CONFIG_VIDEO_ZORAN_ZR36060=m +CONFIG_VIDEO_ZORAN=m +CONFIG_VIDEO_ZORAN_BUZ=m +CONFIG_VIDEO_ZORAN_DC10=m +CONFIG_VIDEO_ZORAN_DC30=m +CONFIG_VIDEO_ZORAN_LML33=m +CONFIG_VIDEO_ZORAN_LML33R10=m +CONFIG_VIDEO_ZORAN_AVS6EYES=m CONFIG_VIDEO_SAA7134=m -# CONFIG_VIDEO_SAA7134_ALSA is not set +CONFIG_VIDEO_SAA7134_ALSA=m CONFIG_VIDEO_SAA7134_DVB=m -# CONFIG_VIDEO_MXB is not set -# CONFIG_VIDEO_DPC is not set -# CONFIG_VIDEO_HEXIUM_ORION is not set -# CONFIG_VIDEO_HEXIUM_GEMINI is not set +CONFIG_VIDEO_MXB=m +CONFIG_VIDEO_DPC=m +CONFIG_VIDEO_HEXIUM_ORION=m +CONFIG_VIDEO_HEXIUM_GEMINI=m CONFIG_VIDEO_CX88=m -# CONFIG_VIDEO_CX88_ALSA is not set -# CONFIG_VIDEO_CX88_BLACKBIRD is not set +CONFIG_VIDEO_CX88_ALSA=m +CONFIG_VIDEO_CX88_BLACKBIRD=m CONFIG_VIDEO_CX88_DVB=m CONFIG_VIDEO_CX88_VP3054=m -# CONFIG_VIDEO_CX23885 is not set -# CONFIG_VIDEO_AU0828 is not set -# CONFIG_VIDEO_IVTV is not set -# CONFIG_VIDEO_CX18 is not set +CONFIG_VIDEO_CX23885=m +CONFIG_VIDEO_AU0828=m +CONFIG_VIDEO_IVTV=m +CONFIG_VIDEO_FB_IVTV=m +CONFIG_VIDEO_CX18=m +CONFIG_VIDEO_CAFE_CCIC=m CONFIG_V4L_USB_DRIVERS=y CONFIG_VIDEO_PVRUSB2=m +CONFIG_VIDEO_PVRUSB2_SYSFS=y CONFIG_VIDEO_PVRUSB2_DVB=y -# CONFIG_VIDEO_EM28XX is not set -# CONFIG_VIDEO_USBVISION is not set -# CONFIG_USB_VICAM is not set -# CONFIG_USB_IBMCAM is not set -# CONFIG_USB_KONICAWC is not set -# CONFIG_USB_QUICKCAM_MESSENGER is not set -# CONFIG_USB_ET61X251 is not set 
-# CONFIG_VIDEO_OVCAMCHIP is not set -# CONFIG_USB_W9968CF is not set -# CONFIG_USB_OV511 is not set -# CONFIG_USB_SE401 is not set -# CONFIG_USB_SN9C102 is not set -# CONFIG_USB_STV680 is not set -# CONFIG_USB_ZC0301 is not set -# CONFIG_USB_PWC is not set -# CONFIG_USB_ZR364XX is not set -# CONFIG_RADIO_ADAPTERS is not set +# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set +CONFIG_VIDEO_EM28XX=m +CONFIG_VIDEO_EM28XX_ALSA=m +CONFIG_VIDEO_EM28XX_DVB=m +CONFIG_VIDEO_USBVISION=m +CONFIG_VIDEO_USBVIDEO=m +CONFIG_USB_VICAM=m +CONFIG_USB_IBMCAM=m +CONFIG_USB_KONICAWC=m +CONFIG_USB_QUICKCAM_MESSENGER=m +CONFIG_USB_ET61X251=m +CONFIG_VIDEO_OVCAMCHIP=m +CONFIG_USB_W9968CF=m +CONFIG_USB_OV511=m +CONFIG_USB_SE401=m +CONFIG_USB_SN9C102=m +CONFIG_USB_STV680=m +CONFIG_USB_ZC0301=m +CONFIG_USB_PWC=m +CONFIG_USB_PWC_DEBUG=y +CONFIG_USB_ZR364XX=m +CONFIG_RADIO_ADAPTERS=y +CONFIG_RADIO_CADET=m +CONFIG_RADIO_RTRACK=m +CONFIG_RADIO_RTRACK2=m +CONFIG_RADIO_AZTECH=m +CONFIG_RADIO_GEMTEK=m +CONFIG_RADIO_GEMTEK_PCI=m +CONFIG_RADIO_MAXIRADIO=m +CONFIG_RADIO_MAESTRO=m +CONFIG_RADIO_SF16FMI=m +CONFIG_RADIO_SF16FMR2=m +CONFIG_RADIO_TERRATEC=m +CONFIG_RADIO_TRUST=m +CONFIG_RADIO_TYPHOON=m +CONFIG_RADIO_TYPHOON_PROC_FS=y +CONFIG_RADIO_ZOLTRIX=m +CONFIG_USB_DSBR=m +CONFIG_USB_SI470X=m CONFIG_DVB_CAPTURE_DRIVERS=y # @@ -183,7 +240,7 @@ CONFIG_DVB_BUDGET_PATCH=m # Supported USB Adapters # CONFIG_DVB_USB=m -CONFIG_DVB_USB_DEBUG=n +# CONFIG_DVB_USB_DEBUG is not set CONFIG_DVB_USB_A800=m CONFIG_DVB_USB_DIBUSB_MB=m CONFIG_DVB_USB_DIBUSB_MB_FAULTY=y @@ -215,7 +272,7 @@ CONFIG_DVB_CINERGYT2=m CONFIG_DVB_B2C2_FLEXCOP=m CONFIG_DVB_B2C2_FLEXCOP_PCI=m CONFIG_DVB_B2C2_FLEXCOP_USB=m -CONFIG_DVB_B2C2_FLEXCOP_DEBUG=n +# CONFIG_DVB_B2C2_FLEXCOP_DEBUG is not set # # Supported BT878 Adapters @@ -302,7 +359,8 @@ CONFIG_DVB_TUNER_DIB0070=m CONFIG_DVB_LNBP21=m CONFIG_DVB_ISL6405=m CONFIG_DVB_ISL6421=m -# CONFIG_DAB is not set +CONFIG_DAB=y +CONFIG_USB_DABUSB=m # # Audio devices for multimedia @@ -311,7 +369,8 @@ 
CONFIG_DVB_ISL6421=m # # ALSA sound # -# CONFIG_SND_BT87X is not set +CONFIG_SND_BT87X=m +CONFIG_SND_BT87X_OVERCLOCK=y # # OSS sound diff --git a/doc/packages-list.txt b/doc/packages-list.txt index 87be4fbfc..9d1aacbbb 100644 --- a/doc/packages-list.txt +++ b/doc/packages-list.txt @@ -1,4 +1,4 @@ -== List of softwares used to build IPFire Version: 2.1 == +== List of softwares used to build IPFire Version: 2.2-test == * Archive-Tar-1.29 * Archive-Zip-1.16 * BerkeleyDB-0.27 @@ -70,7 +70,6 @@ * dhcp-3.1.0 * dhcpcd-2.0.8 * diffutils-2.8.1 -* dm9601-2.6 * dnsmasq-2.40 * dosfstools-2.11 * e2fsprogs-1.39 @@ -128,7 +127,7 @@ * ipp2p-0.8.2 * ipp2p-0.8.2-iptables * iproute2-2.6.16-060323 -* iptables-1.3.5 +* iptables-1.3.8 * iptstate-2.1 * iputils-ss020927 * java-1.5.0_06-for-ipfire @@ -163,7 +162,7 @@ * libwww-perl-5.803 * libxml2-2.6.26 * libxslt-1.1.17 -* linux-2.6.16.57 +* linux-2.6.20.21 * linux-atm-2.4.1 * linux-libc-headers-2.6.12.0 * linuxigd-0.95 @@ -178,7 +177,6 @@ * man-pages-2.34 * mbr-1.1.8 * mc-4.6.1 -* mcs7830-060926 * memtest86+-1.65 * misc-progs * mkinitcpio-0.5.9 @@ -213,7 +211,7 @@ * openmailadmin-1.0.0 * openssh-4.7p1 * openssl-0.9.8g -* openswan-2.4.9 +* openswan-2.4.12 * openvpn-2.0.9 * pam_mysql-0.7RC1 * patch-2.5.4 diff --git a/lfs/collectd b/lfs/collectd index 90564fe43..726297917 100644 --- a/lfs/collectd +++ b/lfs/collectd @@ -78,10 +78,10 @@ $(TARGET) : $(patsubst %,$(DIR_DL)/%,$(objects)) @$(PREBUILD) @rm -rf $(DIR_APP) && cd $(DIR_SRC) && tar jxf $(DIR_DL)/$(DL_FILE) cd $(DIR_APP) && ./configure --prefix=/usr --localstatedir=/var \ - --disable-{apache,apple_sensors,csv,ipvs,memcached,mysql,netlink,nginx,nut} \ + --disable-{apache,apple_sensors,csv,iptables,ipvs,memcached,mysql,netlink,nginx,nut} \ --disable-{perl,serial,sensors,snmp,tape,vserver,wireless,xmms} \ --enable-{apcups,battery,cpu{,freq},df,disk,dns,email,entropy,exec,hddtemp} \ - --enable-{interface,iptables,irq,load,logfile,mbmon,memory,multimeter} \ + 
--enable-{interface,irq,load,logfile,mbmon,memory,multimeter} \ --enable-{network,nfs,ntpd,ping,processes,rrdtool,swap,syslog} \ --enable-{tcpconns,unixsock,users} \ --with-rrdtool=/usr/share/rrdtool-1.2.15 --enable-debug diff --git a/lfs/core-updates b/lfs/core-updates index 2eddfaf89..9d242eea3 100644 --- a/lfs/core-updates +++ b/lfs/core-updates @@ -55,6 +55,7 @@ core/%: eval $$(cat /usr/src/config/rootfiles/$@/meta) cp -f /usr/src/config/rootfiles/$@/update.sh /install/packages/package cp -f /usr/src/config/rootfiles/$@/files /install/packages/package/ROOTFILES + sed -e 's/KVER/$(KVER)/g' -i /install/packages/package/ROOTFILES chmod 755 /install/packages/package/update.sh cd / && tar cf /install/packages/package/files --files-from=/install/packages/package/ROOTFILES --exclude='#*' -cat /install/packages/package/ROOTFILES | grep -v "#" > /install/packages/package/ROOTFILES diff --git a/lfs/iptables b/lfs/iptables index bc53e3c44..de88dfa3d 100644 --- a/lfs/iptables +++ b/lfs/iptables @@ -24,7 +24,7 @@ include Config -VER = 1.3.5 +VER = 1.3.8 THISAPP = iptables-$(VER) DL_FILE = $(THISAPP).tar.bz2 @@ -45,7 +45,7 @@ netfilter-layer7-v2.9.tar.gz = $(URL_IPFIRE)/netfilter-layer7-v2.9.tar.gz libnfnetlink-0.0.25.tar.bz2 = $(URL_IPFIRE)/libnfnetlink-0.0.25.tar.bz2 libnetfilter_queue-0.0.13.tar.bz2 = $(URL_IPFIRE)/libnetfilter_queue-0.0.13.tar.bz2 -$(DL_FILE)_MD5 = 00fb916fa8040ca992a5ace56d905ea5 +$(DL_FILE)_MD5 = 0a9209f928002e5eee9cdff8fef4d4b3 netfilter-layer7-v2.9.tar.gz_MD5 = ebf9043a5352ebe6dbd721989ef83dee libnfnetlink-0.0.25.tar.bz2_MD5 = fc915a2e66d282e524af6ef939042d7d libnetfilter_queue-0.0.13.tar.bz2_MD5 = 660cbfd3dc8c10bf9b1803cd2b688256 diff --git a/lfs/linux b/lfs/linux index a8ca0d0d7..8135ab436 100644 --- a/lfs/linux +++ b/lfs/linux @@ -24,8 +24,8 @@ include Config -PATCHLEVEL = .57 -VER = 2.6.16.57 +PATCHLEVEL = .21 +VER = 2.6.20.21 THISAPP = linux-$(VER) DL_FILE = $(THISAPP).tar.bz2 @@ -48,30 +48,30 @@ endif 
############################################################################### objects =$(DL_FILE) \ mISDN-1_1_5.tar.gz \ - squashfs3.2-r2.tar.gz \ - iptables-1.3.5.tar.bz2 \ + squashfs3.3.tgz \ + iptables-1.3.8.tar.bz2 \ patch-o-matic-ng-20061210.tar.bz2 \ netfilter-layer7-v2.9.tar.gz \ patch-2.6.16-nath323-1.3.bz2 \ - openswan-2.4.9.tar.gz + openswan-2.4.12.tar.gz $(DL_FILE) = $(URL_IPFIRE)/$(DL_FILE) patch-o-matic-ng-20061210.tar.bz2 = $(URL_IPFIRE)/patch-o-matic-ng-20061210.tar.bz2 -iptables-1.3.5.tar.bz2 = $(URL_IPFIRE)/iptables-1.3.5.tar.bz2 -netfilter-layer7-v2.9.tar.gz = $(URL_IPFIRE)/netfilter-layer7-v2.9.tar.gz -patch-2.6.16-nath323-1.3.bz2 = $(URL_IPFIRE)/patch-2.6.16-nath323-1.3.bz2 -squashfs3.2-r2.tar.gz = $(URL_IPFIRE)/squashfs3.2-r2.tar.gz -mISDN-1_1_5.tar.gz = $(URL_IPFIRE)/mISDN-1_1_5.tar.gz -openswan-2.4.9.tar.gz = $(URL_IPFIRE)/openswan-2.4.9.tar.gz - -$(DL_FILE)_MD5 = d6f37a2967be44ab2696eef3132d4b0a +iptables-1.3.8.tar.bz2 = $(URL_IPFIRE)/iptables-1.3.8.tar.bz2 +netfilter-layer7-v2.9.tar.gz = $(URL_IPFIRE)/netfilter-layer7-v2.9.tar.gz +patch-2.6.16-nath323-1.3.bz2 = $(URL_IPFIRE)/patch-2.6.16-nath323-1.3.bz2 +squashfs3.3.tgz = $(URL_IPFIRE)/squashfs3.3.tgz +mISDN-1_1_5.tar.gz = $(URL_IPFIRE)/mISDN-1_1_5.tar.gz +openswan-2.4.12.tar.gz = $(URL_IPFIRE)/openswan-2.4.12.tar.gz + +$(DL_FILE)_MD5 = fbedc192e654735936cc780da8deeba4 patch-o-matic-ng-20061210.tar.bz2_MD5 = 76edac76301b45f89e467b41c8cf4393 -iptables-1.3.5.tar.bz2_MD5 = 00fb916fa8040ca992a5ace56d905ea5 +iptables-1.3.8.tar.bz2_MD5 = 0a9209f928002e5eee9cdff8fef4d4b3 netfilter-layer7-v2.9.tar.gz_MD5 = ebf9043a5352ebe6dbd721989ef83dee patch-2.6.16-nath323-1.3.bz2_MD5 = f926409ff703a307baf54b57ab75d138 -squashfs3.2-r2.tar.gz_MD5 = bf360b92eba9e6d5610196ce2e02fcd1 -mISDN-1_1_5.tar.gz_MD5 = 93b1cff7817b82638a0475c2b7b7f1b6 -openswan-2.4.9.tar.gz_MD5 = 845f12d80d443cfa1a52f2b53b987bee +squashfs3.3.tgz_MD5 = 95c40fca0d886893631b5de14a0af25b +mISDN-1_1_5.tar.gz_MD5 = 
93b1cff7817b82638a0475c2b7b7f1b6 +openswan-2.4.12.tar.gz_MD5 = 0bca0cc205d2d83eff64a7cea825ce7a install : $(TARGET) @@ -103,54 +103,49 @@ $(TARGET) : $(patsubst %,$(DIR_DL)/%,$(objects)) @rm -rf $(DIR_APP) $(DIR_SRC)/linux $(DIR_SRC)/xen-* && cd $(DIR_SRC) && tar jxf $(DIR_DL)/$(DL_FILE) ln -s linux-$(VER) /usr/src/linux - # An UTF8 patch from LFS - cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-2.6.16.27-utf8_input-1.patch - # Openswan 2 cd $(DIR_SRC) && rm -rf openswan-* - cd $(DIR_SRC) && tar xfz $(DIR_DL)/openswan-2.4.9.tar.gz - cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/openswan-2.4.9.kernel-2.6-klips.patch - cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/openswan-2.4.9.kernel-2.6-natt.patch - + cd $(DIR_SRC) && tar xfz $(DIR_DL)/openswan-2.4.12.tar.gz + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/openswan-2.4.12.kernel-2.6-klips.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/openswan-2.4.12.kernel-2.6.20.21-natt.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/openswan-2.4.12.kernel-2.6.20-cryptoalg.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/openswan-2.4.12-missing-files.patch cd $(DIR_SRC)/openswan-* && sed -i -e 's/INC_USRLOCAL=\/usr\/local/INC_USRLOCAL=\/usr/' Makefile.inc - # H323 conntrack - cd $(DIR_APP) && bunzip2 -cd $(DIR_DL)/patch-2.6.16-nath323-1.3.bz2 | patch -Np1 - # Reiser4 - cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/reiser4-for-2.6.16-5.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/reiser4-for-2.6.20.patch # SquashFS cd $(DIR_SRC) && rm -rf squashfs* - cd $(DIR_SRC) && tar xfz $(DIR_DL)/squashfs3.2-r2.tar.gz - cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/squashfs3.2-r2/kernel-patches/linux-2.6.16/squashfs3.2-patch + cd $(DIR_SRC) && tar xfz $(DIR_DL)/squashfs3.3.tgz + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/squashfs3.3/kernel-patches/linux-2.6.20/squashfs3.3-patch # ip_conntrack permissions from 440 to 444 cd $(DIR_APP) && patch -Np0 < 
$(DIR_SRC)/src/patches/ip_conntrack_standalone-patch-for-ipfire.patch # Some VIA patches - cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/padlock-prereq-2.6.16.diff - cd $(DIR_APP) && patch -Np0 < $(DIR_SRC)/src/patches/epia_dma.patch + #cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/padlock-prereq-2.6.16.diff + #cd $(DIR_APP) && patch -Np0 < $(DIR_SRC)/src/patches/epia_dma.patch # Patch-o-matic cd $(DIR_SRC) && rm -rf iptables-* patch-o-matic* - cd $(DIR_SRC) && tar xfj $(DIR_DL)/iptables-1.3.5.tar.bz2 - cd $(DIR_SRC) && ln -sf iptables-1.3.5 iptables - cd $(DIR_SRC) && tar xfj $(DIR_DL)/patch-o-matic-ng-20061210.tar.bz2 - cd $(DIR_SRC)/patch-o-matic-ng* && \ - ./runme --batch --kernel-path=$(ROOT)/usr/src/$(THISAPP)/ \ - --iptables-path=$(ROOT)/usr/src/iptables/ \ - sip-conntrack-nat rtsp-conntrack-nat \ - mms-conntrack-nat + cd $(DIR_SRC) && tar xfj $(DIR_DL)/iptables-1.3.8.tar.bz2 + cd $(DIR_SRC) && ln -sf iptables-1.3.8 iptables + #cd $(DIR_SRC) && tar xfj $(DIR_DL)/patch-o-matic-ng-20061210.tar.bz2 + #cd $(DIR_SRC)/patch-o-matic-ng* && \ + # ./runme --batch --kernel-path=$(ROOT)/usr/src/$(THISAPP)/ \ + # --iptables-path=$(ROOT)/usr/src/iptables/ \ + # sip-conntrack-nat rtsp-conntrack-nat \ + # mms-conntrack-nat # Layer7-patch cd $(DIR_SRC) && rm -rf $(DIR_SRC)/netfilter-layer7-v2.9 cd $(DIR_SRC) && tar xzf $(DIR_DL)/netfilter-layer7-v2.9.tar.gz - cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/netfilter-layer7-v2.9/for_older_kernels/kernel-2.6.13-2.6.16-layer7-2.2.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/netfilter-layer7-v2.9/kernel-2.6.18-2.6.19-layer7-2.9.patch # Linux Intermediate Queueing Device ifeq "$(XEN)" "" - cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-2.6.16-imq2.diff + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-2.6.20-imq.diff endif # mISDN @@ -162,7 +157,7 @@ endif ifeq "$(SMP)" "" # Only do this once on the non-SMP pass - cd $(DIR_SRC) && tar czf $(DIR_DL)/iptables-fixed.tar.gz iptables-1.3.5 + cd 
$(DIR_SRC) && tar czf $(DIR_DL)/iptables-fixed.tar.gz iptables-1.3.8 endif # Cleanup kernel source diff --git a/lfs/openswan b/lfs/openswan index fe30515da..70550386c 100644 --- a/lfs/openswan +++ b/lfs/openswan @@ -24,7 +24,7 @@ include Config -VER = 2.4.9 +VER = 2.4.12 THISAPP = openswan-$(VER) DL_FILE = $(THISAPP).tar.gz @@ -40,7 +40,7 @@ objects = $(DL_FILE) $(DL_FILE) = $(DL_FROM)/$(DL_FILE) -$(DL_FILE)_MD5 = 845f12d80d443cfa1a52f2b53b987bee +$(DL_FILE)_MD5 = 0bca0cc205d2d83eff64a7cea825ce7a install : $(TARGET) @@ -88,8 +88,8 @@ $(TARGET) : $(patsubst %,$(DIR_DL)/%,$(objects)) ln -sf $(CONFIG_ROOT)/certs /etc/ipsec.d/certs ln -sf $(CONFIG_ROOT)/crls /etc/ipsec.d/crls - cd /usr/lib/ipsec && patch -Np0 < $(DIR_SRC)/src/patches/openswan-2.4.9-startklips-1.patch - cd /usr/lib/ipsec && patch -Np0 < $(DIR_SRC)/src/patches/openswan-2.4.9-realsetup-1.patch + cd /usr/lib/ipsec && patch -Np0 < $(DIR_SRC)/src/patches/openswan-2.4.12-startklips-1.patch +# cd /usr/lib/ipsec && patch -Np0 < $(DIR_SRC)/src/patches/openswan-2.4.9-realsetup-1.patch cd /usr/lib/ipsec && patch -Np0 < $(DIR_SRC)/src/patches/openswan-2.4.9-updown-1.patch cd /usr/lib/ipsec && patch -Np0 < $(DIR_SRC)/src/patches/openswan-2.4.9-updown_x509-1.patch cd /etc/ipsec.d/policies && patch -Np0 < $(DIR_SRC)/src/patches/openswan-2.4.9-clear-1.patch diff --git a/lfs/v4l-dvb b/lfs/v4l-dvb index d2b0cd47f..7c2e7376d 100644 --- a/lfs/v4l-dvb +++ b/lfs/v4l-dvb @@ -78,13 +78,11 @@ $(subst %,%_MD5,$(objects)) : $(TARGET) : $(patsubst %,$(DIR_DL)/%,$(objects)) @$(PREBUILD) @rm -rf $(DIR_APP) && cd $(DIR_SRC) && tar jxf $(DIR_DL)/$(DL_FILE) - cd $(DIR_APP) && patch -N -p1 \ - < $(DIR_SRC)/src/patches/v4l-dvb-kernel2.6.16-disabledvideodev.patch cp -f $(DIR_SRC)/config/v4l-dvb/config $(DIR_APP)/v4l/.config ifeq "$(SMP)" "1" - cd $(DIR_APP) && make release VER=2.6.16.57-ipfire-smp + cd $(DIR_APP) && make release VER=$(KVER)-ipfire-smp else - cd $(DIR_APP) && make release VER=2.6.16.57-ipfire + cd $(DIR_APP) && make 
release VER=$(KVER)-ipfire endif cd $(DIR_APP) && make $(MAKETUNING) all cd $(DIR_APP) && make install diff --git a/make.sh b/make.sh index fb98ff004..212c8ee4c 100755 --- a/make.sh +++ b/make.sh @@ -24,7 +24,7 @@ NAME="IPFire" # Software name SNAME="ipfire" # Short name -VERSION="2.1" # Version number +VERSION="2.2-test" # Version number SLOGAN="www.ipfire.org" # Software slogan CONFIG_ROOT=/var/ipfire # Configuration rootdir NICE=10 # Nice level @@ -336,9 +336,9 @@ buildipfire() { ipfiremake zaptel SMP=1 ipfiremake r8169 SMP=1 ipfiremake r8168 SMP=1 - ipfiremake mcs7830 SMP=1 + #ipfiremake mcs7830 SMP=1 ipfiremake atl1 SMP=1 - ipfiremake dm9601 SMP=1 + #ipfiremake dm9601 SMP=1 ipfiremake kqemu SMP=1 ipfiremake v4l-dvb SMP=1 ipfiremake sane KMOD=1 SMP=1 @@ -347,9 +347,9 @@ buildipfire() { ipfiremake zaptel ipfiremake r8169 ipfiremake r8168 - ipfiremake mcs7830 + #ipfiremake mcs7830 ipfiremake atl1 - ipfiremake dm9601 + #ipfiremake dm9601 ipfiremake kqemu ipfiremake v4l-dvb ipfiremake sane KMOD=1 @@ -670,7 +670,7 @@ buildpackages() { } ipfirepackages() { - ipfiremake core-updates + #ipfiremake core-updates for i in $(ls -1 $BASEDIR/config/rootfiles/packages); do if [ -e $BASEDIR/lfs/$i ]; then ipfiredist $i diff --git a/src/pakfire/pakfire.conf b/src/pakfire/pakfire.conf index ac90b07a5..7eeb592ad 100644 --- a/src/pakfire/pakfire.conf +++ b/src/pakfire/pakfire.conf @@ -21,7 +21,7 @@ package Conf; -$version = "2.1"; +$version = "2.2-test"; $mainserver = "pakfire.ipfire.org"; diff --git a/src/patches/openswan-2.4.12-missing-files.patch b/src/patches/openswan-2.4.12-missing-files.patch new file mode 100644 index 000000000..ae4358f54 --- /dev/null +++ b/src/patches/openswan-2.4.12-missing-files.patch @@ -0,0 +1,277 @@ +--- /dev/null 2008-04-27 15:35:57.000000000 +0200 ++++ linux/net/ipsec/version.c 2008-05-17 18:35:27.000000000 +0200 +@@ -0,0 +1,44 @@ ++/* ++ * return IPsec version information ++ * Copyright (C) 2001 Henry Spencer. 
++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: version.in.c,v 1.2 2004/04/14 05:09:46 ken Exp $ ++ */ ++ ++#ifdef __KERNEL__ ++#include ++#endif ++ ++#include "openswan.h" ++ ++#define V "2.4.12" /* substituted in by Makefile */ ++static const char openswan_number[] = V; ++static const char openswan_string[] = "Openswan " V; ++ ++/* ++ - ipsec_version_code - return IPsec version number/code, as string ++ */ ++const char * ++ipsec_version_code() ++{ ++ return openswan_number; ++} ++ ++/* ++ - ipsec_version_string - return full version string ++ */ ++const char * ++ipsec_version_string() ++{ ++ return openswan_string; ++} +--- /dev/null 2008-04-27 15:35:57.000000000 +0200 ++++ linux/net/ipsec/zutil.c 2008-05-16 17:28:08.000000000 +0200 +@@ -0,0 +1,227 @@ ++/* zutil.c -- target dependent utility functions for the compression library ++ * Copyright (C) 1995-2002 Jean-loup Gailly. 
++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++/* @(#) $Id: zutil.c,v 1.5 2004/07/10 07:48:40 mcr Exp $ */ ++ ++#include ++ ++#define MY_ZCALLOC ++ ++struct internal_state {int dummy;}; /* for buggy compilers */ ++ ++#ifndef STDC ++extern void exit OF((int)); ++#endif ++ ++const char *z_errmsg[10] = { ++"need dictionary", /* Z_NEED_DICT 2 */ ++"stream end", /* Z_STREAM_END 1 */ ++"", /* Z_OK 0 */ ++"file error", /* Z_ERRNO (-1) */ ++"stream error", /* Z_STREAM_ERROR (-2) */ ++"data error", /* Z_DATA_ERROR (-3) */ ++"insufficient memory", /* Z_MEM_ERROR (-4) */ ++"buffer error", /* Z_BUF_ERROR (-5) */ ++"incompatible version",/* Z_VERSION_ERROR (-6) */ ++""}; ++ ++ ++const char * ZEXPORT zlibVersion() ++{ ++ return ZLIB_VERSION; ++} ++ ++#ifdef DEBUG ++ ++# ifndef verbose ++# define verbose 0 ++# endif ++int z_verbose = verbose; ++ ++void z_error (m) ++ char *m; ++{ ++ fprintf(stderr, "%s\n", m); ++ exit(1); ++} ++#endif ++ ++/* exported to allow conversion of error code to string for compress() and ++ * uncompress() ++ */ ++const char * ZEXPORT zError(err) ++ int err; ++{ ++ return ERR_MSG(err); ++} ++ ++ ++#ifndef HAVE_MEMCPY ++ ++void zmemcpy(dest, source, len) ++ Bytef* dest; ++ const Bytef* source; ++ uInt len; ++{ ++ if (len == 0) return; ++ do { ++ *dest++ = *source++; /* ??? to be unrolled */ ++ } while (--len != 0); ++} ++ ++int zmemcmp(s1, s2, len) ++ const Bytef* s1; ++ const Bytef* s2; ++ uInt len; ++{ ++ uInt j; ++ ++ for (j = 0; j < len; j++) { ++ if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; ++ } ++ return 0; ++} ++ ++void zmemzero(dest, len) ++ Bytef* dest; ++ uInt len; ++{ ++ if (len == 0) return; ++ do { ++ *dest++ = 0; /* ??? 
to be unrolled */ ++ } while (--len != 0); ++} ++#endif ++ ++#ifdef __TURBOC__ ++#if (defined( __BORLANDC__) || !defined(SMALL_MEDIUM)) && !defined(__32BIT__) ++/* Small and medium model in Turbo C are for now limited to near allocation ++ * with reduced MAX_WBITS and MAX_MEM_LEVEL ++ */ ++# define MY_ZCALLOC ++ ++/* Turbo C malloc() does not allow dynamic allocation of 64K bytes ++ * and farmalloc(64K) returns a pointer with an offset of 8, so we ++ * must fix the pointer. Warning: the pointer must be put back to its ++ * original form in order to free it, use zcfree(). ++ */ ++ ++#define MAX_PTR 10 ++/* 10*64K = 640K */ ++ ++local int next_ptr = 0; ++ ++typedef struct ptr_table_s { ++ voidpf org_ptr; ++ voidpf new_ptr; ++} ptr_table; ++ ++local ptr_table table[MAX_PTR]; ++/* This table is used to remember the original form of pointers ++ * to large buffers (64K). Such pointers are normalized with a zero offset. ++ * Since MSDOS is not a preemptive multitasking OS, this table is not ++ * protected from concurrent access. This hack doesn't work anyway on ++ * a protected system like OS/2. Use Microsoft C instead. ++ */ ++ ++voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) ++{ ++ voidpf buf = opaque; /* just to make some compilers happy */ ++ ulg bsize = (ulg)items*size; ++ ++ /* If we allocate less than 65520 bytes, we assume that farmalloc ++ * will return a usable pointer which doesn't have to be normalized. 
++ */ ++ if (bsize < 65520L) { ++ buf = farmalloc(bsize); ++ if (*(ush*)&buf != 0) return buf; ++ } else { ++ buf = farmalloc(bsize + 16L); ++ } ++ if (buf == NULL || next_ptr >= MAX_PTR) return NULL; ++ table[next_ptr].org_ptr = buf; ++ ++ /* Normalize the pointer to seg:0 */ ++ *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; ++ *(ush*)&buf = 0; ++ table[next_ptr++].new_ptr = buf; ++ return buf; ++} ++ ++void zcfree (voidpf opaque, voidpf ptr) ++{ ++ int n; ++ if (*(ush*)&ptr != 0) { /* object < 64K */ ++ farfree(ptr); ++ return; ++ } ++ /* Find the original pointer */ ++ for (n = 0; n < next_ptr; n++) { ++ if (ptr != table[n].new_ptr) continue; ++ ++ farfree(table[n].org_ptr); ++ while (++n < next_ptr) { ++ table[n-1] = table[n]; ++ } ++ next_ptr--; ++ return; ++ } ++ ptr = opaque; /* just to make some compilers happy */ ++ Assert(0, "zcfree: ptr not found"); ++} ++#endif ++#endif /* __TURBOC__ */ ++ ++ ++#if defined(M_I86) && !defined(__32BIT__) ++/* Microsoft C in 16-bit mode */ ++ ++# define MY_ZCALLOC ++ ++#if (!defined(_MSC_VER) || (_MSC_VER <= 600)) ++# define _halloc halloc ++# define _hfree hfree ++#endif ++ ++voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) ++{ ++ if (opaque) opaque = 0; /* to make compiler happy */ ++ return _halloc((long)items, size); ++} ++ ++void zcfree (voidpf opaque, voidpf ptr) ++{ ++ if (opaque) opaque = 0; /* to make compiler happy */ ++ _hfree(ptr); ++} ++ ++#endif /* MSC */ ++ ++ ++#ifndef MY_ZCALLOC /* Any system without a special alloc function */ ++ ++#ifndef STDC ++extern voidp calloc OF((uInt items, uInt size)); ++extern void free OF((voidpf ptr)); ++#endif ++ ++voidpf zcalloc (opaque, items, size) ++ voidpf opaque; ++ unsigned items; ++ unsigned size; ++{ ++ if (opaque) items += size - size; /* make compiler happy */ ++ return (voidpf)calloc(items, size); ++} ++ ++void zcfree (opaque, ptr) ++ voidpf opaque; ++ voidpf ptr; ++{ ++ free(ptr); ++ if (opaque) return; /* make compiler happy */ ++} ++ ++#endif 
/* MY_ZCALLOC */ diff --git a/src/patches/openswan-2.4.12-startklips-1.patch b/src/patches/openswan-2.4.12-startklips-1.patch new file mode 100644 index 000000000..f90f9fe11 --- /dev/null +++ b/src/patches/openswan-2.4.12-startklips-1.patch @@ -0,0 +1,79 @@ +--- _startklips.old 2008-05-17 15:41:46.000000000 +0200 ++++ _startklips 2008-05-17 15:52:10.000000000 +0200 +@@ -104,23 +104,35 @@ + + # figure out ifconfig for interface + addr= +- eval `ifconfig $phys | +- awk '$1 == "inet" && $2 ~ /^addr:/ && $NF ~ /^Mask:/ { +- gsub(/:/, " ", $0) +- print "addr=" $3 +- other = $5 +- if ($4 == "Bcast") +- print "type=broadcast" +- else if ($4 == "P-t-P") +- print "type=pointopoint" +- else if (NF == 5) { +- print "type=" +- other = "" +- } else +- print "type=unknown" +- print "otheraddr=" other +- print "mask=" $NF +- }'` ++ eval `ip addr show $phys | awk '$3 ~ /BROADCAST|POINTOPOINT/ { ++ if ($3 ~ /BROADCAST/) ++ print "type=broadcast"; ++ else if ($3 ~ /POINTOPOINT/) ++ print "type=pointopoint"; ++ else { ++ print "type="; ++ } ++ }'` ++ ++ if [ "$type" == "broadcast" ]; then ++ eval `ip addr show $phys | awk '$1 == "inet" { gsub(/\//, " "); ++ print "addr=" $2; ++ print "mask=" $3; ++ print "otheraddr=" $5; ++ }'` ++ elif [ "$type" == "pointopoint" ]; then ++ eval `ip addr show $phys | awk '$1 == "inet" { gsub(/\//, " "); ++ print "addr=" $2; ++ print "mask=" $5; ++ print "otheraddr=" $4; ++ }'` ++ else ++ type="unknown" ++ otheraddr= ++ fi ++ ++ eval `whatmask /$mask | awk -F': ' '$1 ~ /^Netmask =/ { print "mask=" $2 }'` ++ + if test " $addr" = " " + then + echo "unable to determine address of \`$phys'" +@@ -129,7 +141,7 @@ + if test " $type" = " unknown" + then + echo "\`$phys' is of an unknown type" +- exit 1 ++ exit 1 + fi + if test " $omtu" != " " + then +@@ -223,10 +235,10 @@ + fi + next=`netstat -nr | + awk '$1 == "0.0.0.0" && $3 == "0.0.0.0" { print $2; exit 0 }'` +- if [ "$next" = "0.0.0.0" ] ; then +- next=`ip addr list $phys | grep -E "^ +inet6*.*scope global 
$phys" | +- awk '{ if($3=="peer")print $4; else print $2 }' | awk -F / '{ print $1 }'` +- fi ++# if [ "$next" = "0.0.0.0" ] ; then ++# next=`ip addr list $phys | grep -E "^ +inet6*.*scope global $phys" | ++# awk '{ if($3=="peer")print $4; else print $2 }' | awk -F / '{ print $1 }'` ++# fi + klipsinterface "ipsec0=$phys" $next + } + diff --git a/src/patches/openswan-2.4.12.kernel-2.6-klips.patch b/src/patches/openswan-2.4.12.kernel-2.6-klips.patch new file mode 100755 index 000000000..f5331b4e4 --- /dev/null +++ b/src/patches/openswan-2.4.12.kernel-2.6-klips.patch @@ -0,0 +1,59404 @@ +packaging/utils/kernelpatch 2.6 +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/README.openswan-2 Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,112 @@ ++* ++* RCSID $Id: README.openswan-2,v 1.1 2003-12-10 01:07:49 mcr Exp $ ++* ++ ++ **************************************** ++ * IPSEC for Linux, Release 2.xx series * ++ **************************************** ++ ++ ++ ++1. Files ++ ++The contents of linux/net/ipsec/ (see below) join the linux kernel source tree. ++as provided for higher up. ++ ++The programs/ directory contains the user-level utilities which you need ++to run IPSEC. See the top-level top/INSTALL to compile and install them. ++ ++The testing/ directory contains test scripts. ++ ++The doc/ directory contains -- what else -- documentation. ++ ++1.1. Kernel files ++ ++The following are found in net/ipsec/: ++ ++Makefile The Makefile ++Config.in The configuration script for make menuconfig ++defconfig Configuration defaults for first time. ++ ++radij.c General-purpose radix-tree operations ++ ++ipsec_ipcomp.c IPCOMP encapsulate/decapsulate code. ++ipsec_ah.c Authentication Header (AH) encapsulate/decapsulate code. ++ipsec_esp.c Encapsulated Security Payload (ESP) encap/decap code. ++ ++pfkey_v2.c PF_KEYv2 socket interface code. ++pfkey_v2_parser.c PF_KEYv2 message parsing and processing code. ++ ++ipsec_init.c Initialization code, /proc interface. 
++ipsec_radij.c Interface with the radix tree code. ++ipsec_netlink.c Interface with the netlink code. ++ipsec_xform.c Routines and structures common to transforms. ++ipsec_tunnel.c The outgoing packet processing code. ++ipsec_rcv.c The incoming packet processing code. ++ipsec_md5c.c Somewhat modified RSADSI MD5 C code. ++ipsec_sha1.c Somewhat modified Steve Reid SHA-1 C code. ++ ++sysctl_net_ipsec.c /proc/sys/net/ipsec/* variable definitions. ++ ++version.c symbolic link to project version. ++ ++radij.h Headers for radij.c ++ ++ipcomp.h Headers used by IPCOMP code. ++ ++ipsec_radij.h Interface with the radix tree code. ++ipsec_netlink.h Headers used by the netlink interface. ++ipsec_encap.h Headers defining encapsulation structures. ++ipsec_xform.h Transform headers. ++ipsec_tunnel.h Headers used by tunneling code. ++ipsec_ipe4.h Headers for the IP-in-IP code. ++ipsec_ah.h Headers common to AH transforms. ++ipsec_md5h.h RSADSI MD5 headers. ++ipsec_sha1.h SHA-1 headers. ++ipsec_esp.h Headers common to ESP transfroms. ++ipsec_rcv.h Headers for incoming packet processing code. ++ ++1.2. User-level files. ++ ++The following are found in utils/: ++ ++eroute.c Create an "extended route" source code ++spi.c Set up Security Associations source code ++spigrp.c Link SPIs together source code. ++tncfg.c Configure the tunneling features of the virtual interface ++ source code ++klipsdebug.c Set/reset klips debugging features source code. ++version.c symbolic link to project version. 
++ ++eroute.8 Create an "extended route" manual page ++spi.8 Set up Security Associations manual page ++spigrp.8 Link SPIs together manual page ++tncfg.8 Configure the tunneling features of the virtual interface ++ manual page ++klipsdebug.8 Set/reset klips debugging features manual page ++ ++eroute.5 /proc/net/ipsec_eroute format manual page ++spi.5 /proc/net/ipsec_spi format manual page ++spigrp.5 /proc/net/ipsec_spigrp format manual page ++tncfg.5 /proc/net/ipsec_tncfg format manual page ++klipsdebug.5 /proc/net/ipsec_klipsdebug format manual page ++version.5 /proc/net/ipsec_version format manual page ++pf_key.5 /proc/net/pf_key format manual page ++ ++Makefile Utilities makefile. ++ ++*.8 Manpages for the respective utils. ++ ++ ++1.3. Test files ++ ++The test scripts are locate in testing/ and and documentation is found ++at doc/src/umltesting.html. Automated testing via "make check" is available ++provided that the User-Mode-Linux patches are available. ++ ++* ++* $Log: README.openswan-2,v $ ++* Revision 1.1 2003-12-10 01:07:49 mcr ++* documentation for additions. ++* ++* +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/crypto/ciphers/aes/test_main.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,41 @@ ++#include ++#include ++#include ++#include "aes_cbc.h" ++#define AES_BLOCK_SIZE 16 ++#define KEY_SIZE 128 /* bits */ ++#define KEY "1234567890123456" ++#define STR "hola guaso como estaisss ... 
012" ++#define STRSZ (sizeof(STR)-1) ++ ++#define EMT_AESCBC_BLKLEN AES_BLOCK_SIZE ++#define AES_CONTEXT_T aes_context ++#define EMT_ESPAES_KEY_SZ 16 ++int pretty_print(const unsigned char *buf, int count) { ++ int i=0; ++ for (;i ++#include ++#include ++#include "aes.h" ++#include "aes_xcbc_mac.h" ++#define STR "Hola guasssso c|mo estais ...012" ++void print_hash(const __u8 *hash) { ++ printf("%08x %08x %08x %08x\n", ++ *(__u32*)(&hash[0]), ++ *(__u32*)(&hash[4]), ++ *(__u32*)(&hash[8]), ++ *(__u32*)(&hash[12])); ++} ++int main(int argc, char *argv[]) { ++ aes_block key= { 0xdeadbeef, 0xceedcaca, 0xcafebabe, 0xff010204 }; ++ __u8 hash[16]; ++ char *str = argv[1]; ++ aes_context_mac ctx; ++ if (str==NULL) { ++ fprintf(stderr, "pasame el str\n"); ++ return 255; ++ } ++ AES_xcbc_mac_set_key(&ctx, (__u8 *)&key, sizeof(key)); ++ AES_xcbc_mac_hash(&ctx, str, strlen(str), hash); ++ print_hash(hash); ++ str[2]='x'; ++ AES_xcbc_mac_hash(&ctx, str, strlen(str), hash); ++ print_hash(hash); ++ return 0; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/crypto/aes.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,97 @@ ++// I retain copyright in this code but I encourage its free use provided ++// that I don't carry any responsibility for the results. I am especially ++// happy to see it used in free and open source software. If you do use ++// it I would appreciate an acknowledgement of its origin in the code or ++// the product that results and I would also appreciate knowing a little ++// about the use to which it is being put. I am grateful to Frank Yellin ++// for some ideas that are used in this implementation. ++// ++// Dr B. R. Gladman 6th April 2001. ++// ++// This is an implementation of the AES encryption algorithm (Rijndael) ++// designed by Joan Daemen and Vincent Rijmen. This version is designed ++// to provide both fixed and dynamic block and key lengths and can also ++// run with either big or little endian internal byte order (see aes.h). 
++// It inputs block and key lengths in bytes with the legal values being ++// 16, 24 and 32. ++ ++/* ++ * Modified by Jari Ruusu, May 1 2001 ++ * - Fixed some compile warnings, code was ok but gcc warned anyway. ++ * - Changed basic types: byte -> unsigned char, word -> u_int32_t ++ * - Major name space cleanup: Names visible to outside now begin ++ * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c ++ * - Removed C++ and DLL support as part of name space cleanup. ++ * - Eliminated unnecessary recomputation of tables. (actual bug fix) ++ * - Merged precomputed constant tables to aes.c file. ++ * - Removed data alignment restrictions for portability reasons. ++ * - Made block and key lengths accept bit count (128/192/256) ++ * as well byte count (16/24/32). ++ * - Removed all error checks. This change also eliminated the need ++ * to preinitialize the context struct to zero. ++ * - Removed some totally unused constants. ++ */ ++ ++#ifndef _AES_H ++#define _AES_H ++ ++#if defined(__linux__) && defined(__KERNEL__) ++# include ++#else ++# include ++#endif ++ ++// CONFIGURATION OPTIONS (see also aes.c) ++// ++// Define AES_BLOCK_SIZE to set the cipher block size (16, 24 or 32) or ++// leave this undefined for dynamically variable block size (this will ++// result in much slower code). ++// IMPORTANT NOTE: AES_BLOCK_SIZE is in BYTES (16, 24, 32 or undefined). 
If ++// left undefined a slower version providing variable block length is compiled ++ ++#define AES_BLOCK_SIZE 16 ++ ++// The number of key schedule words for different block and key lengths ++// allowing for method of computation which requires the length to be a ++// multiple of the key length ++// ++// Nk = 4 6 8 ++// ------------- ++// Nb = 4 | 60 60 64 ++// 6 | 96 90 96 ++// 8 | 120 120 120 ++ ++#if !defined(AES_BLOCK_SIZE) || (AES_BLOCK_SIZE == 32) ++#define AES_KS_LENGTH 120 ++#define AES_RC_LENGTH 29 ++#else ++#define AES_KS_LENGTH 4 * AES_BLOCK_SIZE ++#define AES_RC_LENGTH (9 * AES_BLOCK_SIZE) / 8 - 8 ++#endif ++ ++typedef struct ++{ ++ u_int32_t aes_Nkey; // the number of words in the key input block ++ u_int32_t aes_Nrnd; // the number of cipher rounds ++ u_int32_t aes_e_key[AES_KS_LENGTH]; // the encryption key schedule ++ u_int32_t aes_d_key[AES_KS_LENGTH]; // the decryption key schedule ++#if !defined(AES_BLOCK_SIZE) ++ u_int32_t aes_Ncol; // the number of columns in the cipher state ++#endif ++} aes_context; ++ ++// THE CIPHER INTERFACE ++ ++#if !defined(AES_BLOCK_SIZE) ++extern void aes_set_blk(aes_context *, const int); ++#endif ++extern void aes_set_key(aes_context *, const unsigned char [], const int, const int); ++extern void aes_encrypt(const aes_context *, const unsigned char [], unsigned char []); ++extern void aes_decrypt(const aes_context *, const unsigned char [], unsigned char []); ++ ++// The block length inputs to aes_set_block and aes_set_key are in numbers ++// of bytes or bits. The calls to subroutines must be made in the above ++// order but multiple calls can be made without repeating earlier calls ++// if their parameters have not changed. 
++ ++#endif // _AES_H +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/crypto/aes_cbc.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,4 @@ ++/* Glue header */ ++#include "aes.h" ++int AES_set_key(aes_context *aes_ctx, const u_int8_t * key, int keysize); ++int AES_cbc_encrypt(aes_context *ctx, const u_int8_t * in, u_int8_t * out, int ilen, const u_int8_t * iv, int encrypt); +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/crypto/aes_xcbc_mac.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,12 @@ ++#ifndef _AES_XCBC_MAC_H ++#define _AES_XCBC_MAC_H ++ ++typedef u_int32_t aes_block[4]; ++typedef struct { ++ aes_context ctx_k1; ++ aes_block k2; ++ aes_block k3; ++} aes_context_mac; ++int AES_xcbc_mac_set_key(aes_context_mac *ctxm, const u_int8_t *key, int keylen); ++int AES_xcbc_mac_hash(const aes_context_mac *ctxm, const u_int8_t * in, int ilen, u_int8_t hash[16]); ++#endif /* _AES_XCBC_MAC_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/crypto/cbc_generic.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,110 @@ ++#ifndef _CBC_GENERIC_H ++#define _CBC_GENERIC_H ++/* ++ * CBC macro helpers ++ * ++ * Author: JuanJo Ciarlante ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ */ ++ ++/* ++ * Heavily inspired in loop_AES ++ */ ++#define CBC_IMPL_BLK16(name, ctx_type, addr_type, enc_func, dec_func) \ ++int name(ctx_type *ctx, const u_int8_t * in, u_int8_t * out, int ilen, const u_int8_t * iv, int encrypt) { \ ++ int ret=ilen, pos; \ ++ const u_int32_t *iv_i; \ ++ if ((ilen) % 16) return 0; \ ++ if (encrypt) { \ ++ pos=0; \ ++ while(pos=0) { \ ++ dec_func(ctx, (const addr_type) in, (addr_type) out); \ ++ if (pos==0) \ ++ iv_i=(const u_int32_t*) (iv); \ ++ else \ ++ iv_i=(const u_int32_t*) (in-16); \ ++ *((u_int32_t *)(&out[ 0])) ^= iv_i[0]; \ ++ *((u_int32_t *)(&out[ 4])) ^= iv_i[1]; \ ++ *((u_int32_t *)(&out[ 8])) ^= iv_i[2]; \ ++ *((u_int32_t *)(&out[12])) ^= iv_i[3]; \ ++ in-=16; \ ++ out-=16; \ ++ pos-=16; \ ++ } \ ++ } \ ++ return ret; \ ++} ++#define CBC_IMPL_BLK8(name, ctx_type, addr_type, enc_func, dec_func) \ ++int name(ctx_type *ctx, u_int8_t * in, u_int8_t * out, int ilen, const u_int8_t * iv, int encrypt) { \ ++ int ret=ilen, pos; \ ++ const u_int32_t *iv_i; \ ++ if ((ilen) % 8) return 0; \ ++ if (encrypt) { \ ++ pos=0; \ ++ while(pos=0) { \ ++ dec_func(ctx, (const addr_type)in, (addr_type)out); \ ++ if (pos==0) \ ++ iv_i=(const u_int32_t*) (iv); \ ++ else \ ++ iv_i=(const u_int32_t*) (in-8); \ ++ *((u_int32_t *)(&out[ 0])) ^= iv_i[0]; \ ++ *((u_int32_t *)(&out[ 4])) ^= iv_i[1]; \ ++ in-=8; \ ++ out-=8; \ ++ pos-=8; \ ++ } \ ++ } \ ++ return ret; \ ++} ++#define CBC_DECL(name, ctx_type) \ ++int name(ctx_type *ctx, u_int8_t * in, u_int8_t * out, int ilen, const u_int8_t * iv, int encrypt) ++/* ++Eg.: ++CBC_IMPL_BLK16(AES_cbc_encrypt, aes_context, u_int8_t *, aes_encrypt, aes_decrypt); ++CBC_DECL(AES_cbc_encrypt, aes_context); ++*/ ++#endif /* _CBC_GENERIC_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/crypto/des.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,298 @@ ++/* crypto/des/des.org */ ++/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) ++ * All rights reserved. 
++ * ++ * This package is an SSL implementation written ++ * by Eric Young (eay@cryptsoft.com). ++ * The implementation was written so as to conform with Netscapes SSL. ++ * ++ * This library is free for commercial and non-commercial use as long as ++ * the following conditions are aheared to. The following conditions ++ * apply to all code found in this distribution, be it the RC4, RSA, ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation ++ * included with this distribution is covered by the same copyright terms ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). ++ * ++ * Copyright remains Eric Young's, and as such any Copyright notices in ++ * the code are not to be removed. ++ * If this package is used in a product, Eric Young should be given attribution ++ * as the author of the parts of the library used. ++ * This can be in the form of a textual message at program startup or ++ * in documentation (online or textual) provided with the package. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * "This product includes cryptographic software written by ++ * Eric Young (eay@cryptsoft.com)" ++ * The word 'cryptographic' can be left out if the rouines from the library ++ * being used are not cryptographic related :-). ++ * 4. 
If you include any Windows specific code (or a derivative thereof) from ++ * the apps directory (application code) you must include an acknowledgement: ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * The licence and distribution terms for any publically available version or ++ * derivative of this code cannot be changed. i.e. this code cannot simply be ++ * copied and put under another distribution licence ++ * [including the GNU Public Licence.] ++ */ ++ ++/* WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING ++ * ++ * Always modify des.org since des.h is automatically generated from ++ * it during SSLeay configuration. ++ * ++ * WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING ++ */ ++ ++#ifndef HEADER_DES_H ++#define HEADER_DES_H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++ ++/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a ++ * %20 speed up (longs are 8 bytes, int's are 4). 
*/ ++/* Must be unsigned int on ia64/Itanium or DES breaks badly */ ++ ++#ifdef __KERNEL__ ++#include ++#else ++#include ++#endif ++ ++#ifndef DES_LONG ++#define DES_LONG u_int32_t ++#endif ++ ++typedef unsigned char des_cblock[8]; ++typedef struct { des_cblock ks; } des_key_schedule[16]; ++ ++#define DES_KEY_SZ (sizeof(des_cblock)) ++#define DES_SCHEDULE_SZ (sizeof(des_key_schedule)) ++ ++#define DES_ENCRYPT 1 ++#define DES_DECRYPT 0 ++ ++#define DES_CBC_MODE 0 ++#define DES_PCBC_MODE 1 ++ ++#define des_ecb2_encrypt(i,o,k1,k2,e) \ ++ des_ecb3_encrypt((i),(o),(k1),(k2),(k1),(e)) ++ ++#define des_ede2_cbc_encrypt(i,o,l,k1,k2,iv,e) \ ++ des_ede3_cbc_encrypt((i),(o),(l),(k1),(k2),(k1),(iv),(e)) ++ ++#define des_ede2_cfb64_encrypt(i,o,l,k1,k2,iv,n,e) \ ++ des_ede3_cfb64_encrypt((i),(o),(l),(k1),(k2),(k1),(iv),(n),(e)) ++ ++#define des_ede2_ofb64_encrypt(i,o,l,k1,k2,iv,n) \ ++ des_ede3_ofb64_encrypt((i),(o),(l),(k1),(k2),(k1),(iv),(n)) ++ ++#define C_Block des_cblock ++#define Key_schedule des_key_schedule ++#ifdef KERBEROS ++#define ENCRYPT DES_ENCRYPT ++#define DECRYPT DES_DECRYPT ++#endif ++#define KEY_SZ DES_KEY_SZ ++#define string_to_key des_string_to_key ++#define read_pw_string des_read_pw_string ++#define random_key des_random_key ++#define pcbc_encrypt des_pcbc_encrypt ++#define set_key des_set_key ++#define key_sched des_key_sched ++#define ecb_encrypt des_ecb_encrypt ++#define cbc_encrypt des_cbc_encrypt ++#define ncbc_encrypt des_ncbc_encrypt ++#define xcbc_encrypt des_xcbc_encrypt ++#define cbc_cksum des_cbc_cksum ++#define quad_cksum des_quad_cksum ++ ++/* For compatibility with the MIT lib - eay 20/05/92 */ ++typedef des_key_schedule bit_64; ++#define des_fixup_key_parity des_set_odd_parity ++#define des_check_key_parity check_parity ++ ++extern int des_check_key; /* defaults to false */ ++extern int des_rw_mode; /* defaults to DES_PCBC_MODE */ ++ ++/* The next line is used to disable full ANSI prototypes, if your ++ * compiler has problems with the 
prototypes, make sure this line always ++ * evaluates to true :-) */ ++#if defined(MSDOS) || defined(__STDC__) ++#undef NOPROTO ++#endif ++#ifndef NOPROTO ++char *des_options(void); ++void des_ecb3_encrypt(des_cblock *input,des_cblock *output, ++ des_key_schedule ks1,des_key_schedule ks2, ++ des_key_schedule ks3, int enc); ++DES_LONG des_cbc_cksum(des_cblock *input,des_cblock *output, ++ long length,des_key_schedule schedule,des_cblock *ivec); ++void des_cbc_encrypt(des_cblock *input,des_cblock *output,long length, ++ des_key_schedule schedule,des_cblock *ivec,int enc); ++void des_ncbc_encrypt(des_cblock *input,des_cblock *output,long length, ++ des_key_schedule schedule,des_cblock *ivec,int enc); ++void des_xcbc_encrypt(des_cblock *input,des_cblock *output,long length, ++ des_key_schedule schedule,des_cblock *ivec, ++ des_cblock *inw,des_cblock *outw,int enc); ++void des_cfb_encrypt(unsigned char *in,unsigned char *out,int numbits, ++ long length,des_key_schedule schedule,des_cblock *ivec,int enc); ++void des_ecb_encrypt(des_cblock *input,des_cblock *output, ++ des_key_schedule ks,int enc); ++void des_encrypt(DES_LONG *data,des_key_schedule ks, int enc); ++void des_encrypt2(DES_LONG *data,des_key_schedule ks, int enc); ++void des_encrypt3(DES_LONG *data, des_key_schedule ks1, ++ des_key_schedule ks2, des_key_schedule ks3); ++void des_decrypt3(DES_LONG *data, des_key_schedule ks1, ++ des_key_schedule ks2, des_key_schedule ks3); ++void des_ede3_cbc_encrypt(des_cblock *input, des_cblock *output, ++ long length, des_key_schedule ks1, des_key_schedule ks2, ++ des_key_schedule ks3, des_cblock *ivec, int enc); ++void des_ede3_cfb64_encrypt(unsigned char *in, unsigned char *out, ++ long length, des_key_schedule ks1, des_key_schedule ks2, ++ des_key_schedule ks3, des_cblock *ivec, int *num, int enc); ++void des_ede3_ofb64_encrypt(unsigned char *in, unsigned char *out, ++ long length, des_key_schedule ks1, des_key_schedule ks2, ++ des_key_schedule ks3, des_cblock *ivec, int 
*num); ++ ++void des_xwhite_in2out(des_cblock (*des_key), des_cblock (*in_white), ++ des_cblock (*out_white)); ++ ++int des_enc_read(int fd,char *buf,int len,des_key_schedule sched, ++ des_cblock *iv); ++int des_enc_write(int fd,char *buf,int len,des_key_schedule sched, ++ des_cblock *iv); ++char *des_fcrypt(const char *buf,const char *salt, char *ret); ++#ifdef PERL5 ++char *des_crypt(const char *buf,const char *salt); ++#else ++/* some stupid compilers complain because I have declared char instead ++ * of const char */ ++#ifndef __KERNEL__ ++#ifdef HEADER_DES_LOCL_H ++char *crypt(const char *buf,const char *salt); ++#else /* HEADER_DES_LOCL_H */ ++char *crypt(void); ++#endif /* HEADER_DES_LOCL_H */ ++#endif /* __KERNEL__ */ ++#endif /* PERL5 */ ++void des_ofb_encrypt(unsigned char *in,unsigned char *out, ++ int numbits,long length,des_key_schedule schedule,des_cblock *ivec); ++void des_pcbc_encrypt(des_cblock *input,des_cblock *output,long length, ++ des_key_schedule schedule,des_cblock *ivec,int enc); ++DES_LONG des_quad_cksum(des_cblock *input,des_cblock *output, ++ long length,int out_count,des_cblock *seed); ++void des_random_seed(des_cblock key); ++void des_random_key(des_cblock ret); ++int des_read_password(des_cblock *key,char *prompt,int verify); ++int des_read_2passwords(des_cblock *key1,des_cblock *key2, ++ char *prompt,int verify); ++int des_read_pw_string(char *buf,int length,char *prompt,int verify); ++void des_set_odd_parity(des_cblock *key); ++int des_is_weak_key(des_cblock *key); ++int des_set_key(des_cblock *key,des_key_schedule schedule); ++int des_key_sched(des_cblock *key,des_key_schedule schedule); ++void des_string_to_key(char *str,des_cblock *key); ++void des_string_to_2keys(char *str,des_cblock *key1,des_cblock *key2); ++void des_cfb64_encrypt(unsigned char *in, unsigned char *out, long length, ++ des_key_schedule schedule, des_cblock *ivec, int *num, int enc); ++void des_ofb64_encrypt(unsigned char *in, unsigned char *out, long length, ++ 
des_key_schedule schedule, des_cblock *ivec, int *num); ++int des_read_pw(char *buf, char *buff, int size, char *prompt, int verify); ++ ++/* Extra functions from Mark Murray */ ++/* The following functions are not in the normal unix build or the ++ * SSLeay build. When using the SSLeay build, use RAND_seed() ++ * and RAND_bytes() instead. */ ++int des_new_random_key(des_cblock *key); ++void des_init_random_number_generator(des_cblock *key); ++void des_set_random_generator_seed(des_cblock *key); ++void des_set_sequence_number(des_cblock new_sequence_number); ++void des_generate_random_block(des_cblock *block); ++ ++#else ++ ++char *des_options(); ++void des_ecb3_encrypt(); ++DES_LONG des_cbc_cksum(); ++void des_cbc_encrypt(); ++void des_ncbc_encrypt(); ++void des_xcbc_encrypt(); ++void des_cfb_encrypt(); ++void des_ede3_cfb64_encrypt(); ++void des_ede3_ofb64_encrypt(); ++void des_ecb_encrypt(); ++void des_encrypt(); ++void des_encrypt2(); ++void des_encrypt3(); ++void des_decrypt3(); ++void des_ede3_cbc_encrypt(); ++int des_enc_read(); ++int des_enc_write(); ++char *des_fcrypt(); ++#ifdef PERL5 ++char *des_crypt(); ++#else ++char *crypt(); ++#endif ++void des_ofb_encrypt(); ++void des_pcbc_encrypt(); ++DES_LONG des_quad_cksum(); ++void des_random_seed(); ++void des_random_key(); ++int des_read_password(); ++int des_read_2passwords(); ++int des_read_pw_string(); ++void des_set_odd_parity(); ++int des_is_weak_key(); ++int des_set_key(); ++int des_key_sched(); ++void des_string_to_key(); ++void des_string_to_2keys(); ++void des_cfb64_encrypt(); ++void des_ofb64_encrypt(); ++int des_read_pw(); ++void des_xwhite_in2out(); ++ ++/* Extra functions from Mark Murray */ ++/* The following functions are not in the normal unix build or the ++ * SSLeay build. When using the SSLeay build, use RAND_seed() ++ * and RAND_bytes() instead. 
*/ ++#ifdef FreeBSD ++int des_new_random_key(); ++void des_init_random_number_generator(); ++void des_set_random_generator_seed(); ++void des_set_sequence_number(); ++void des_generate_random_block(); ++#endif ++ ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/des/des_locl.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,515 @@ ++/* crypto/des/des_locl.org */ ++/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) ++ * All rights reserved. ++ * ++ * This package is an SSL implementation written ++ * by Eric Young (eay@cryptsoft.com). ++ * The implementation was written so as to conform with Netscapes SSL. ++ * ++ * This library is free for commercial and non-commercial use as long as ++ * the following conditions are aheared to. The following conditions ++ * apply to all code found in this distribution, be it the RC4, RSA, ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation ++ * included with this distribution is covered by the same copyright terms ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). ++ * ++ * Copyright remains Eric Young's, and as such any Copyright notices in ++ * the code are not to be removed. ++ * If this package is used in a product, Eric Young should be given attribution ++ * as the author of the parts of the library used. ++ * This can be in the form of a textual message at program startup or ++ * in documentation (online or textual) provided with the package. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. 
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * "This product includes cryptographic software written by ++ * Eric Young (eay@cryptsoft.com)" ++ * The word 'cryptographic' can be left out if the rouines from the library ++ * being used are not cryptographic related :-). ++ * 4. If you include any Windows specific code (or a derivative thereof) from ++ * the apps directory (application code) you must include an acknowledgement: ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * The licence and distribution terms for any publically available version or ++ * derivative of this code cannot be changed. i.e. this code cannot simply be ++ * copied and put under another distribution licence ++ * [including the GNU Public Licence.] 
++ */ ++ ++/* WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING ++ * ++ * Always modify des_locl.org since des_locl.h is automatically generated from ++ * it during SSLeay configuration. ++ * ++ * WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING ++ */ ++ ++#ifndef HEADER_DES_LOCL_H ++#define HEADER_DES_LOCL_H ++ ++#if defined(WIN32) || defined(WIN16) ++#ifndef MSDOS ++#define MSDOS ++#endif ++#endif ++ ++#include "crypto/des.h" ++ ++#ifndef DES_DEFAULT_OPTIONS ++/* the following is tweaked from a config script, that is why it is a ++ * protected undef/define */ ++#ifndef DES_PTR ++#define DES_PTR ++#endif ++ ++/* This helps C compiler generate the correct code for multiple functional ++ * units. It reduces register dependancies at the expense of 2 more ++ * registers */ ++#ifndef DES_RISC1 ++#define DES_RISC1 ++#endif ++ ++#ifndef DES_RISC2 ++#undef DES_RISC2 ++#endif ++ ++#if defined(DES_RISC1) && defined(DES_RISC2) ++YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! ++#endif ++ ++/* Unroll the inner loop, this sometimes helps, sometimes hinders. ++ * Very mucy CPU dependant */ ++#ifndef DES_UNROLL ++#define DES_UNROLL ++#endif ++ ++/* These default values were supplied by ++ * Peter Gutman ++ * They are only used if nothing else has been defined */ ++#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) ++/* Special defines which change the way the code is built depending on the ++ CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find ++ even newer MIPS CPU's, but at the moment one size fits all for ++ optimization options. 
Older Sparc's work better with only UNROLL, but ++ there's no way to tell at compile time what it is you're running on */ ++ ++#if defined( sun ) /* Newer Sparc's */ ++ #define DES_PTR ++ #define DES_RISC1 ++ #define DES_UNROLL ++#elif defined( __ultrix ) /* Older MIPS */ ++ #define DES_PTR ++ #define DES_RISC2 ++ #define DES_UNROLL ++#elif defined( __osf1__ ) /* Alpha */ ++ #define DES_PTR ++ #define DES_RISC2 ++#elif defined ( _AIX ) /* RS6000 */ ++ /* Unknown */ ++#elif defined( __hpux ) /* HP-PA */ ++ /* Unknown */ ++#elif defined( __aux ) /* 68K */ ++ /* Unknown */ ++#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ ++ #define DES_UNROLL ++#elif defined( __sgi ) /* Newer MIPS */ ++ #define DES_PTR ++ #define DES_RISC2 ++ #define DES_UNROLL ++#elif defined( i386 ) /* x86 boxes, should be gcc */ ++ #define DES_PTR ++ #define DES_RISC1 ++ #define DES_UNROLL ++#endif /* Systems-specific speed defines */ ++#endif ++ ++#endif /* DES_DEFAULT_OPTIONS */ ++ ++#ifdef MSDOS /* Visual C++ 2.1 (Windows NT/95) */ ++#include ++#include ++#include ++#include ++#ifndef RAND ++#define RAND ++#endif ++#undef NOPROTO ++#endif ++ ++#if defined(__STDC__) || defined(VMS) || defined(M_XENIX) || defined(MSDOS) ++#ifndef __KERNEL__ ++#include ++#else ++#include ++#endif ++#endif ++ ++#ifndef RAND ++#define RAND ++#endif ++ ++#ifdef linux ++#undef RAND ++#endif ++ ++#ifdef MSDOS ++#define getpid() 2 ++#define RAND ++#undef NOPROTO ++#endif ++ ++#if defined(NOCONST) ++#define const ++#endif ++ ++#ifdef __STDC__ ++#undef NOPROTO ++#endif ++ ++#ifdef RAND ++#define srandom(s) srand(s) ++#define random rand ++#endif ++ ++#define ITERATIONS 16 ++#define HALF_ITERATIONS 8 ++ ++/* used in des_read and des_write */ ++#define MAXWRITE (1024*16) ++#define BSIZE (MAXWRITE+4) ++ ++#define c2l(c,l) (l =((DES_LONG)(*((c)++))) , \ ++ l|=((DES_LONG)(*((c)++)))<< 8L, \ ++ l|=((DES_LONG)(*((c)++)))<<16L, \ ++ l|=((DES_LONG)(*((c)++)))<<24L) ++ ++/* NOTE - c is not incremented as per c2l */ 
++#define c2ln(c,l1,l2,n) { \ ++ c+=n; \ ++ l1=l2=0; \ ++ switch (n) { \ ++ case 8: l2 =((DES_LONG)(*(--(c))))<<24L; \ ++ case 7: l2|=((DES_LONG)(*(--(c))))<<16L; \ ++ case 6: l2|=((DES_LONG)(*(--(c))))<< 8L; \ ++ case 5: l2|=((DES_LONG)(*(--(c)))); \ ++ case 4: l1 =((DES_LONG)(*(--(c))))<<24L; \ ++ case 3: l1|=((DES_LONG)(*(--(c))))<<16L; \ ++ case 2: l1|=((DES_LONG)(*(--(c))))<< 8L; \ ++ case 1: l1|=((DES_LONG)(*(--(c)))); \ ++ } \ ++ } ++ ++#define l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \ ++ *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \ ++ *((c)++)=(unsigned char)(((l)>>16L)&0xff), \ ++ *((c)++)=(unsigned char)(((l)>>24L)&0xff)) ++ ++/* replacements for htonl and ntohl since I have no idea what to do ++ * when faced with machines with 8 byte longs. */ ++#define HDRSIZE 4 ++ ++#define n2l(c,l) (l =((DES_LONG)(*((c)++)))<<24L, \ ++ l|=((DES_LONG)(*((c)++)))<<16L, \ ++ l|=((DES_LONG)(*((c)++)))<< 8L, \ ++ l|=((DES_LONG)(*((c)++)))) ++ ++#define l2n(l,c) (*((c)++)=(unsigned char)(((l)>>24L)&0xff), \ ++ *((c)++)=(unsigned char)(((l)>>16L)&0xff), \ ++ *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \ ++ *((c)++)=(unsigned char)(((l) )&0xff)) ++ ++/* NOTE - c is not incremented as per l2c */ ++#define l2cn(l1,l2,c,n) { \ ++ c+=n; \ ++ switch (n) { \ ++ case 8: *(--(c))=(unsigned char)(((l2)>>24L)&0xff); \ ++ case 7: *(--(c))=(unsigned char)(((l2)>>16L)&0xff); \ ++ case 6: *(--(c))=(unsigned char)(((l2)>> 8L)&0xff); \ ++ case 5: *(--(c))=(unsigned char)(((l2) )&0xff); \ ++ case 4: *(--(c))=(unsigned char)(((l1)>>24L)&0xff); \ ++ case 3: *(--(c))=(unsigned char)(((l1)>>16L)&0xff); \ ++ case 2: *(--(c))=(unsigned char)(((l1)>> 8L)&0xff); \ ++ case 1: *(--(c))=(unsigned char)(((l1) )&0xff); \ ++ } \ ++ } ++ ++#if defined(WIN32) ++#define ROTATE(a,n) (_lrotr(a,n)) ++#else ++#define ROTATE(a,n) (((a)>>(n))+((a)<<(32-(n)))) ++#endif ++ ++/* Don't worry about the LOAD_DATA() stuff, that is used by ++ * fcrypt() to add it's little bit to the front */ ++ ++#ifdef DES_FCRYPT 
++ ++#define LOAD_DATA_tmp(R,S,u,t,E0,E1) \ ++ { DES_LONG tmp; LOAD_DATA(R,S,u,t,E0,E1,tmp); } ++ ++#define LOAD_DATA(R,S,u,t,E0,E1,tmp) \ ++ t=R^(R>>16L); \ ++ u=t&E0; t&=E1; \ ++ tmp=(u<<16); u^=R^s[S ]; u^=tmp; \ ++ tmp=(t<<16); t^=R^s[S+1]; t^=tmp ++#else ++#define LOAD_DATA_tmp(a,b,c,d,e,f) LOAD_DATA(a,b,c,d,e,f,g) ++#define LOAD_DATA(R,S,u,t,E0,E1,tmp) \ ++ u=R^s[S ]; \ ++ t=R^s[S+1] ++#endif ++ ++/* The changes to this macro may help or hinder, depending on the ++ * compiler and the achitecture. gcc2 always seems to do well :-). ++ * Inspired by Dana How ++ * DO NOT use the alternative version on machines with 8 byte longs. ++ * It does not seem to work on the Alpha, even when DES_LONG is 4 ++ * bytes, probably an issue of accessing non-word aligned objects :-( */ ++#ifdef DES_PTR ++ ++/* It recently occured to me that 0^0^0^0^0^0^0 == 0, so there ++ * is no reason to not xor all the sub items together. This potentially ++ * saves a register since things can be xored directly into L */ ++ ++#if defined(DES_RISC1) || defined(DES_RISC2) ++#ifdef DES_RISC1 ++#define D_ENCRYPT(LL,R,S) { \ ++ unsigned int u1,u2,u3; \ ++ LOAD_DATA(R,S,u,t,E0,E1,u1); \ ++ u2=(int)u>>8L; \ ++ u1=(int)u&0xfc; \ ++ u2&=0xfc; \ ++ t=ROTATE(t,4); \ ++ u>>=16L; \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP +u1); \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x200+u2); \ ++ u3=(int)(u>>8L); \ ++ u1=(int)u&0xfc; \ ++ u3&=0xfc; \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x400+u1); \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x600+u3); \ ++ u2=(int)t>>8L; \ ++ u1=(int)t&0xfc; \ ++ u2&=0xfc; \ ++ t>>=16L; \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x100+u1); \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x300+u2); \ ++ u3=(int)t>>8L; \ ++ u1=(int)t&0xfc; \ ++ u3&=0xfc; \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x500+u1); \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x700+u3); } ++#endif ++#ifdef DES_RISC2 ++#define D_ENCRYPT(LL,R,S) { \ ++ unsigned int 
u1,u2,s1,s2; \ ++ LOAD_DATA(R,S,u,t,E0,E1,u1); \ ++ u2=(int)u>>8L; \ ++ u1=(int)u&0xfc; \ ++ u2&=0xfc; \ ++ t=ROTATE(t,4); \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP +u1); \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x200+u2); \ ++ s1=(int)(u>>16L); \ ++ s2=(int)(u>>24L); \ ++ s1&=0xfc; \ ++ s2&=0xfc; \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x400+s1); \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x600+s2); \ ++ u2=(int)t>>8L; \ ++ u1=(int)t&0xfc; \ ++ u2&=0xfc; \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x100+u1); \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x300+u2); \ ++ s1=(int)(t>>16L); \ ++ s2=(int)(t>>24L); \ ++ s1&=0xfc; \ ++ s2&=0xfc; \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x500+s1); \ ++ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x700+s2); } ++#endif ++#else ++#define D_ENCRYPT(LL,R,S) { \ ++ LOAD_DATA_tmp(R,S,u,t,E0,E1); \ ++ t=ROTATE(t,4); \ ++ LL^= \ ++ *(DES_LONG *)((unsigned char *)des_SP +((u )&0xfc))^ \ ++ *(DES_LONG *)((unsigned char *)des_SP+0x200+((u>> 8L)&0xfc))^ \ ++ *(DES_LONG *)((unsigned char *)des_SP+0x400+((u>>16L)&0xfc))^ \ ++ *(DES_LONG *)((unsigned char *)des_SP+0x600+((u>>24L)&0xfc))^ \ ++ *(DES_LONG *)((unsigned char *)des_SP+0x100+((t )&0xfc))^ \ ++ *(DES_LONG *)((unsigned char *)des_SP+0x300+((t>> 8L)&0xfc))^ \ ++ *(DES_LONG *)((unsigned char *)des_SP+0x500+((t>>16L)&0xfc))^ \ ++ *(DES_LONG *)((unsigned char *)des_SP+0x700+((t>>24L)&0xfc)); } ++#endif ++ ++#else /* original version */ ++ ++#if defined(DES_RISC1) || defined(DES_RISC2) ++#ifdef DES_RISC1 ++#define D_ENCRYPT(LL,R,S) {\ ++ unsigned int u1,u2,u3; \ ++ LOAD_DATA(R,S,u,t,E0,E1,u1); \ ++ u>>=2L; \ ++ t=ROTATE(t,6); \ ++ u2=(int)u>>8L; \ ++ u1=(int)u&0x3f; \ ++ u2&=0x3f; \ ++ u>>=16L; \ ++ LL^=des_SPtrans[0][u1]; \ ++ LL^=des_SPtrans[2][u2]; \ ++ u3=(int)u>>8L; \ ++ u1=(int)u&0x3f; \ ++ u3&=0x3f; \ ++ LL^=des_SPtrans[4][u1]; \ ++ LL^=des_SPtrans[6][u3]; \ ++ u2=(int)t>>8L; \ ++ u1=(int)t&0x3f; \ ++ u2&=0x3f; \ ++ t>>=16L; \ ++ 
LL^=des_SPtrans[1][u1]; \ ++ LL^=des_SPtrans[3][u2]; \ ++ u3=(int)t>>8L; \ ++ u1=(int)t&0x3f; \ ++ u3&=0x3f; \ ++ LL^=des_SPtrans[5][u1]; \ ++ LL^=des_SPtrans[7][u3]; } ++#endif ++#ifdef DES_RISC2 ++#define D_ENCRYPT(LL,R,S) {\ ++ unsigned int u1,u2,s1,s2; \ ++ LOAD_DATA(R,S,u,t,E0,E1,u1); \ ++ u>>=2L; \ ++ t=ROTATE(t,6); \ ++ u2=(int)u>>8L; \ ++ u1=(int)u&0x3f; \ ++ u2&=0x3f; \ ++ LL^=des_SPtrans[0][u1]; \ ++ LL^=des_SPtrans[2][u2]; \ ++ s1=(int)u>>16L; \ ++ s2=(int)u>>24L; \ ++ s1&=0x3f; \ ++ s2&=0x3f; \ ++ LL^=des_SPtrans[4][s1]; \ ++ LL^=des_SPtrans[6][s2]; \ ++ u2=(int)t>>8L; \ ++ u1=(int)t&0x3f; \ ++ u2&=0x3f; \ ++ LL^=des_SPtrans[1][u1]; \ ++ LL^=des_SPtrans[3][u2]; \ ++ s1=(int)t>>16; \ ++ s2=(int)t>>24L; \ ++ s1&=0x3f; \ ++ s2&=0x3f; \ ++ LL^=des_SPtrans[5][s1]; \ ++ LL^=des_SPtrans[7][s2]; } ++#endif ++ ++#else ++ ++#define D_ENCRYPT(LL,R,S) {\ ++ LOAD_DATA_tmp(R,S,u,t,E0,E1); \ ++ t=ROTATE(t,4); \ ++ LL^=\ ++ des_SPtrans[0][(u>> 2L)&0x3f]^ \ ++ des_SPtrans[2][(u>>10L)&0x3f]^ \ ++ des_SPtrans[4][(u>>18L)&0x3f]^ \ ++ des_SPtrans[6][(u>>26L)&0x3f]^ \ ++ des_SPtrans[1][(t>> 2L)&0x3f]^ \ ++ des_SPtrans[3][(t>>10L)&0x3f]^ \ ++ des_SPtrans[5][(t>>18L)&0x3f]^ \ ++ des_SPtrans[7][(t>>26L)&0x3f]; } ++#endif ++#endif ++ ++ /* IP and FP ++ * The problem is more of a geometric problem that random bit fiddling. ++ 0 1 2 3 4 5 6 7 62 54 46 38 30 22 14 6 ++ 8 9 10 11 12 13 14 15 60 52 44 36 28 20 12 4 ++ 16 17 18 19 20 21 22 23 58 50 42 34 26 18 10 2 ++ 24 25 26 27 28 29 30 31 to 56 48 40 32 24 16 8 0 ++ ++ 32 33 34 35 36 37 38 39 63 55 47 39 31 23 15 7 ++ 40 41 42 43 44 45 46 47 61 53 45 37 29 21 13 5 ++ 48 49 50 51 52 53 54 55 59 51 43 35 27 19 11 3 ++ 56 57 58 59 60 61 62 63 57 49 41 33 25 17 9 1 ++ ++ The output has been subject to swaps of the form ++ 0 1 -> 3 1 but the odd and even bits have been put into ++ 2 3 2 0 ++ different words. 
The main trick is to remember that ++ t=((l>>size)^r)&(mask); ++ r^=t; ++ l^=(t<>(n))^(b))&(m)),\ ++ (b)^=(t),\ ++ (a)^=((t)<<(n))) ++ ++#define IP(l,r) \ ++ { \ ++ register DES_LONG tt; \ ++ PERM_OP(r,l,tt, 4,0x0f0f0f0fL); \ ++ PERM_OP(l,r,tt,16,0x0000ffffL); \ ++ PERM_OP(r,l,tt, 2,0x33333333L); \ ++ PERM_OP(l,r,tt, 8,0x00ff00ffL); \ ++ PERM_OP(r,l,tt, 1,0x55555555L); \ ++ } ++ ++#define FP(l,r) \ ++ { \ ++ register DES_LONG tt; \ ++ PERM_OP(l,r,tt, 1,0x55555555L); \ ++ PERM_OP(r,l,tt, 8,0x00ff00ffL); \ ++ PERM_OP(l,r,tt, 2,0x33333333L); \ ++ PERM_OP(r,l,tt,16,0x0000ffffL); \ ++ PERM_OP(l,r,tt, 4,0x0f0f0f0fL); \ ++ } ++ ++extern const DES_LONG des_SPtrans[8][64]; ++ ++#ifndef NOPROTO ++void fcrypt_body(DES_LONG *out,des_key_schedule ks, ++ DES_LONG Eswap0, DES_LONG Eswap1); ++#else ++void fcrypt_body(); ++#endif ++ ++#endif +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/des/des_ver.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,60 @@ ++/* crypto/des/des_ver.h */ ++/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) ++ * All rights reserved. ++ * ++ * This package is an SSL implementation written ++ * by Eric Young (eay@cryptsoft.com). ++ * The implementation was written so as to conform with Netscapes SSL. ++ * ++ * This library is free for commercial and non-commercial use as long as ++ * the following conditions are aheared to. The following conditions ++ * apply to all code found in this distribution, be it the RC4, RSA, ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation ++ * included with this distribution is covered by the same copyright terms ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). ++ * ++ * Copyright remains Eric Young's, and as such any Copyright notices in ++ * the code are not to be removed. ++ * If this package is used in a product, Eric Young should be given attribution ++ * as the author of the parts of the library used. 
++ * This can be in the form of a textual message at program startup or ++ * in documentation (online or textual) provided with the package. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * "This product includes cryptographic software written by ++ * Eric Young (eay@cryptsoft.com)" ++ * The word 'cryptographic' can be left out if the rouines from the library ++ * being used are not cryptographic related :-). ++ * 4. If you include any Windows specific code (or a derivative thereof) from ++ * the apps directory (application code) you must include an acknowledgement: ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * The licence and distribution terms for any publically available version or ++ * derivative of this code cannot be changed. i.e. this code cannot simply be ++ * copied and put under another distribution licence ++ * [including the GNU Public Licence.] ++ */ ++ ++extern char *DES_version; /* SSLeay version string */ ++extern char *libdes_version; /* old libdes version string */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/des/podd.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,75 @@ ++/* crypto/des/podd.h */ ++/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) ++ * All rights reserved. ++ * ++ * This package is an SSL implementation written ++ * by Eric Young (eay@cryptsoft.com). ++ * The implementation was written so as to conform with Netscapes SSL. ++ * ++ * This library is free for commercial and non-commercial use as long as ++ * the following conditions are aheared to. The following conditions ++ * apply to all code found in this distribution, be it the RC4, RSA, ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation ++ * included with this distribution is covered by the same copyright terms ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). ++ * ++ * Copyright remains Eric Young's, and as such any Copyright notices in ++ * the code are not to be removed. 
++ * If this package is used in a product, Eric Young should be given attribution ++ * as the author of the parts of the library used. ++ * This can be in the form of a textual message at program startup or ++ * in documentation (online or textual) provided with the package. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * "This product includes cryptographic software written by ++ * Eric Young (eay@cryptsoft.com)" ++ * The word 'cryptographic' can be left out if the rouines from the library ++ * being used are not cryptographic related :-). ++ * 4. If you include any Windows specific code (or a derivative thereof) from ++ * the apps directory (application code) you must include an acknowledgement: ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * The licence and distribution terms for any publically available version or ++ * derivative of this code cannot be changed. i.e. this code cannot simply be ++ * copied and put under another distribution licence ++ * [including the GNU Public Licence.] ++ */ ++ ++static const unsigned char odd_parity[256]={ ++ 1, 1, 2, 2, 4, 4, 7, 7, 8, 8, 11, 11, 13, 13, 14, 14, ++ 16, 16, 19, 19, 21, 21, 22, 22, 25, 25, 26, 26, 28, 28, 31, 31, ++ 32, 32, 35, 35, 37, 37, 38, 38, 41, 41, 42, 42, 44, 44, 47, 47, ++ 49, 49, 50, 50, 52, 52, 55, 55, 56, 56, 59, 59, 61, 61, 62, 62, ++ 64, 64, 67, 67, 69, 69, 70, 70, 73, 73, 74, 74, 76, 76, 79, 79, ++ 81, 81, 82, 82, 84, 84, 87, 87, 88, 88, 91, 91, 93, 93, 94, 94, ++ 97, 97, 98, 98,100,100,103,103,104,104,107,107,109,109,110,110, ++112,112,115,115,117,117,118,118,121,121,122,122,124,124,127,127, ++128,128,131,131,133,133,134,134,137,137,138,138,140,140,143,143, ++145,145,146,146,148,148,151,151,152,152,155,155,157,157,158,158, ++161,161,162,162,164,164,167,167,168,168,171,171,173,173,174,174, ++176,176,179,179,181,181,182,182,185,185,186,186,188,188,191,191, ++193,193,194,194,196,196,199,199,200,200,203,203,205,205,206,206, ++208,208,211,211,213,213,214,214,217,217,218,218,220,220,223,223, ++224,224,227,227,229,229,230,230,233,233,234,234,236,236,239,239, ++241,241,242,242,244,244,247,247,248,248,251,251,253,253,254,254}; +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/des/sk.h Mon Feb 9 
13:51:03 2004 +@@ -0,0 +1,204 @@ ++/* crypto/des/sk.h */ ++/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) ++ * All rights reserved. ++ * ++ * This package is an SSL implementation written ++ * by Eric Young (eay@cryptsoft.com). ++ * The implementation was written so as to conform with Netscapes SSL. ++ * ++ * This library is free for commercial and non-commercial use as long as ++ * the following conditions are aheared to. The following conditions ++ * apply to all code found in this distribution, be it the RC4, RSA, ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation ++ * included with this distribution is covered by the same copyright terms ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). ++ * ++ * Copyright remains Eric Young's, and as such any Copyright notices in ++ * the code are not to be removed. ++ * If this package is used in a product, Eric Young should be given attribution ++ * as the author of the parts of the library used. ++ * This can be in the form of a textual message at program startup or ++ * in documentation (online or textual) provided with the package. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * "This product includes cryptographic software written by ++ * Eric Young (eay@cryptsoft.com)" ++ * The word 'cryptographic' can be left out if the rouines from the library ++ * being used are not cryptographic related :-). ++ * 4. If you include any Windows specific code (or a derivative thereof) from ++ * the apps directory (application code) you must include an acknowledgement: ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * The licence and distribution terms for any publically available version or ++ * derivative of this code cannot be changed. i.e. this code cannot simply be ++ * copied and put under another distribution licence ++ * [including the GNU Public Licence.] 
++ */ ++ ++static const DES_LONG des_skb[8][64]={ ++{ ++/* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ ++0x00000000L,0x00000010L,0x20000000L,0x20000010L, ++0x00010000L,0x00010010L,0x20010000L,0x20010010L, ++0x00000800L,0x00000810L,0x20000800L,0x20000810L, ++0x00010800L,0x00010810L,0x20010800L,0x20010810L, ++0x00000020L,0x00000030L,0x20000020L,0x20000030L, ++0x00010020L,0x00010030L,0x20010020L,0x20010030L, ++0x00000820L,0x00000830L,0x20000820L,0x20000830L, ++0x00010820L,0x00010830L,0x20010820L,0x20010830L, ++0x00080000L,0x00080010L,0x20080000L,0x20080010L, ++0x00090000L,0x00090010L,0x20090000L,0x20090010L, ++0x00080800L,0x00080810L,0x20080800L,0x20080810L, ++0x00090800L,0x00090810L,0x20090800L,0x20090810L, ++0x00080020L,0x00080030L,0x20080020L,0x20080030L, ++0x00090020L,0x00090030L,0x20090020L,0x20090030L, ++0x00080820L,0x00080830L,0x20080820L,0x20080830L, ++0x00090820L,0x00090830L,0x20090820L,0x20090830L, ++},{ ++/* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ ++0x00000000L,0x02000000L,0x00002000L,0x02002000L, ++0x00200000L,0x02200000L,0x00202000L,0x02202000L, ++0x00000004L,0x02000004L,0x00002004L,0x02002004L, ++0x00200004L,0x02200004L,0x00202004L,0x02202004L, ++0x00000400L,0x02000400L,0x00002400L,0x02002400L, ++0x00200400L,0x02200400L,0x00202400L,0x02202400L, ++0x00000404L,0x02000404L,0x00002404L,0x02002404L, ++0x00200404L,0x02200404L,0x00202404L,0x02202404L, ++0x10000000L,0x12000000L,0x10002000L,0x12002000L, ++0x10200000L,0x12200000L,0x10202000L,0x12202000L, ++0x10000004L,0x12000004L,0x10002004L,0x12002004L, ++0x10200004L,0x12200004L,0x10202004L,0x12202004L, ++0x10000400L,0x12000400L,0x10002400L,0x12002400L, ++0x10200400L,0x12200400L,0x10202400L,0x12202400L, ++0x10000404L,0x12000404L,0x10002404L,0x12002404L, ++0x10200404L,0x12200404L,0x10202404L,0x12202404L, ++},{ ++/* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ ++0x00000000L,0x00000001L,0x00040000L,0x00040001L, ++0x01000000L,0x01000001L,0x01040000L,0x01040001L, 
++0x00000002L,0x00000003L,0x00040002L,0x00040003L, ++0x01000002L,0x01000003L,0x01040002L,0x01040003L, ++0x00000200L,0x00000201L,0x00040200L,0x00040201L, ++0x01000200L,0x01000201L,0x01040200L,0x01040201L, ++0x00000202L,0x00000203L,0x00040202L,0x00040203L, ++0x01000202L,0x01000203L,0x01040202L,0x01040203L, ++0x08000000L,0x08000001L,0x08040000L,0x08040001L, ++0x09000000L,0x09000001L,0x09040000L,0x09040001L, ++0x08000002L,0x08000003L,0x08040002L,0x08040003L, ++0x09000002L,0x09000003L,0x09040002L,0x09040003L, ++0x08000200L,0x08000201L,0x08040200L,0x08040201L, ++0x09000200L,0x09000201L,0x09040200L,0x09040201L, ++0x08000202L,0x08000203L,0x08040202L,0x08040203L, ++0x09000202L,0x09000203L,0x09040202L,0x09040203L, ++},{ ++/* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ ++0x00000000L,0x00100000L,0x00000100L,0x00100100L, ++0x00000008L,0x00100008L,0x00000108L,0x00100108L, ++0x00001000L,0x00101000L,0x00001100L,0x00101100L, ++0x00001008L,0x00101008L,0x00001108L,0x00101108L, ++0x04000000L,0x04100000L,0x04000100L,0x04100100L, ++0x04000008L,0x04100008L,0x04000108L,0x04100108L, ++0x04001000L,0x04101000L,0x04001100L,0x04101100L, ++0x04001008L,0x04101008L,0x04001108L,0x04101108L, ++0x00020000L,0x00120000L,0x00020100L,0x00120100L, ++0x00020008L,0x00120008L,0x00020108L,0x00120108L, ++0x00021000L,0x00121000L,0x00021100L,0x00121100L, ++0x00021008L,0x00121008L,0x00021108L,0x00121108L, ++0x04020000L,0x04120000L,0x04020100L,0x04120100L, ++0x04020008L,0x04120008L,0x04020108L,0x04120108L, ++0x04021000L,0x04121000L,0x04021100L,0x04121100L, ++0x04021008L,0x04121008L,0x04021108L,0x04121108L, ++},{ ++/* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ ++0x00000000L,0x10000000L,0x00010000L,0x10010000L, ++0x00000004L,0x10000004L,0x00010004L,0x10010004L, ++0x20000000L,0x30000000L,0x20010000L,0x30010000L, ++0x20000004L,0x30000004L,0x20010004L,0x30010004L, ++0x00100000L,0x10100000L,0x00110000L,0x10110000L, ++0x00100004L,0x10100004L,0x00110004L,0x10110004L, 
++0x20100000L,0x30100000L,0x20110000L,0x30110000L, ++0x20100004L,0x30100004L,0x20110004L,0x30110004L, ++0x00001000L,0x10001000L,0x00011000L,0x10011000L, ++0x00001004L,0x10001004L,0x00011004L,0x10011004L, ++0x20001000L,0x30001000L,0x20011000L,0x30011000L, ++0x20001004L,0x30001004L,0x20011004L,0x30011004L, ++0x00101000L,0x10101000L,0x00111000L,0x10111000L, ++0x00101004L,0x10101004L,0x00111004L,0x10111004L, ++0x20101000L,0x30101000L,0x20111000L,0x30111000L, ++0x20101004L,0x30101004L,0x20111004L,0x30111004L, ++},{ ++/* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ ++0x00000000L,0x08000000L,0x00000008L,0x08000008L, ++0x00000400L,0x08000400L,0x00000408L,0x08000408L, ++0x00020000L,0x08020000L,0x00020008L,0x08020008L, ++0x00020400L,0x08020400L,0x00020408L,0x08020408L, ++0x00000001L,0x08000001L,0x00000009L,0x08000009L, ++0x00000401L,0x08000401L,0x00000409L,0x08000409L, ++0x00020001L,0x08020001L,0x00020009L,0x08020009L, ++0x00020401L,0x08020401L,0x00020409L,0x08020409L, ++0x02000000L,0x0A000000L,0x02000008L,0x0A000008L, ++0x02000400L,0x0A000400L,0x02000408L,0x0A000408L, ++0x02020000L,0x0A020000L,0x02020008L,0x0A020008L, ++0x02020400L,0x0A020400L,0x02020408L,0x0A020408L, ++0x02000001L,0x0A000001L,0x02000009L,0x0A000009L, ++0x02000401L,0x0A000401L,0x02000409L,0x0A000409L, ++0x02020001L,0x0A020001L,0x02020009L,0x0A020009L, ++0x02020401L,0x0A020401L,0x02020409L,0x0A020409L, ++},{ ++/* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ ++0x00000000L,0x00000100L,0x00080000L,0x00080100L, ++0x01000000L,0x01000100L,0x01080000L,0x01080100L, ++0x00000010L,0x00000110L,0x00080010L,0x00080110L, ++0x01000010L,0x01000110L,0x01080010L,0x01080110L, ++0x00200000L,0x00200100L,0x00280000L,0x00280100L, ++0x01200000L,0x01200100L,0x01280000L,0x01280100L, ++0x00200010L,0x00200110L,0x00280010L,0x00280110L, ++0x01200010L,0x01200110L,0x01280010L,0x01280110L, ++0x00000200L,0x00000300L,0x00080200L,0x00080300L, ++0x01000200L,0x01000300L,0x01080200L,0x01080300L, 
++0x00000210L,0x00000310L,0x00080210L,0x00080310L, ++0x01000210L,0x01000310L,0x01080210L,0x01080310L, ++0x00200200L,0x00200300L,0x00280200L,0x00280300L, ++0x01200200L,0x01200300L,0x01280200L,0x01280300L, ++0x00200210L,0x00200310L,0x00280210L,0x00280310L, ++0x01200210L,0x01200310L,0x01280210L,0x01280310L, ++},{ ++/* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ ++0x00000000L,0x04000000L,0x00040000L,0x04040000L, ++0x00000002L,0x04000002L,0x00040002L,0x04040002L, ++0x00002000L,0x04002000L,0x00042000L,0x04042000L, ++0x00002002L,0x04002002L,0x00042002L,0x04042002L, ++0x00000020L,0x04000020L,0x00040020L,0x04040020L, ++0x00000022L,0x04000022L,0x00040022L,0x04040022L, ++0x00002020L,0x04002020L,0x00042020L,0x04042020L, ++0x00002022L,0x04002022L,0x00042022L,0x04042022L, ++0x00000800L,0x04000800L,0x00040800L,0x04040800L, ++0x00000802L,0x04000802L,0x00040802L,0x04040802L, ++0x00002800L,0x04002800L,0x00042800L,0x04042800L, ++0x00002802L,0x04002802L,0x00042802L,0x04042802L, ++0x00000820L,0x04000820L,0x00040820L,0x04040820L, ++0x00000822L,0x04000822L,0x00040822L,0x04040822L, ++0x00002820L,0x04002820L,0x00042820L,0x04042820L, ++0x00002822L,0x04002822L,0x00042822L,0x04042822L, ++}}; +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/des/spr.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,204 @@ ++/* crypto/des/spr.h */ ++/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) ++ * All rights reserved. ++ * ++ * This package is an SSL implementation written ++ * by Eric Young (eay@cryptsoft.com). ++ * The implementation was written so as to conform with Netscapes SSL. ++ * ++ * This library is free for commercial and non-commercial use as long as ++ * the following conditions are aheared to. The following conditions ++ * apply to all code found in this distribution, be it the RC4, RSA, ++ * lhash, DES, etc., code; not just the SSL code. 
The SSL documentation ++ * included with this distribution is covered by the same copyright terms ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). ++ * ++ * Copyright remains Eric Young's, and as such any Copyright notices in ++ * the code are not to be removed. ++ * If this package is used in a product, Eric Young should be given attribution ++ * as the author of the parts of the library used. ++ * This can be in the form of a textual message at program startup or ++ * in documentation (online or textual) provided with the package. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * "This product includes cryptographic software written by ++ * Eric Young (eay@cryptsoft.com)" ++ * The word 'cryptographic' can be left out if the rouines from the library ++ * being used are not cryptographic related :-). ++ * 4. If you include any Windows specific code (or a derivative thereof) from ++ * the apps directory (application code) you must include an acknowledgement: ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * The licence and distribution terms for any publically available version or ++ * derivative of this code cannot be changed. i.e. this code cannot simply be ++ * copied and put under another distribution licence ++ * [including the GNU Public Licence.] ++ */ ++ ++const DES_LONG des_SPtrans[8][64]={ ++{ ++/* nibble 0 */ ++0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L, ++0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L, ++0x00080802L, 0x02080800L, 0x02080000L, 0x00000802L, ++0x02000802L, 0x02000000L, 0x00000000L, 0x00080002L, ++0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L, ++0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L, ++0x00000002L, 0x00000800L, 0x00080800L, 0x02080002L, ++0x00000800L, 0x02000802L, 0x02080002L, 0x00000000L, ++0x00000000L, 0x02080802L, 0x02000800L, 0x00080002L, ++0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L, ++0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L, ++0x00080802L, 0x00000002L, 0x02000002L, 0x02080000L, ++0x02080802L, 0x00080800L, 0x02080000L, 0x02000802L, ++0x02000000L, 0x00000802L, 0x00080002L, 0x00000000L, ++0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L, ++0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L, ++},{ ++/* nibble 1 */ ++0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L, ++0x40000010L, 0x00008010L, 0x40008000L, 0x00108000L, ++0x00008000L, 0x40100010L, 0x00000010L, 0x40008000L, ++0x00100010L, 0x40108000L, 0x40100000L, 0x00000010L, ++0x00100000L, 0x40008010L, 
0x40100010L, 0x00008000L, ++0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L, ++0x40008010L, 0x00108010L, 0x40108000L, 0x40000010L, ++0x40000000L, 0x00100000L, 0x00008010L, 0x40108010L, ++0x00100010L, 0x40108000L, 0x40008000L, 0x00108010L, ++0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L, ++0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L, ++0x00008000L, 0x40000000L, 0x00108010L, 0x40008010L, ++0x40108000L, 0x00008000L, 0x00000000L, 0x40000010L, ++0x00000010L, 0x40108010L, 0x00108000L, 0x40100000L, ++0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L, ++0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L, ++},{ ++/* nibble 2 */ ++0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L, ++0x00040001L, 0x04000000L, 0x04000101L, 0x00040100L, ++0x04000100L, 0x00040000L, 0x04040000L, 0x00000001L, ++0x04040101L, 0x00000101L, 0x00000001L, 0x04040001L, ++0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L, ++0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L, ++0x04040001L, 0x04000100L, 0x00040101L, 0x04040000L, ++0x00040100L, 0x00000000L, 0x04000000L, 0x00040101L, ++0x04040100L, 0x00000100L, 0x00000001L, 0x00040000L, ++0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L, ++0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L, ++0x00040001L, 0x04000000L, 0x04040101L, 0x00000001L, ++0x00040101L, 0x04000001L, 0x04000000L, 0x04040101L, ++0x00040000L, 0x04000100L, 0x04000101L, 0x00040100L, ++0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L, ++0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L, ++},{ ++/* nibble 3 */ ++0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L, ++0x00000000L, 0x10400000L, 0x10001008L, 0x00400008L, ++0x10401000L, 0x10000008L, 0x10000000L, 0x00001008L, ++0x10000008L, 0x00401008L, 0x00400000L, 0x10000000L, ++0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L, ++0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L, ++0x00001008L, 0x00000000L, 0x00400008L, 0x10401000L, ++0x10001000L, 0x10400008L, 0x10401008L, 0x00400000L, ++0x10400008L, 0x00001008L, 
0x00400000L, 0x10000008L, ++0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L, ++0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L, ++0x00000000L, 0x10400008L, 0x10401000L, 0x00001000L, ++0x10000000L, 0x10401008L, 0x00401008L, 0x00400000L, ++0x10401008L, 0x00000008L, 0x10001000L, 0x00401008L, ++0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L, ++0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L, ++},{ ++/* nibble 4 */ ++0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L, ++0x08010020L, 0x08000400L, 0x00010420L, 0x08010000L, ++0x00010000L, 0x00000020L, 0x08000020L, 0x00010400L, ++0x08000420L, 0x08010020L, 0x08010400L, 0x00000000L, ++0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L, ++0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L, ++0x00000020L, 0x08000420L, 0x08010420L, 0x00010020L, ++0x08010000L, 0x00000400L, 0x00000420L, 0x08010400L, ++0x08010400L, 0x08000420L, 0x00010020L, 0x08010000L, ++0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L, ++0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L, ++0x00010420L, 0x08000000L, 0x00000400L, 0x00010020L, ++0x08000420L, 0x00000400L, 0x00000000L, 0x08010420L, ++0x08010020L, 0x08010400L, 0x00000420L, 0x00010000L, ++0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L, ++0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L, ++},{ ++/* nibble 5 */ ++0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L, ++0x00200040L, 0x00002000L, 0x80002040L, 0x00200000L, ++0x00002040L, 0x80202040L, 0x00202000L, 0x80000000L, ++0x80002000L, 0x80000040L, 0x80200000L, 0x00202040L, ++0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L, ++0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L, ++0x80202040L, 0x80200000L, 0x80000000L, 0x00002040L, ++0x00000040L, 0x00202000L, 0x00202040L, 0x80002000L, ++0x00002040L, 0x80000000L, 0x80002000L, 0x00202040L, ++0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L, ++0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L, ++0x00200040L, 0x80202040L, 0x00202000L, 0x00000040L, ++0x80202040L, 0x00202000L, 
0x00200000L, 0x80002040L, ++0x80000040L, 0x80200000L, 0x00202040L, 0x00000000L, ++0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L, ++0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L, ++},{ ++/* nibble 6 */ ++0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L, ++0x01004204L, 0x00004004L, 0x00004200L, 0x00000000L, ++0x01000000L, 0x01000204L, 0x00000204L, 0x01004000L, ++0x00000004L, 0x01004200L, 0x01004000L, 0x00000204L, ++0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L, ++0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L, ++0x01004004L, 0x00004204L, 0x01004200L, 0x00000004L, ++0x00004204L, 0x01004004L, 0x00000200L, 0x01000000L, ++0x00004204L, 0x01004000L, 0x01004004L, 0x00000204L, ++0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L, ++0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L, ++0x00000200L, 0x01000004L, 0x00000004L, 0x01000200L, ++0x00000000L, 0x01000204L, 0x01000200L, 0x00004200L, ++0x00000204L, 0x00004000L, 0x01004204L, 0x01000000L, ++0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L, ++0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L, ++},{ ++/* nibble 7 */ ++0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L, ++0x20020000L, 0x00800080L, 0x20800000L, 0x20820080L, ++0x00000080L, 0x20000000L, 0x00820000L, 0x00020080L, ++0x00820080L, 0x20020080L, 0x20000080L, 0x20800000L, ++0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L, ++0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L, ++0x20000000L, 0x00800000L, 0x20020080L, 0x20800080L, ++0x00800000L, 0x00020000L, 0x20820000L, 0x00000080L, ++0x00800000L, 0x00020000L, 0x20000080L, 0x20820080L, ++0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L, ++0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L, ++0x20820000L, 0x00000080L, 0x00800080L, 0x20020000L, ++0x20820080L, 0x00800000L, 0x20800000L, 0x20000080L, ++0x00820000L, 0x00020080L, 0x20020080L, 0x20800000L, ++0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L, ++0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L, ++}}; +--- /dev/null Tue Mar 11 
13:02:56 2003 ++++ linux/include/mast.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,33 @@ ++struct mast_callbacks { ++ int (*packet_encap)(struct device *mast, void *context, ++ struct sk_buff *skb, int flowref); ++ int (*link_inquire)(struct device *mast, void *context); ++}; ++ ++ ++struct device *mast_init (int family, ++ struct mast_callbacks *callbacks, ++ unsigned int flags, ++ unsigned int desired_unit, ++ unsigned int max_flowref, ++ void *context); ++ ++int mast_destroy(struct device *mast); ++ ++int mast_recv(struct device *mast, struct sk_buff *skb, int flowref); ++ ++/* free this skb as being useless, increment failure count. */ ++int mast_toast(struct device *mast, struct sk_buff *skb, int flowref); ++ ++int mast_linkstat (struct device *mast, int flowref, ++ int status); ++ ++int mast_setreference (struct device *mast, ++ int defaultSA); ++ ++int mast_setneighbor (struct device *mast, ++ struct sockaddr *source, ++ struct sockaddr *destination, ++ int flowref); ++ ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,518 @@ ++#ifndef _OPENSWAN_H ++/* ++ * header file for FreeS/WAN library functions ++ * Copyright (C) 1998, 1999, 2000 Henry Spencer. ++ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. 
++ * ++ * RCSID $Id: openswan.h,v 1.93 2005-04-14 20:21:51 mcr Exp $ ++ */ ++#define _OPENSWAN_H /* seen it, no need to see it again */ ++ ++/* you'd think this should be builtin to compiler... */ ++#ifndef TRUE ++#define TRUE 1 ++#endif ++ ++#ifndef FALSE ++#define FALSE 0 ++#endif ++ ++ ++ ++/* ++ * We've just got to have some datatypes defined... And annoyingly, just ++ * where we get them depends on whether we're in userland or not. ++ */ ++/* things that need to come from one place or the other, depending */ ++#ifdef __KERNEL__ ++#include ++#include ++#include ++#include ++#include ++#define user_assert(foo) /*nothing*/ ++#else ++#include ++#include ++#include ++#include ++#include ++#define user_assert(foo) assert(foo) ++#include ++ ++# define uint8_t u_int8_t ++# define uint16_t u_int16_t ++# define uint32_t u_int32_t ++# define uint64_t u_int64_t ++ ++ ++# define DEBUG_NO_STATIC static ++ ++#endif ++ ++#include ++ ++ ++/* ++ * Grab the kernel version to see if we have NET_21, and therefore ++ * IPv6. Some of this is repeated from ipsec_kversions.h. Of course, ++ * we aren't really testing if the kernel has IPv6, but rather if the ++ * the include files do. 
++ */ ++#include ++#ifndef KERNEL_VERSION ++#define KERNEL_VERSION(x,y,z) (((x)<<16)+((y)<<8)+(z)) ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0) ++#define NET_21 ++#endif ++ ++#ifndef IPPROTO_COMP ++# define IPPROTO_COMP 108 ++#endif /* !IPPROTO_COMP */ ++ ++#ifndef IPPROTO_INT ++# define IPPROTO_INT 61 ++#endif /* !IPPROTO_INT */ ++ ++#ifdef CONFIG_KLIPS_DEBUG ++#ifndef DEBUG_NO_STATIC ++# define DEBUG_NO_STATIC ++#endif ++#else /* CONFIG_KLIPS_DEBUG */ ++#ifndef DEBUG_NO_STATIC ++# define DEBUG_NO_STATIC static ++#endif ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++#if !defined(ESPINUDP_WITH_NON_IKE) ++#define ESPINUDP_WITH_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ ++#define ESPINUDP_WITH_NON_ESP 2 /* draft-ietf-ipsec-nat-t-ike-02 */ ++#endif ++ ++/* ++ * Basic data types for the address-handling functions. ++ * ip_address and ip_subnet are supposed to be opaque types; do not ++ * use their definitions directly, they are subject to change! ++ */ ++ ++/* first, some quick fakes in case we're on an old system with no IPv6 */ ++#ifndef s6_addr16 ++struct in6_addr { ++ union ++ { ++ __u8 u6_addr8[16]; ++ __u16 u6_addr16[8]; ++ __u32 u6_addr32[4]; ++ } in6_u; ++#define s6_addr in6_u.u6_addr8 ++#define s6_addr16 in6_u.u6_addr16 ++#define s6_addr32 in6_u.u6_addr32 ++}; ++struct sockaddr_in6 { ++ unsigned short int sin6_family; /* AF_INET6 */ ++ __u16 sin6_port; /* Transport layer port # */ ++ __u32 sin6_flowinfo; /* IPv6 flow information */ ++ struct in6_addr sin6_addr; /* IPv6 address */ ++ __u32 sin6_scope_id; /* scope id (new in RFC2553) */ ++}; ++#endif /* !s6_addr16 */ ++ ++/* then the main types */ ++typedef struct { ++ union { ++ struct sockaddr_in v4; ++ struct sockaddr_in6 v6; ++ } u; ++} ip_address; ++typedef struct { ++ ip_address addr; ++ int maskbits; ++} ip_subnet; ++ ++/* and the SA ID stuff */ ++#ifdef __KERNEL__ ++typedef __u32 ipsec_spi_t; ++#else ++typedef u_int32_t ipsec_spi_t; ++#endif ++typedef struct { /* to identify an SA, we 
need: */ ++ ip_address dst; /* A. destination host */ ++ ipsec_spi_t spi; /* B. 32-bit SPI, assigned by dest. host */ ++# define SPI_PASS 256 /* magic values... */ ++# define SPI_DROP 257 /* ...for use... */ ++# define SPI_REJECT 258 /* ...with SA_INT */ ++# define SPI_HOLD 259 ++# define SPI_TRAP 260 ++# define SPI_TRAPSUBNET 261 ++ int proto; /* C. protocol */ ++# define SA_ESP 50 /* IPPROTO_ESP */ ++# define SA_AH 51 /* IPPROTO_AH */ ++# define SA_IPIP 4 /* IPPROTO_IPIP */ ++# define SA_COMP 108 /* IPPROTO_COMP */ ++# define SA_INT 61 /* IANA reserved for internal use */ ++} ip_said; ++ ++/* misc */ ++typedef const char *err_t; /* error message, or NULL for success */ ++struct prng { /* pseudo-random-number-generator guts */ ++ unsigned char sbox[256]; ++ int i, j; ++ unsigned long count; ++}; ++ ++ ++/* ++ * definitions for user space, taken from freeswan/ipsec_sa.h ++ */ ++typedef uint32_t IPsecSAref_t; ++ ++#define IPSEC_SA_REF_FIELD_WIDTH (8 * sizeof(IPsecSAref_t)) ++ ++#define IPsecSAref2NFmark(x) ((x) << (IPSEC_SA_REF_FIELD_WIDTH - IPSEC_SA_REF_TABLE_IDX_WIDTH)) ++#define NFmark2IPsecSAref(x) ((x) >> (IPSEC_SA_REF_FIELD_WIDTH - IPSEC_SA_REF_TABLE_IDX_WIDTH)) ++ ++#define IPSEC_SAREF_NULL (~((IPsecSAref_t)0)) ++ ++/* GCC magic for use in function definitions! 
*/ ++#ifdef GCC_LINT ++# define PRINTF_LIKE(n) __attribute__ ((format(printf, n, n+1))) ++# define NEVER_RETURNS __attribute__ ((noreturn)) ++# define UNUSED __attribute__ ((unused)) ++# define BLANK_FORMAT " " /* GCC_LINT whines about empty formats */ ++#else ++# define PRINTF_LIKE(n) /* ignore */ ++# define NEVER_RETURNS /* ignore */ ++# define UNUSED /* ignore */ ++# define BLANK_FORMAT "" ++#endif ++ ++ ++ ++ ++ ++/* ++ * new IPv6-compatible functions ++ */ ++ ++/* text conversions */ ++err_t ttoul(const char *src, size_t srclen, int format, unsigned long *dst); ++size_t ultot(unsigned long src, int format, char *buf, size_t buflen); ++#define ULTOT_BUF (22+1) /* holds 64 bits in octal */ ++err_t ttoaddr(const char *src, size_t srclen, int af, ip_address *dst); ++err_t tnatoaddr(const char *src, size_t srclen, int af, ip_address *dst); ++size_t addrtot(const ip_address *src, int format, char *buf, size_t buflen); ++/* RFC 1886 old IPv6 reverse-lookup format is the bulkiest */ ++#define ADDRTOT_BUF (32*2 + 3 + 1 + 3 + 1 + 1) ++err_t ttosubnet(const char *src, size_t srclen, int af, ip_subnet *dst); ++size_t subnettot(const ip_subnet *src, int format, char *buf, size_t buflen); ++#define SUBNETTOT_BUF (ADDRTOT_BUF + 1 + 3) ++size_t subnetporttot(const ip_subnet *src, int format, char *buf, size_t buflen); ++#define SUBNETPROTOTOT_BUF (SUBNETTOTO_BUF + ULTOT_BUF) ++err_t ttosa(const char *src, size_t srclen, ip_said *dst); ++size_t satot(const ip_said *src, int format, char *bufptr, size_t buflen); ++#define SATOT_BUF (5 + ULTOA_BUF + 1 + ADDRTOT_BUF) ++err_t ttodata(const char *src, size_t srclen, int base, char *buf, ++ size_t buflen, size_t *needed); ++err_t ttodatav(const char *src, size_t srclen, int base, ++ char *buf, size_t buflen, size_t *needed, ++ char *errp, size_t errlen, unsigned int flags); ++#define TTODATAV_BUF 40 /* ttodatav's largest non-literal message */ ++#define TTODATAV_IGNORESPACE (1<<1) /* ignore spaces in base64 encodings*/ ++#define 
TTODATAV_SPACECOUNTS 0 /* do not ignore spaces in base64 */ ++ ++size_t datatot(const char *src, size_t srclen, int format, char *buf, ++ size_t buflen); ++size_t keyblobtoid(const unsigned char *src, size_t srclen, char *dst, ++ size_t dstlen); ++size_t splitkeytoid(const unsigned char *e, size_t elen, const unsigned char *m, ++ size_t mlen, char *dst, size_t dstlen); ++#define KEYID_BUF 10 /* up to 9 text digits plus NUL */ ++err_t ttoprotoport(char *src, size_t src_len, u_int8_t *proto, u_int16_t *port, ++ int *has_port_wildcard); ++ ++/* initializations */ ++void initsaid(const ip_address *addr, ipsec_spi_t spi, int proto, ip_said *dst); ++err_t loopbackaddr(int af, ip_address *dst); ++err_t unspecaddr(int af, ip_address *dst); ++err_t anyaddr(int af, ip_address *dst); ++err_t initaddr(const unsigned char *src, size_t srclen, int af, ip_address *dst); ++err_t initsubnet(const ip_address *addr, int maskbits, int clash, ip_subnet *dst); ++err_t addrtosubnet(const ip_address *addr, ip_subnet *dst); ++ ++/* misc. 
conversions and related */ ++err_t rangetosubnet(const ip_address *from, const ip_address *to, ip_subnet *dst); ++int addrtypeof(const ip_address *src); ++int subnettypeof(const ip_subnet *src); ++size_t addrlenof(const ip_address *src); ++size_t addrbytesptr(const ip_address *src, const unsigned char **dst); ++size_t addrbytesof(const ip_address *src, unsigned char *dst, size_t dstlen); ++int masktocount(const ip_address *src); ++void networkof(const ip_subnet *src, ip_address *dst); ++void maskof(const ip_subnet *src, ip_address *dst); ++ ++/* tests */ ++int sameaddr(const ip_address *a, const ip_address *b); ++int addrcmp(const ip_address *a, const ip_address *b); ++int samesubnet(const ip_subnet *a, const ip_subnet *b); ++int addrinsubnet(const ip_address *a, const ip_subnet *s); ++int subnetinsubnet(const ip_subnet *a, const ip_subnet *b); ++int subnetishost(const ip_subnet *s); ++int samesaid(const ip_said *a, const ip_said *b); ++int sameaddrtype(const ip_address *a, const ip_address *b); ++int samesubnettype(const ip_subnet *a, const ip_subnet *b); ++int isanyaddr(const ip_address *src); ++int isunspecaddr(const ip_address *src); ++int isloopbackaddr(const ip_address *src); ++ ++/* low-level grot */ ++int portof(const ip_address *src); ++void setportof(int port, ip_address *dst); ++struct sockaddr *sockaddrof(ip_address *src); ++size_t sockaddrlenof(const ip_address *src); ++ ++/* PRNG */ ++void prng_init(struct prng *prng, const unsigned char *key, size_t keylen); ++void prng_bytes(struct prng *prng, unsigned char *dst, size_t dstlen); ++unsigned long prng_count(struct prng *prng); ++void prng_final(struct prng *prng); ++ ++/* odds and ends */ ++const char *ipsec_version_code(void); ++const char *ipsec_version_string(void); ++const char **ipsec_copyright_notice(void); ++ ++const char *dns_string_rr(int rr, char *buf, int bufsize); ++const char *dns_string_datetime(time_t seconds, ++ char *buf, ++ int bufsize); ++ ++ ++/* ++ * old functions, to be deleted 
eventually ++ */ ++ ++/* unsigned long */ ++const char * /* NULL for success, else string literal */ ++atoul( ++ const char *src, ++ size_t srclen, /* 0 means strlen(src) */ ++ int base, /* 0 means figure it out */ ++ unsigned long *resultp ++); ++size_t /* space needed for full conversion */ ++ultoa( ++ unsigned long n, ++ int base, ++ char *dst, ++ size_t dstlen ++); ++#define ULTOA_BUF 21 /* just large enough for largest result, */ ++ /* assuming 64-bit unsigned long! */ ++ ++/* Internet addresses */ ++const char * /* NULL for success, else string literal */ ++atoaddr( ++ const char *src, ++ size_t srclen, /* 0 means strlen(src) */ ++ struct in_addr *addr ++); ++size_t /* space needed for full conversion */ ++addrtoa( ++ struct in_addr addr, ++ int format, /* character; 0 means default */ ++ char *dst, ++ size_t dstlen ++); ++#define ADDRTOA_BUF 16 /* just large enough for largest result */ ++ ++/* subnets */ ++const char * /* NULL for success, else string literal */ ++atosubnet( ++ const char *src, ++ size_t srclen, /* 0 means strlen(src) */ ++ struct in_addr *addr, ++ struct in_addr *mask ++); ++size_t /* space needed for full conversion */ ++subnettoa( ++ struct in_addr addr, ++ struct in_addr mask, ++ int format, /* character; 0 means default */ ++ char *dst, ++ size_t dstlen ++); ++#define SUBNETTOA_BUF 32 /* large enough for worst case result */ ++ ++/* ranges */ ++const char * /* NULL for success, else string literal */ ++atoasr( ++ const char *src, ++ size_t srclen, /* 0 means strlen(src) */ ++ char *type, /* 'a', 's', 'r' */ ++ struct in_addr *addrs /* two-element array */ ++); ++size_t /* space needed for full conversion */ ++rangetoa( ++ struct in_addr *addrs, /* two-element array */ ++ int format, /* character; 0 means default */ ++ char *dst, ++ size_t dstlen ++); ++#define RANGETOA_BUF 34 /* large enough for worst case result */ ++ ++/* data types for SA conversion functions */ ++ ++/* generic data, e.g. 
keys */ ++const char * /* NULL for success, else string literal */ ++atobytes( ++ const char *src, ++ size_t srclen, /* 0 means strlen(src) */ ++ char *dst, ++ size_t dstlen, ++ size_t *lenp /* NULL means don't bother telling me */ ++); ++size_t /* 0 failure, else true size */ ++bytestoa( ++ const char *src, ++ size_t srclen, ++ int format, /* character; 0 means default */ ++ char *dst, ++ size_t dstlen ++); ++ ++/* old versions of generic-data functions; deprecated */ ++size_t /* 0 failure, else true size */ ++atodata( ++ const char *src, ++ size_t srclen, /* 0 means strlen(src) */ ++ char *dst, ++ size_t dstlen ++); ++size_t /* 0 failure, else true size */ ++datatoa( ++ const char *src, ++ size_t srclen, ++ int format, /* character; 0 means default */ ++ char *dst, ++ size_t dstlen ++); ++ ++/* part extraction and special addresses */ ++struct in_addr ++subnetof( ++ struct in_addr addr, ++ struct in_addr mask ++); ++struct in_addr ++hostof( ++ struct in_addr addr, ++ struct in_addr mask ++); ++struct in_addr ++broadcastof( ++ struct in_addr addr, ++ struct in_addr mask ++); ++ ++/* mask handling */ ++int ++goodmask( ++ struct in_addr mask ++); ++int ++masktobits( ++ struct in_addr mask ++); ++struct in_addr ++bitstomask( ++ int n ++); ++ ++ ++ ++/* ++ * general utilities ++ */ ++ ++#ifndef __KERNEL__ ++/* option pickup from files (userland only because of use of FILE) */ ++const char *optionsfrom(const char *filename, int *argcp, char ***argvp, ++ int optind, FILE *errorreport); ++ ++/* sanitize a string */ ++extern size_t sanitize_string(char *buf, size_t size); ++ ++#endif ++ ++ ++/* ++ * ENUM of klips debugging values. Not currently used in klips. ++ * debug flag is actually 32 -bits, but only one bit is ever used, ++ * so we can actually pack it all into a single 32-bit word. 
++ */ ++enum klips_debug_flags { ++ KDF_VERBOSE = 0, ++ KDF_XMIT = 1, ++ KDF_NETLINK = 2, /* obsolete */ ++ KDF_XFORM = 3, ++ KDF_EROUTE = 4, ++ KDF_SPI = 5, ++ KDF_RADIJ = 6, ++ KDF_ESP = 7, ++ KDF_AH = 8, /* obsolete */ ++ KDF_RCV = 9, ++ KDF_TUNNEL = 10, ++ KDF_PFKEY = 11, ++ KDF_COMP = 12 ++}; ++ ++ ++/* ++ * Debugging levels for pfkey_lib_debug ++ */ ++#define PF_KEY_DEBUG_PARSE_NONE 0 ++#define PF_KEY_DEBUG_PARSE_PROBLEM 1 ++#define PF_KEY_DEBUG_PARSE_STRUCT 2 ++#define PF_KEY_DEBUG_PARSE_FLOW 4 ++#define PF_KEY_DEBUG_BUILD 8 ++#define PF_KEY_DEBUG_PARSE_MAX 15 ++ ++extern unsigned int pfkey_lib_debug; /* bits selecting what to report */ ++ ++/* ++ * pluto and lwdnsq need to know the maximum size of the commands to, ++ * and replies from lwdnsq. ++ */ ++ ++#define LWDNSQ_CMDBUF_LEN 1024 ++#define LWDNSQ_RESULT_LEN_MAX 4096 ++ ++ ++/* syntax for passthrough SA */ ++#ifndef PASSTHROUGHNAME ++#define PASSTHROUGHNAME "%passthrough" ++#define PASSTHROUGH4NAME "%passthrough4" ++#define PASSTHROUGH6NAME "%passthrough6" ++#define PASSTHROUGHIS "tun0@0.0.0.0" ++#define PASSTHROUGH4IS "tun0@0.0.0.0" ++#define PASSTHROUGH6IS "tun0@::" ++#define PASSTHROUGHTYPE "tun" ++#define PASSTHROUGHSPI 0 ++#define PASSTHROUGHDST 0 ++#endif ++ ++ ++ ++#endif /* _OPENSWAN_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipcomp.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,61 @@ ++/* ++ * IPCOMP zlib interface code. ++ * Copyright (C) 2000 Svenning Soerensen ++ * Copyright (C) 2000, 2001 Richard Guy Briggs ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * for more details. ++ ++ RCSID $Id: ipcomp.h,v 1.14 2004-07-10 19:08:41 mcr Exp $ ++ ++ */ ++ ++/* SSS */ ++ ++#ifndef _IPCOMP_H ++#define _IPCOMP_H ++ ++/* Prefix all global deflate symbols with "ipcomp_" to avoid collisions with ppp_deflate & ext2comp */ ++#ifndef IPCOMP_PREFIX ++#define IPCOMP_PREFIX ++#endif /* IPCOMP_PREFIX */ ++ ++#ifndef IPPROTO_COMP ++#define IPPROTO_COMP 108 ++#endif /* IPPROTO_COMP */ ++ ++#ifdef CONFIG_KLIPS_DEBUG ++extern int sysctl_ipsec_debug_ipcomp; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++struct ipcomphdr { /* IPCOMP header */ ++ __u8 ipcomp_nh; /* Next header (protocol) */ ++ __u8 ipcomp_flags; /* Reserved, must be 0 */ ++ __u16 ipcomp_cpi; /* Compression Parameter Index */ ++}; ++ ++extern struct inet_protocol comp_protocol; ++extern int sysctl_ipsec_debug_ipcomp; ++ ++#define IPCOMP_UNCOMPRESSABLE 0x000000001 ++#define IPCOMP_COMPRESSIONERROR 0x000000002 ++#define IPCOMP_PARMERROR 0x000000004 ++#define IPCOMP_DECOMPRESSIONERROR 0x000000008 ++ ++#define IPCOMP_ADAPT_INITIAL_TRIES 8 ++#define IPCOMP_ADAPT_INITIAL_SKIP 4 ++#define IPCOMP_ADAPT_SUBSEQ_TRIES 2 ++#define IPCOMP_ADAPT_SUBSEQ_SKIP 8 ++ ++/* Function prototypes */ ++struct sk_buff *skb_compress(struct sk_buff *skb, struct ipsec_sa *ips, unsigned int *flags); ++struct sk_buff *skb_decompress(struct sk_buff *skb, struct ipsec_sa *ips, unsigned int *flags); ++ ++#endif /* _IPCOMP_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_ah.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,200 @@ ++/* ++ * Authentication Header declarations ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . 
++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_ah.h,v 1.26 2004-09-13 02:22:10 mcr Exp $ ++ */ ++ ++#include "ipsec_md5h.h" ++#include "ipsec_sha1.h" ++ ++#ifndef IPPROTO_AH ++#define IPPROTO_AH 51 ++#endif /* IPPROTO_AH */ ++ ++#include "ipsec_auth.h" ++ ++#ifdef __KERNEL__ ++ ++extern struct inet_protocol ah_protocol; ++ ++struct options; ++ ++struct ahhdr /* Generic AH header */ ++{ ++ __u8 ah_nh; /* Next header (protocol) */ ++ __u8 ah_hl; /* AH length, in 32-bit words */ ++ __u16 ah_rv; /* reserved, must be 0 */ ++ __u32 ah_spi; /* Security Parameters Index */ ++ __u32 ah_rpl; /* Replay prevention */ ++ __u8 ah_data[AHHMAC_HASHLEN];/* Authentication hash */ ++}; ++#define AH_BASIC_LEN 8 /* basic AH header is 8 bytes, nh,hl,rv,spi ++ * and the ah_hl, says how many bytes after that ++ * to cover. */ ++ ++extern struct xform_functions ah_xform_funcs[]; ++ ++#ifdef CONFIG_KLIPS_DEBUG ++extern int debug_ah; ++#endif /* CONFIG_KLIPS_DEBUG */ ++#endif /* __KERNEL__ */ ++ ++/* ++ * $Log: ipsec_ah.h,v $ ++ * Revision 1.26 2004-09-13 02:22:10 mcr ++ * #define inet_protocol if necessary. ++ * ++ * Revision 1.25 2004/09/06 18:35:41 mcr ++ * 2.6.8.1 gets rid of inet_protocol->net_protocol compatibility, ++ * so adjust for that. ++ * ++ * Revision 1.24 2004/07/10 19:08:41 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.23 2004/04/05 19:55:04 mcr ++ * Moved from linux/include/freeswan/ipsec_ah.h,v ++ * ++ * Revision 1.22 2004/04/05 19:41:05 mcr ++ * merged alg-branch code. ++ * ++ * Revision 1.21 2003/12/13 19:10:16 mcr ++ * refactored rcv and xmit code - same as FS 2.05. ++ * ++ * Revision 1.22 2003/12/11 20:14:58 mcr ++ * refactored the xmit code, to move all encapsulation ++ * code into protocol functions. 
Note that all functions ++ * are essentially done by a single function, which is probably ++ * wrong. ++ * the rcv_functions structures are renamed xform_functions. ++ * ++ * Revision 1.21 2003/12/06 21:21:19 mcr ++ * split up receive path into per-transform files, for ++ * easier later removal. ++ * ++ * Revision 1.20.8.1 2003/12/22 15:25:52 jjo ++ * Merged algo-0.8.1-rc11-test1 into alg-branch ++ * ++ * Revision 1.20 2003/02/06 02:21:34 rgb ++ * ++ * Moved "struct auth_alg" from ipsec_rcv.c to ipsec_ah.h . ++ * Changed "struct ah" to "struct ahhdr" and "struct esp" to "struct esphdr". ++ * Removed "#ifdef INBOUND_POLICY_CHECK_eroute" dead code. ++ * ++ * Revision 1.19 2002/09/16 21:19:13 mcr ++ * fixes for west-ah-icmp-01 - length of AH header must be ++ * calculated properly, and next_header field properly copied. ++ * ++ * Revision 1.18 2002/05/14 02:37:02 rgb ++ * Change reference from _TDB to _IPSA. ++ * ++ * Revision 1.17 2002/04/24 07:36:46 mcr ++ * Moved from ./klips/net/ipsec/ipsec_ah.h,v ++ * ++ * Revision 1.16 2002/02/20 01:27:06 rgb ++ * Ditched a pile of structs only used by the old Netlink interface. ++ * ++ * Revision 1.15 2001/12/11 02:35:57 rgb ++ * Change "struct net_device" to "struct device" for 2.2 compatibility. ++ * ++ * Revision 1.14 2001/11/26 09:23:47 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.13.2.1 2001/09/25 02:18:24 mcr ++ * replace "struct device" with "struct netdevice" ++ * ++ * Revision 1.13 2001/06/14 19:35:08 rgb ++ * Update copyright date. ++ * ++ * Revision 1.12 2000/09/12 03:21:20 rgb ++ * Cleared out unused htonq. ++ * ++ * Revision 1.11 2000/09/08 19:12:55 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * ++ * Revision 1.10 2000/01/21 06:13:10 rgb ++ * Tidied up spacing. ++ * Added macros for HMAC padding magic numbers.(kravietz) ++ * ++ * Revision 1.9 1999/12/07 18:16:23 rgb ++ * Fixed comments at end of #endif lines. 
++ * ++ * Revision 1.8 1999/04/11 00:28:56 henry ++ * GPL boilerplate ++ * ++ * Revision 1.7 1999/04/06 04:54:25 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.6 1999/01/26 02:06:01 rgb ++ * Removed CONFIG_IPSEC_ALGO_SWITCH macro. ++ * ++ * Revision 1.5 1999/01/22 06:17:49 rgb ++ * Updated macro comments. ++ * Added context types to support algorithm switch code. ++ * 64-bit clean-up -- converting 'u long long' to __u64. ++ * ++ * Revision 1.4 1998/07/14 15:54:56 rgb ++ * Add #ifdef __KERNEL__ to protect kernel-only structures. ++ * ++ * Revision 1.3 1998/06/30 18:05:16 rgb ++ * Comment out references to htonq. ++ * ++ * Revision 1.2 1998/06/25 19:33:46 rgb ++ * Add prototype for protocol receive function. ++ * Rearrange for more logical layout. ++ * ++ * Revision 1.1 1998/06/18 21:27:43 henry ++ * move sources from klips/src to klips/net/ipsec, to keep stupid ++ * kernel-build scripts happier in the presence of symlinks ++ * ++ * Revision 1.4 1998/05/18 22:28:43 rgb ++ * Disable key printing facilities from /proc/net/ipsec_*. ++ * ++ * Revision 1.3 1998/04/21 21:29:07 rgb ++ * Rearrange debug switches to change on the fly debug output from user ++ * space. Only kernel changes checked in at this time. radij.c was also ++ * changed to temporarily remove buggy debugging code in rj_delete causing ++ * an OOPS and hence, netlink device open errors. ++ * ++ * Revision 1.2 1998/04/12 22:03:17 rgb ++ * Updated ESP-3DES-HMAC-MD5-96, ++ * ESP-DES-HMAC-MD5-96, ++ * AH-HMAC-MD5-96, ++ * AH-HMAC-SHA1-96 since Henry started freeswan cvs repository ++ * from old standards (RFC182[5-9] to new (as of March 1998) drafts. ++ * ++ * Fixed eroute references in /proc/net/ipsec*. ++ * ++ * Started to patch module unloading memory leaks in ipsec_netlink and ++ * radij tree unloading. 
++ * ++ * Revision 1.1 1998/04/09 03:05:55 henry ++ * sources moved up from linux/net/ipsec ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:02 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.4 1997/01/15 01:28:15 ji ++ * Added definitions for new AH transforms. ++ * ++ * Revision 0.3 1996/11/20 14:35:48 ji ++ * Minor Cleanup. ++ * Rationalized debugging code. ++ * ++ * Revision 0.2 1996/11/02 00:18:33 ji ++ * First limited release. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_alg.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,248 @@ ++/* ++ * Modular extensions service and registration functions interface ++ * ++ * Author: JuanJo Ciarlante ++ * ++ * ipsec_alg.h,v 1.1.2.1 2003/11/21 18:12:23 jjo Exp ++ * ++ */ ++/* ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ */ ++#ifndef IPSEC_ALG_H ++#define IPSEC_ALG_H ++ ++/* ++ * gcc >= 3.2 has removed __FUNCTION__, replaced by C99 __func__ ++ * *BUT* its a compiler variable. 
++ */ ++#if (__GNUC__ >= 3) ++#ifndef __FUNCTION__ ++#define __FUNCTION__ __func__ ++#endif ++#endif ++ ++/* Version 0.8.1-0 */ ++#define IPSEC_ALG_VERSION 0x00080100 ++ ++#include ++#include ++#include ++#include ++ ++/* ++ * The following structs are used via pointers in ipsec_alg object to ++ * avoid ipsec_alg.h coupling with freeswan headers, thus simplifying ++ * module development ++ */ ++struct ipsec_sa; ++struct esp; ++ ++/************************************** ++ * ++ * Main registration object ++ * ++ *************************************/ ++#define IPSEC_ALG_VERSION_QUAD(v) \ ++ (v>>24),((v>>16)&0xff),((v>>8)&0xff),(v&0xff) ++/* ++ * Main ipsec_alg objects: "OOPrograming wannabe" ++ * Hierachy (carefully handled with _minimal_ cast'ing): ++ * ++ * ipsec_alg+ ++ * +->ipsec_alg_enc (ixt_alg_type=SADB_EXT_SUPPORTED_ENCRYPT) ++ * +->ipsec_alg_auth (ixt_alg_type=SADB_EXT_SUPPORTED_AUTH) ++ */ ++ ++/*************************************************************** ++ * ++ * INTERFACE object: struct ipsec_alg ++ * ++ ***************************************************************/ ++ ++#define ixt_alg_type ixt_support.ias_exttype ++#define ixt_alg_id ixt_support.ias_id ++ ++#define IPSEC_ALG_ST_SUPP 0x01 ++#define IPSEC_ALG_ST_REGISTERED 0x02 ++#define IPSEC_ALG_ST_EXCL 0x04 ++struct ipsec_alg { ++ unsigned ixt_version; /* only allow this version (or 'near')*/ \ ++ struct list_head ixt_list; /* dlinked list */ \ ++ struct module *ixt_module; /* THIS_MODULE */ \ ++ unsigned ixt_state; /* state flags */ \ ++ atomic_t ixt_refcnt; /* ref. count when pointed from ipsec_sa */ \ ++ char ixt_name[16]; /* descriptive short name, eg. "3des" */ \ ++ void *ixt_data; /* private for algo implementation */ \ ++ uint8_t ixt_blocksize; /* blocksize in bytes */ \ ++ ++ struct ipsec_alg_supported ixt_support; ++}; ++/* ++ * Note the const in cbc_encrypt IV arg: ++ * some ciphers like to toast passed IV (eg. 
3DES): make a local IV copy ++ */ ++struct ipsec_alg_enc { ++ struct ipsec_alg ixt_common; ++ unsigned ixt_e_keylen; /* raw key length in bytes */ ++ unsigned ixt_e_ctx_size; /* sa_p->key_e_size */ ++ int (*ixt_e_set_key)(struct ipsec_alg_enc *alg, __u8 *key_e, const __u8 *key, size_t keysize); ++ __u8 *(*ixt_e_new_key)(struct ipsec_alg_enc *alg, const __u8 *key, size_t keysize); ++ void (*ixt_e_destroy_key)(struct ipsec_alg_enc *alg, __u8 *key_e); ++ int (*ixt_e_cbc_encrypt)(struct ipsec_alg_enc *alg, __u8 *key_e, __u8 *in, int ilen, const __u8 *iv, int encrypt); ++}; ++struct ipsec_alg_auth { ++ struct ipsec_alg ixt_common; ++ unsigned ixt_a_keylen; /* raw key length in bytes */ ++ unsigned ixt_a_ctx_size; /* sa_p->key_a_size */ ++ unsigned ixt_a_authlen; /* 'natural' auth. hash len (bytes) */ ++ int (*ixt_a_hmac_set_key)(struct ipsec_alg_auth *alg, __u8 *key_a, const __u8 *key, int keylen); ++ int (*ixt_a_hmac_hash)(struct ipsec_alg_auth *alg, __u8 *key_a, const __u8 *dat, int len, __u8 *hash, int hashlen); ++}; ++/* ++ * These are _copies_ of SADB_EXT_SUPPORTED_{AUTH,ENCRYPT}, ++ * to avoid header coupling for true constants ++ * about headers ... 
"cp is your friend" --Linus ++ */ ++#define IPSEC_ALG_TYPE_AUTH 14 ++#define IPSEC_ALG_TYPE_ENCRYPT 15 ++ ++/*************************************************************** ++ * ++ * INTERFACE for module loading,testing, and unloading ++ * ++ ***************************************************************/ ++/* - registration calls */ ++int register_ipsec_alg(struct ipsec_alg *); ++int unregister_ipsec_alg(struct ipsec_alg *); ++/* - optional (simple test) for algos */ ++int ipsec_alg_test(unsigned alg_type, unsigned alg_id, int testparm); ++/* inline wrappers (usefull for type validation */ ++static inline int register_ipsec_alg_enc(struct ipsec_alg_enc *ixt) { ++ return register_ipsec_alg((struct ipsec_alg*)ixt); ++} ++static inline int unregister_ipsec_alg_enc(struct ipsec_alg_enc *ixt) { ++ return unregister_ipsec_alg((struct ipsec_alg*)ixt); ++} ++static inline int register_ipsec_alg_auth(struct ipsec_alg_auth *ixt) { ++ return register_ipsec_alg((struct ipsec_alg*)ixt); ++} ++static inline int unregister_ipsec_alg_auth(struct ipsec_alg_auth *ixt) { ++ return unregister_ipsec_alg((struct ipsec_alg*)ixt); ++} ++ ++/***************************************************************** ++ * ++ * INTERFACE for ENC services: key creation, encrypt function ++ * ++ *****************************************************************/ ++ ++#define IPSEC_ALG_ENCRYPT 1 ++#define IPSEC_ALG_DECRYPT 0 ++ ++/* encryption key context creation function */ ++int ipsec_alg_enc_key_create(struct ipsec_sa *sa_p); ++/* ++ * ipsec_alg_esp_encrypt(): encrypt ilen bytes in idat returns ++ * 0 or ERR<0 ++ */ ++int ipsec_alg_esp_encrypt(struct ipsec_sa *sa_p, __u8 *idat, int ilen, const __u8 *iv, int action); ++ ++/*************************************************************** ++ * ++ * INTERFACE for AUTH services: key creation, hash functions ++ * ++ ***************************************************************/ ++int ipsec_alg_auth_key_create(struct ipsec_sa *sa_p); ++int 
ipsec_alg_sa_esp_hash(const struct ipsec_sa *sa_p, const __u8 *espp, int len, __u8 *hash, int hashlen) ; ++#define ipsec_alg_sa_esp_update(c,k,l) ipsec_alg_sa_esp_hash(c,k,l,NULL,0) ++ ++/* only called from ipsec_init.c */ ++int ipsec_alg_init(void); ++ ++/* algo module glue for static algos */ ++void ipsec_alg_static_init(void); ++typedef int (*ipsec_alg_init_func_t) (void); ++ ++/********************************************** ++ * ++ * INTERFACE for ipsec_sa init and wipe ++ * ++ **********************************************/ ++ ++/* returns true if ipsec_sa has ipsec_alg obj attached */ ++/* ++ * Initializes ipsec_sa's ipsec_alg object, using already loaded ++ * proto, authalg, encalg.; links ipsec_alg objects (enc, auth) ++ */ ++int ipsec_alg_sa_init(struct ipsec_sa *sa_p); ++/* ++ * Destroys ipsec_sa's ipsec_alg object ++ * unlinking ipsec_alg objects ++ */ ++int ipsec_alg_sa_wipe(struct ipsec_sa *sa_p); ++ ++#define IPSEC_ALG_MODULE_INIT_MOD( func_name ) \ ++ static int func_name(void); \ ++ module_init(func_name); \ ++ static int __init func_name(void) ++#define IPSEC_ALG_MODULE_EXIT_MOD( func_name ) \ ++ static void func_name(void); \ ++ module_exit(func_name); \ ++ static void __exit func_name(void) ++ ++#define IPSEC_ALG_MODULE_INIT_STATIC( func_name ) \ ++ extern int func_name(void); \ ++ int func_name(void) ++#define IPSEC_ALG_MODULE_EXIT_STATIC( func_name ) \ ++ extern void func_name(void); \ ++ void func_name(void) ++ ++/********************************************** ++ * ++ * 2.2 backport for some 2.4 useful module stuff ++ * ++ **********************************************/ ++#ifdef MODULE ++#ifndef THIS_MODULE ++#define THIS_MODULE (&__this_module) ++#endif ++#ifndef module_init ++typedef int (*__init_module_func_t)(void); ++typedef void (*__cleanup_module_func_t)(void); ++ ++#define module_init(x) \ ++ int init_module(void) __attribute__((alias(#x))); \ ++ static inline __init_module_func_t __init_module_inline(void) \ ++ { return x; } ++#define 
module_exit(x) \ ++ void cleanup_module(void) __attribute__((alias(#x))); \ ++ static inline __cleanup_module_func_t __cleanup_module_inline(void) \ ++ { return x; } ++#endif ++#define IPSEC_ALG_MODULE_INIT( func_name ) IPSEC_ALG_MODULE_INIT_MOD( func_name ) ++#define IPSEC_ALG_MODULE_EXIT( func_name ) IPSEC_ALG_MODULE_EXIT_MOD( func_name ) ++ ++#else /* not MODULE */ ++#ifndef THIS_MODULE ++#define THIS_MODULE NULL ++#endif ++/* ++ * I only want module_init() magic ++ * when algo.c file *is THE MODULE*, in all other ++ * cases, initialization is called explicitely from ipsec_alg_init() ++ */ ++#define IPSEC_ALG_MODULE_INIT( func_name ) IPSEC_ALG_MODULE_INIT_STATIC(func_name) ++#define IPSEC_ALG_MODULE_EXIT( func_name ) IPSEC_ALG_MODULE_EXIT_STATIC(func_name) ++#endif ++ ++#endif /* IPSEC_ALG_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_alg_3des.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,12 @@ ++struct TripleDES_context { ++ des_key_schedule s1; ++ des_key_schedule s2; ++ des_key_schedule s3; ++}; ++typedef struct TripleDES_context TripleDES_context; ++ ++#define ESP_3DES_KEY_SZ 3*(sizeof(des_cblock)) ++#define ESP_3DES_CBC_BLK_LEN 8 ++ ++ ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_auth.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,100 @@ ++/* ++ * Authentication Header declarations ++ * Copyright (C) 2003 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * RCSID $Id: ipsec_auth.h,v 1.3 2004-04-06 02:49:08 mcr Exp $ ++ */ ++ ++#include "ipsec_md5h.h" ++#include "ipsec_sha1.h" ++ ++#ifndef IPSEC_AUTH_H ++#define IPSEC_AUTH_H ++ ++#define AH_FLENGTH 12 /* size of fixed part */ ++#define AHMD5_KMAX 64 /* MD5 max 512 bits key */ ++#define AHMD5_AMAX 12 /* MD5 96 bits of authenticator */ ++ ++#define AHMD596_KLEN 16 /* MD5 128 bits key */ ++#define AHSHA196_KLEN 20 /* SHA1 160 bits key */ ++ ++#define AHMD596_ALEN 16 /* MD5 128 bits authentication length */ ++#define AHSHA196_ALEN 20 /* SHA1 160 bits authentication length */ ++ ++#define AHMD596_BLKLEN 64 /* MD5 block length */ ++#define AHSHA196_BLKLEN 64 /* SHA1 block length */ ++#define AHSHA2_256_BLKLEN 64 /* SHA2-256 block length */ ++#define AHSHA2_384_BLKLEN 128 /* SHA2-384 block length (?) */ ++#define AHSHA2_512_BLKLEN 128 /* SHA2-512 block length */ ++ ++#define AH_BLKLEN_MAX 128 /* keep up to date! */ ++ ++ ++#define AH_AMAX AHSHA196_ALEN /* keep up to date! */ ++#define AHHMAC_HASHLEN 12 /* authenticator length of 96bits */ ++#define AHHMAC_RPLLEN 4 /* 32 bit replay counter */ ++ ++#define DB_AH_PKTRX 0x0001 ++#define DB_AH_PKTRX2 0x0002 ++#define DB_AH_DMP 0x0004 ++#define DB_AH_IPSA 0x0010 ++#define DB_AH_XF 0x0020 ++#define DB_AH_INAU 0x0040 ++#define DB_AH_REPLAY 0x0100 ++ ++#ifdef __KERNEL__ ++ ++/* General HMAC algorithm is described in RFC 2104 */ ++ ++#define HMAC_IPAD 0x36 ++#define HMAC_OPAD 0x5C ++ ++struct md5_ctx { ++ MD5_CTX ictx; /* context after H(K XOR ipad) */ ++ MD5_CTX octx; /* context after H(K XOR opad) */ ++}; ++ ++struct sha1_ctx { ++ SHA1_CTX ictx; /* context after H(K XOR ipad) */ ++ SHA1_CTX octx; /* context after H(K XOR opad) */ ++}; ++ ++struct auth_alg { ++ void (*init)(void *ctx); ++ void (*update)(void *ctx, unsigned char *bytes, __u32 len); ++ void (*final)(unsigned char *hash, void *ctx); ++ int hashlen; ++}; ++ ++struct options; ++ ++#endif /* __KERNEL__ */ ++#endif /* IPSEC_AUTH_H */ ++ ++/* ++ * $Log: 
ipsec_auth.h,v $ ++ * Revision 1.3 2004-04-06 02:49:08 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.2 2004/04/05 19:55:04 mcr ++ * Moved from linux/include/freeswan/ipsec_auth.h,v ++ * ++ * Revision 1.1 2003/12/13 19:10:16 mcr ++ * refactored rcv and xmit code - same as FS 2.05. ++ * ++ * Revision 1.1 2003/12/06 21:21:19 mcr ++ * split up receive path into per-transform files, for ++ * easier later removal. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_encap.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,149 @@ ++/* ++ * declarations relevant to encapsulation-like operations ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * RCSID $Id: ipsec_encap.h,v 1.19 2004-04-05 19:55:04 mcr Exp $ ++ */ ++ ++#ifndef _IPSEC_ENCAP_H_ ++ ++#define SENT_IP4 16 /* data is two struct in_addr + proto + ports*/ ++ /* (2 * sizeof(struct in_addr)) */ ++ /* sizeof(struct sockaddr_encap) ++ - offsetof(struct sockaddr_encap, Sen.Sip4.Src) */ ++ ++struct sockaddr_encap ++{ ++ __u8 sen_len; /* length */ ++ __u8 sen_family; /* AF_ENCAP */ ++ __u16 sen_type; /* see SENT_* */ ++ union ++ { ++ struct /* SENT_IP4 */ ++ { ++ struct in_addr Src; ++ struct in_addr Dst; ++ __u8 Proto; ++ __u16 Sport; ++ __u16 Dport; ++ } Sip4; ++ } Sen; ++}; ++ ++#define sen_ip_src Sen.Sip4.Src ++#define sen_ip_dst Sen.Sip4.Dst ++#define sen_proto Sen.Sip4.Proto ++#define sen_sport Sen.Sip4.Sport ++#define sen_dport Sen.Sip4.Dport ++ ++#ifndef AF_ENCAP ++#define AF_ENCAP 26 ++#endif /* AF_ENCAP */ ++ ++#define _IPSEC_ENCAP_H_ ++#endif /* _IPSEC_ENCAP_H_ */ ++ ++/* ++ * $Log: ipsec_encap.h,v $ ++ * Revision 1.19 2004-04-05 19:55:04 mcr ++ * Moved from linux/include/freeswan/ipsec_encap.h,v ++ * ++ * Revision 1.18 2003/10/31 02:27:05 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.17.30.1 2003/09/21 13:59:38 mcr ++ * pre-liminary X.509 patch - does not yet pass tests. ++ * ++ * Revision 1.17 2002/04/24 07:36:46 mcr ++ * Moved from ./klips/net/ipsec/ipsec_encap.h,v ++ * ++ * Revision 1.16 2001/11/26 09:23:47 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.15.2.1 2001/09/25 02:18:54 mcr ++ * struct eroute moved to ipsec_eroute.h ++ * ++ * Revision 1.15 2001/09/14 16:58:36 rgb ++ * Added support for storing the first and last packets through a HOLD. ++ * ++ * Revision 1.14 2001/09/08 21:13:31 rgb ++ * Added pfkey ident extension support for ISAKMPd. (NetCelo) ++ * ++ * Revision 1.13 2001/06/14 19:35:08 rgb ++ * Update copyright date. 
++ * ++ * Revision 1.12 2001/05/27 06:12:10 rgb ++ * Added structures for pid, packet count and last access time to eroute. ++ * Added packet count to beginning of /proc/net/ipsec_eroute. ++ * ++ * Revision 1.11 2000/09/08 19:12:56 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * ++ * Revision 1.10 2000/03/22 16:15:36 rgb ++ * Fixed renaming of dev_get (MB). ++ * ++ * Revision 1.9 2000/01/21 06:13:26 rgb ++ * Added a macro for AF_ENCAP ++ * ++ * Revision 1.8 1999/12/31 14:56:55 rgb ++ * MB fix for 2.3 dev-use-count. ++ * ++ * Revision 1.7 1999/11/18 04:09:18 rgb ++ * Replaced all kernel version macros to shorter, readable form. ++ * ++ * Revision 1.6 1999/09/24 00:34:13 rgb ++ * Add Marc Boucher's support for 2.3.xx+. ++ * ++ * Revision 1.5 1999/04/11 00:28:57 henry ++ * GPL boilerplate ++ * ++ * Revision 1.4 1999/04/06 04:54:25 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.3 1998/10/19 14:44:28 rgb ++ * Added inclusion of freeswan.h. ++ * sa_id structure implemented and used: now includes protocol. ++ * ++ * Revision 1.2 1998/07/14 18:19:33 rgb ++ * Added #ifdef __KERNEL__ directives to restrict scope of header. ++ * ++ * Revision 1.1 1998/06/18 21:27:44 henry ++ * move sources from klips/src to klips/net/ipsec, to keep stupid ++ * kernel-build scripts happier in the presence of symlinks ++ * ++ * Revision 1.2 1998/04/21 21:29:10 rgb ++ * Rearrange debug switches to change on the fly debug output from user ++ * space. Only kernel changes checked in at this time. radij.c was also ++ * changed to temporarily remove buggy debugging code in rj_delete causing ++ * an OOPS and hence, netlink device open errors. ++ * ++ * Revision 1.1 1998/04/09 03:05:58 henry ++ * sources moved up from linux/net/ipsec ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:02 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.4 1997/01/15 01:28:15 ji ++ * Minor cosmetic changes. 
++ * ++ * Revision 0.3 1996/11/20 14:35:48 ji ++ * Minor Cleanup. ++ * Rationalized debugging code. ++ * ++ * Revision 0.2 1996/11/02 00:18:33 ji ++ * First limited release. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_eroute.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,112 @@ ++/* ++ * @(#) declarations of eroute structures ++ * ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs ++ * Copyright (C) 2001 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_eroute.h,v 1.5 2004-04-05 19:55:05 mcr Exp $ ++ * ++ * derived from ipsec_encap.h 1.15 on 2001/9/18 by mcr. ++ * ++ */ ++ ++#ifndef _IPSEC_EROUTE_H_ ++ ++#include "radij.h" ++#include "ipsec_encap.h" ++#include "ipsec_radij.h" ++ ++/* ++ * The "type" is really part of the address as far as the routing ++ * system is concerned. By using only one bit in the type field ++ * for each type, we sort-of make sure that different types of ++ * encapsulation addresses won't be matched against the wrong type. 
++ */ ++ ++/* ++ * An entry in the radix tree ++ */ ++ ++struct rjtentry ++{ ++ struct radij_node rd_nodes[2]; /* tree glue, and other values */ ++#define rd_key(r) ((struct sockaddr_encap *)((r)->rd_nodes->rj_key)) ++#define rd_mask(r) ((struct sockaddr_encap *)((r)->rd_nodes->rj_mask)) ++ short rd_flags; ++ short rd_count; ++}; ++ ++struct ident ++{ ++ __u16 type; /* identity type */ ++ __u64 id; /* identity id */ ++ __u8 len; /* identity len */ ++ caddr_t data; /* identity data */ ++}; ++ ++/* ++ * An encapsulation route consists of a pointer to a ++ * radix tree entry and a SAID (a destination_address/SPI/protocol triple). ++ */ ++ ++struct eroute ++{ ++ struct rjtentry er_rjt; ++ ip_said er_said; ++ uint32_t er_pid; ++ uint32_t er_count; ++ uint64_t er_lasttime; ++ struct sockaddr_encap er_eaddr; /* MCR get rid of _encap, it is silly*/ ++ struct sockaddr_encap er_emask; ++ struct ident er_ident_s; ++ struct ident er_ident_d; ++ struct sk_buff* er_first; ++ struct sk_buff* er_last; ++}; ++ ++#define er_dst er_said.dst ++#define er_spi er_said.spi ++ ++#define _IPSEC_EROUTE_H_ ++#endif /* _IPSEC_EROUTE_H_ */ ++ ++/* ++ * $Log: ipsec_eroute.h,v $ ++ * Revision 1.5 2004-04-05 19:55:05 mcr ++ * Moved from linux/include/freeswan/ipsec_eroute.h,v ++ * ++ * Revision 1.4 2003/10/31 02:27:05 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.3.30.2 2003/10/29 01:10:19 mcr ++ * elimited "struct sa_id" ++ * ++ * Revision 1.3.30.1 2003/09/21 13:59:38 mcr ++ * pre-liminary X.509 patch - does not yet pass tests. ++ * ++ * Revision 1.3 2002/04/24 07:36:46 mcr ++ * Moved from ./klips/net/ipsec/ipsec_eroute.h,v ++ * ++ * Revision 1.2 2001/11/26 09:16:13 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. 
++ * ++ * Revision 1.1.2.1 2001/09/25 02:18:54 mcr ++ * struct eroute moved to ipsec_eroute.h ++ * ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_errs.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,53 @@ ++/* ++ * @(#) definition of ipsec_errs structure ++ * ++ * Copyright (C) 2001 Richard Guy Briggs ++ * and Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_errs.h,v 1.4 2004-04-05 19:55:05 mcr Exp $ ++ * ++ */ ++ ++/* ++ * This file describes the errors/statistics that FreeSWAN collects. ++ * ++ */ ++ ++struct ipsec_errs { ++ __u32 ips_alg_errs; /* number of algorithm errors */ ++ __u32 ips_auth_errs; /* # of authentication errors */ ++ __u32 ips_encsize_errs; /* # of encryption size errors*/ ++ __u32 ips_encpad_errs; /* # of encryption pad errors*/ ++ __u32 ips_replaywin_errs; /* # of pkt sequence errors */ ++}; ++ ++/* ++ * $Log: ipsec_errs.h,v $ ++ * Revision 1.4 2004-04-05 19:55:05 mcr ++ * Moved from linux/include/freeswan/ipsec_errs.h,v ++ * ++ * Revision 1.3 2002/04/24 07:36:46 mcr ++ * Moved from ./klips/net/ipsec/ipsec_errs.h,v ++ * ++ * Revision 1.2 2001/11/26 09:16:13 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.1.2.1 2001/09/25 02:25:57 mcr ++ * lifetime structure created and common functions created. 
++ * ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_esp.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,157 @@ ++/* ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_esp.h,v 1.28 2004-09-13 02:22:10 mcr Exp $ ++ */ ++ ++#include "openswan/ipsec_md5h.h" ++#include "openswan/ipsec_sha1.h" ++ ++#include "crypto/des.h" ++ ++#ifndef IPPROTO_ESP ++#define IPPROTO_ESP 50 ++#endif /* IPPROTO_ESP */ ++ ++#define ESP_HEADER_LEN 8 /* 64 bits header (spi+rpl)*/ ++ ++#define EMT_ESPDESCBC_ULEN 20 /* coming from user mode */ ++#define EMT_ESPDES_KMAX 64 /* 512 bit secret key enough? 
*/ ++#define EMT_ESPDES_KEY_SZ 8 /* 56 bit secret key with parity = 64 bits */ ++#define EMT_ESP3DES_KEY_SZ 24 /* 168 bit secret key with parity = 192 bits */ ++#define EMT_ESPDES_IV_SZ 8 /* IV size */ ++#define ESP_DESCBC_BLKLEN 8 /* DES-CBC block size */ ++ ++#define ESP_IV_MAXSZ 16 /* This is _critical_ */ ++#define ESP_IV_MAXSZ_INT (ESP_IV_MAXSZ/sizeof(int)) ++ ++#define DB_ES_PKTRX 0x0001 ++#define DB_ES_PKTRX2 0x0002 ++#define DB_ES_IPSA 0x0010 ++#define DB_ES_XF 0x0020 ++#define DB_ES_IPAD 0x0040 ++#define DB_ES_INAU 0x0080 ++#define DB_ES_OINFO 0x0100 ++#define DB_ES_OINFO2 0x0200 ++#define DB_ES_OH 0x0400 ++#define DB_ES_REPLAY 0x0800 ++ ++#ifdef __KERNEL__ ++struct des_eks { ++ des_key_schedule ks; ++}; ++ ++extern struct inet_protocol esp_protocol; ++ ++struct options; ++ ++struct esphdr ++{ ++ __u32 esp_spi; /* Security Parameters Index */ ++ __u32 esp_rpl; /* Replay counter */ ++ __u8 esp_iv[8]; /* iv */ ++}; ++ ++extern struct xform_functions esp_xform_funcs[]; ++ ++#ifdef CONFIG_KLIPS_DEBUG ++extern int debug_esp; ++#endif /* CONFIG_KLIPS_DEBUG */ ++#endif /* __KERNEL__ */ ++ ++/* ++ * $Log: ipsec_esp.h,v $ ++ * Revision 1.28 2004-09-13 02:22:10 mcr ++ * #define inet_protocol if necessary. ++ * ++ * Revision 1.27 2004/09/06 18:35:41 mcr ++ * 2.6.8.1 gets rid of inet_protocol->net_protocol compatibility, ++ * so adjust for that. ++ * ++ * Revision 1.26 2004/07/10 19:08:41 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.25 2004/04/06 02:49:08 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.24 2004/04/05 19:55:05 mcr ++ * Moved from linux/include/freeswan/ipsec_esp.h,v ++ * ++ * Revision 1.23 2004/04/05 19:41:05 mcr ++ * merged alg-branch code. ++ * ++ * Revision 1.22 2003/12/13 19:10:16 mcr ++ * refactored rcv and xmit code - same as FS 2.05. ++ * ++ * Revision 1.23 2003/12/11 20:14:58 mcr ++ * refactored the xmit code, to move all encapsulation ++ * code into protocol functions. 
Note that all functions ++ * are essentially done by a single function, which is probably ++ * wrong. ++ * the rcv_functions structures are renamed xform_functions. ++ * ++ * Revision 1.22 2003/12/06 21:21:19 mcr ++ * split up receive path into per-transform files, for ++ * easier later removal. ++ * ++ * Revision 1.21.8.1 2003/12/22 15:25:52 jjo ++ * Merged algo-0.8.1-rc11-test1 into alg-branch ++ * ++ * Revision 1.21 2003/02/06 02:21:34 rgb ++ * ++ * Moved "struct auth_alg" from ipsec_rcv.c to ipsec_ah.h . ++ * Changed "struct ah" to "struct ahhdr" and "struct esp" to "struct esphdr". ++ * Removed "#ifdef INBOUND_POLICY_CHECK_eroute" dead code. ++ * ++ * Revision 1.20 2002/05/14 02:37:02 rgb ++ * Change reference from _TDB to _IPSA. ++ * ++ * Revision 1.19 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.18 2002/04/24 07:36:46 mcr ++ * Moved from ./klips/net/ipsec/ipsec_esp.h,v ++ * ++ * Revision 1.17 2002/02/20 01:27:07 rgb ++ * Ditched a pile of structs only used by the old Netlink interface. ++ * ++ * Revision 1.16 2001/12/11 02:35:57 rgb ++ * Change "struct net_device" to "struct device" for 2.2 compatibility. ++ * ++ * Revision 1.15 2001/11/26 09:23:48 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.14.2.3 2001/10/23 04:16:42 mcr ++ * get definition of des_key_schedule from des.h ++ * ++ * Revision 1.14.2.2 2001/10/22 20:33:13 mcr ++ * use "des_key_schedule" structure instead of cooking our own. ++ * ++ * Revision 1.14.2.1 2001/09/25 02:18:25 mcr ++ * replace "struct device" with "struct netdevice" ++ * ++ * Revision 1.14 2001/06/14 19:35:08 rgb ++ * Update copyright date. ++ * ++ * Revision 1.13 2000/09/08 19:12:56 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * ++ * Revision 1.12 2000/08/01 14:51:50 rgb ++ * Removed _all_ remaining traces of DES. 
++ * ++ * Revision 1.11 2000/01/10 16:36:20 rgb ++ * Ditch last of EME option flags, including initiator. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_ipcomp.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,94 @@ ++/* ++ * IP compression header declations ++ * ++ * Copyright (C) 2003 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_ipcomp.h,v 1.4 2004-07-10 19:08:41 mcr Exp $ ++ */ ++ ++#ifndef IPSEC_IPCOMP_H ++#define IPSEC_IPCOMP_H ++ ++#include "openswan/ipsec_auth.h" ++ ++/* Prefix all global deflate symbols with "ipcomp_" to avoid collisions with ppp_deflate & ext2comp */ ++#ifndef IPCOMP_PREFIX ++#define IPCOMP_PREFIX ++#endif /* IPCOMP_PREFIX */ ++ ++#ifndef IPPROTO_COMP ++#define IPPROTO_COMP 108 ++#endif /* IPPROTO_COMP */ ++ ++#ifdef CONFIG_KLIPS_DEBUG ++extern int sysctl_ipsec_debug_ipcomp; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++struct ipcomphdr { /* IPCOMP header */ ++ __u8 ipcomp_nh; /* Next header (protocol) */ ++ __u8 ipcomp_flags; /* Reserved, must be 0 */ ++ __u16 ipcomp_cpi; /* Compression Parameter Index */ ++}; ++ ++extern struct inet_protocol comp_protocol; ++extern int sysctl_ipsec_debug_ipcomp; ++ ++#define IPCOMP_UNCOMPRESSABLE 0x000000001 ++#define IPCOMP_COMPRESSIONERROR 0x000000002 ++#define IPCOMP_PARMERROR 0x000000004 ++#define IPCOMP_DECOMPRESSIONERROR 0x000000008 ++ ++#define IPCOMP_ADAPT_INITIAL_TRIES 8 ++#define IPCOMP_ADAPT_INITIAL_SKIP 4 ++#define IPCOMP_ADAPT_SUBSEQ_TRIES 2 
++#define IPCOMP_ADAPT_SUBSEQ_SKIP 8 ++ ++/* Function prototypes */ ++struct sk_buff *skb_compress(struct sk_buff *skb, struct ipsec_sa *ips, unsigned int *flags); ++struct sk_buff *skb_decompress(struct sk_buff *skb, struct ipsec_sa *ips, unsigned int *flags); ++ ++extern struct xform_functions ipcomp_xform_funcs[]; ++ ++#endif /* IPSEC_IPCOMP_H */ ++ ++/* ++ * $Log: ipsec_ipcomp.h,v $ ++ * Revision 1.4 2004-07-10 19:08:41 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.3 2004/04/06 02:49:08 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.2 2004/04/05 19:55:05 mcr ++ * Moved from linux/include/freeswan/ipsec_ipcomp.h,v ++ * ++ * Revision 1.1 2003/12/13 19:10:16 mcr ++ * refactored rcv and xmit code - same as FS 2.05. ++ * ++ * Revision 1.2 2003/12/11 20:14:58 mcr ++ * refactored the xmit code, to move all encapsulation ++ * code into protocol functions. Note that all functions ++ * are essentially done by a single function, which is probably ++ * wrong. ++ * the rcv_functions structures are renamed xform_functions. ++ * ++ * Revision 1.1 2003/12/06 21:21:19 mcr ++ * split up receive path into per-transform files, for ++ * easier later removal. ++ * ++ * ++ * ++ */ ++ ++ ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_ipe4.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,68 @@ ++/* ++ * IP-in-IP Header declarations ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_ipe4.h,v 1.6 2004-04-05 19:55:05 mcr Exp $ ++ */ ++ ++/* The packet header is an IP header! */ ++ ++struct ipe4_xdata /* transform table data */ ++{ ++ struct in_addr i4_src; ++ struct in_addr i4_dst; ++}; ++ ++#define EMT_IPE4_ULEN 8 /* coming from user mode */ ++ ++ ++/* ++ * $Log: ipsec_ipe4.h,v $ ++ * Revision 1.6 2004-04-05 19:55:05 mcr ++ * Moved from linux/include/freeswan/ipsec_ipe4.h,v ++ * ++ * Revision 1.5 2002/04/24 07:36:46 mcr ++ * Moved from ./klips/net/ipsec/ipsec_ipe4.h,v ++ * ++ * Revision 1.4 2001/06/14 19:35:08 rgb ++ * Update copyright date. ++ * ++ * Revision 1.3 1999/04/11 00:28:57 henry ++ * GPL boilerplate ++ * ++ * Revision 1.2 1999/04/06 04:54:25 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.1 1998/06/18 21:27:47 henry ++ * move sources from klips/src to klips/net/ipsec, to keep stupid ++ * kernel-build scripts happier in the presence of symlinks ++ * ++ * Revision 1.1 1998/04/09 03:06:07 henry ++ * sources moved up from linux/net/ipsec ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:03 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.4 1997/01/15 01:28:15 ji ++ * No changes. ++ * ++ * Revision 0.3 1996/11/20 14:48:53 ji ++ * Release update only. ++ * ++ * Revision 0.2 1996/11/02 00:18:33 ji ++ * First limited release. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_ipip.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (C) 2003 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . 
++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_ipip.h,v 1.2 2004-04-05 19:55:05 mcr Exp $ ++ */ ++ ++#ifndef _IPSEC_IPIP_H_ ++ ++#ifndef IPPROTO_IPIP ++#define IPPROTO_IPIP 4 ++#endif /* IPPROTO_ESP */ ++ ++extern struct xform_functions ipip_xform_funcs[]; ++ ++#define _IPSEC_IPIP_H_ ++ ++#endif /* _IPSEC_IPIP_H_ */ ++ ++/* ++ * $Log: ipsec_ipip.h,v $ ++ * Revision 1.2 2004-04-05 19:55:05 mcr ++ * Moved from linux/include/freeswan/ipsec_ipip.h,v ++ * ++ * Revision 1.1 2003/12/13 19:10:16 mcr ++ * refactored rcv and xmit code - same as FS 2.05. ++ * ++ * Revision 1.1 2003/12/11 20:14:58 mcr ++ * refactored the xmit code, to move all encapsulation ++ * code into protocol functions. Note that all functions ++ * are essentially done by a single function, which is probably ++ * wrong. ++ * the rcv_functions structures are renamed xform_functions. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_kern24.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,61 @@ ++/* ++ * @(#) routines to makes kernel 2.4 compatible with 2.6 usage. ++ * ++ * Copyright (C) 2004 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * RCSID $Id: ipsec_kern24.h,v 1.4 2005-05-20 03:19:18 mcr Exp $ ++ */ ++ ++#ifndef _IPSEC_KERN24_H ++ ++#ifndef NET_26 ++#define sk_receive_queue receive_queue ++#define sk_destruct destruct ++#define sk_reuse reuse ++#define sk_zapped zapped ++#define sk_family family ++#define sk_protocol protocol ++#define sk_protinfo protinfo ++#define sk_sleep sleep ++#define sk_state_change state_change ++#define sk_shutdown shutdown ++#define sk_err err ++#define sk_stamp stamp ++#define sk_socket socket ++#define sk_sndbuf sndbuf ++#define sock_flag(sk, flag) sk->dead ++#define sk_for_each(sk, node, plist) for(sk=*plist; sk!=NULL; sk = sk->next) ++#endif ++ ++/* deal with 2.4 vs 2.6 issues with module counts */ ++ ++/* in 2.6, all refcounts are maintained *outside* of the ++ * module to deal with race conditions. ++ */ ++ ++#ifdef NET_26 ++#define KLIPS_INC_USE /* nothing */ ++#define KLIPS_DEC_USE /* nothing */ ++ ++#else ++#define KLIPS_INC_USE MOD_INC_USE_COUNT ++#define KLIPS_DEC_USE MOD_DEC_USE_COUNT ++#endif ++ ++extern int printk_ratelimit(void); ++ ++ ++#define _IPSEC_KERN24_H 1 ++ ++#endif /* _IPSEC_KERN24_H */ ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_kversion.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,429 @@ ++#ifndef _OPENSWAN_KVERSIONS_H ++/* ++ * header file for FreeS/WAN library functions ++ * Copyright (C) 1998, 1999, 2000 Henry Spencer. ++ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: ipsec_kversion.h,v 1.15.2.21 2008-02-17 20:35:35 paul Exp $ ++ */ ++#define _OPENSWAN_KVERSIONS_H /* seen it, no need to see it again */ ++ ++/* ++ * this file contains a series of atomic defines that depend upon ++ * kernel version numbers. The kernel versions are arranged ++ * in version-order number (which is often not chronological) ++ * and each clause enables or disables a feature. ++ */ ++ ++/* ++ * First, assorted kernel-version-dependent trickery. ++ */ ++#include ++#ifndef KERNEL_VERSION ++#define KERNEL_VERSION(x,y,z) (((x)<<16)+((y)<<8)+(z)) ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,1,0) ++#define HEADER_CACHE_BIND_21 ++#error "KLIPS is no longer supported on Linux 2.0. Sorry" ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0) ++#define SPINLOCK ++#define PROC_FS_21 ++#define NETLINK_SOCK ++#define NET_21 ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,1,19) ++#define net_device_stats enet_statistics ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) ++#define SPINLOCK_23 ++#define NETDEV_23 ++# ifndef CONFIG_IP_ALIAS ++# define CONFIG_IP_ALIAS ++# endif ++#include ++#include ++#include ++# ifdef NETLINK_XFRM ++# define NETDEV_25 ++# endif ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,25) ++#define PROC_FS_2325 ++#undef PROC_FS_21 ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,30) ++#define PROC_NO_DUMMY ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,35) ++#define SKB_COPY_EXPAND ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,37) ++#define IP_SELECT_IDENT ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,50) ++# if(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) && defined(CONFIG_NETFILTER)) ++# define SKB_RESET_NFCT ++# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) ++# if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) ++# define SKB_RESET_NFCT 
++# endif ++# endif ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,2) ++#define IP_SELECT_IDENT_NEW ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,4) ++#define IPH_is_SKB_PULLED ++#define SKB_COW_NEW ++#define PROTO_HANDLER_SINGLE_PARM ++#define IP_FRAGMENT_LINEARIZE 1 ++#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,4) */ ++# ifdef REDHAT_BOGOSITY ++# define IP_SELECT_IDENT_NEW ++# define IPH_is_SKB_PULLED ++# define SKB_COW_NEW ++# define PROTO_HANDLER_SINGLE_PARM ++# endif /* REDHAT_BOGOSITY */ ++#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,4) */ ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,9) ++#define MALLOC_SLAB ++#define LINUX_KERNEL_HAS_SNPRINTF ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ++#define HAVE_NETDEV_PRINTK 1 ++#define NET_26 ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,8) ++#define NEED_INET_PROTOCOL ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) ++#define HAVE_SOCK_ZAPPED ++#define NET_26_12_SKALLOC ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) ++#define HAVE_SOCK_SECURITY ++/* skb->nf_debug disappared completely in 2.6.13 */ ++#define HAVE_SKB_NF_DEBUG ++#endif ++ ++#define SYSCTL_IPSEC_DEFAULT_TTL sysctl_ip_default_ttl ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) ++/* skb->stamp changed to skb->tstamp in 2.6.14 */ ++#define HAVE_TSTAMP ++#define HAVE_INET_SK_SPORT ++#undef SYSCTL_IPSEC_DEFAULT_TTL ++#define SYSCTL_IPSEC_DEFAULT_TTL IPSEC_DEFAULT_TTL ++#else ++#define HAVE_SKB_LIST ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) || SLE_VERSION_CODE >= 655616 ++#define HAVE_NEW_SKB_LINEARIZE ++#endif ++ ++/* this is the best we can do to detect XEN, which makes ++ * * patches to linux/skbuff.h, making it look like 2.6.18 version ++ * */ ++#ifdef CONFIG_XEN ++#define HAVE_NEW_SKB_LINEARIZE ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19) ++#define VOID_SOCK_UNREGISTER ++#endif ++ ++#if LINUX_VERSION_CODE >= 
KERNEL_VERSION(2,6,20) ++/* skb->nfmark changed to skb->mark in 2.6.20 */ ++#define nfmark mark ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) ++/* need to include ip.h early, no longer pick it up in skbuff.h */ ++#include ++# define HAVE_KERNEL_TSTAMP ++/* type of sock.sk_stamp changed from timeval to ktime */ ++# define grab_socket_timeval(tv, sock) { (tv) = ktime_to_timeval((sock).sk_stamp); } ++#else ++# define grab_socket_timeval(tv, sock) { (tv) = (sock).sk_stamp; } ++/* internals of struct skbuff changed */ ++# define HAVE_DEV_NEXT ++# define ip_hdr(skb) ((skb)->nh.iph) ++# define skb_tail_pointer(skb) ((skb)->tail) ++# define skb_end_pointer(skb) ((skb)->end) ++# define skb_network_header(skb) ((skb)->nh.raw) ++# define skb_set_network_header(skb,off) ((skb)->nh.raw = (skb)->data + (off)) ++# define tcp_hdr(skb) ((skb)->h.th) ++# define udp_hdr(skb) ((skb)->h.uh) ++# define skb_transport_header(skb) ((skb)->h.raw) ++# define skb_set_transport_header(skb,off) ((skb)->h.raw = (skb)->data + (off)) ++# define skb_mac_header(skb) ((skb)->mac.raw) ++# define skb_set_mac_header(skb,off) ((skb)->mac.raw = (skb)->data + (off)) ++#endif ++/* turn a pointer into an offset for above macros */ ++#define ipsec_skb_offset(skb, ptr) (((unsigned char *)(ptr)) - (skb)->data) ++ ++#ifdef NET_21 ++# include ++#else ++ /* old kernel in.h has some IPv6 stuff, but not quite enough */ ++# define s6_addr16 s6_addr ++# define AF_INET6 10 ++# define uint8_t __u8 ++# define uint16_t __u16 ++# define uint32_t __u32 ++# define uint64_t __u64 ++#endif ++ ++#ifdef NET_21 ++# define ipsec_kfree_skb(a) kfree_skb(a) ++#else /* NET_21 */ ++# define ipsec_kfree_skb(a) kfree_skb(a, FREE_WRITE) ++#endif /* NET_21 */ ++ ++#ifdef NETDEV_23 ++#if 0 ++#ifndef NETDEV_25 ++#define device net_device ++#endif ++#endif ++# define ipsec_dev_get dev_get_by_name ++# define __ipsec_dev_get __dev_get_by_name ++# define ipsec_dev_put(x) dev_put(x) ++# define __ipsec_dev_put(x) __dev_put(x) ++# 
define ipsec_dev_hold(x) dev_hold(x) ++#else /* NETDEV_23 */ ++# define ipsec_dev_get dev_get ++# define __ipsec_dev_put(x) ++# define ipsec_dev_put(x) ++# define ipsec_dev_hold(x) ++#endif /* NETDEV_23 */ ++ ++#ifndef SPINLOCK ++# include ++ /* simulate spin locks and read/write locks */ ++ typedef struct { ++ volatile char lock; ++ } spinlock_t; ++ ++ typedef struct { ++ volatile unsigned int lock; ++ } rwlock_t; ++ ++# define spin_lock_init(x) { (x)->lock = 0;} ++# define rw_lock_init(x) { (x)->lock = 0; } ++ ++# define spin_lock(x) { while ((x)->lock) barrier(); (x)->lock=1;} ++# define spin_lock_irq(x) { cli(); spin_lock(x);} ++# define spin_lock_irqsave(x,flags) { save_flags(flags); spin_lock_irq(x);} ++ ++# define spin_unlock(x) { (x)->lock=0;} ++# define spin_unlock_irq(x) { spin_unlock(x); sti();} ++# define spin_unlock_irqrestore(x,flags) { spin_unlock(x); restore_flags(flags);} ++ ++# define read_lock(x) spin_lock(x) ++# define read_lock_irq(x) spin_lock_irq(x) ++# define read_lock_irqsave(x,flags) spin_lock_irqsave(x,flags) ++ ++# define read_unlock(x) spin_unlock(x) ++# define read_unlock_irq(x) spin_unlock_irq(x) ++# define read_unlock_irqrestore(x,flags) spin_unlock_irqrestore(x,flags) ++ ++# define write_lock(x) spin_lock(x) ++# define write_lock_irq(x) spin_lock_irq(x) ++# define write_lock_irqsave(x,flags) spin_lock_irqsave(x,flags) ++ ++# define write_unlock(x) spin_unlock(x) ++# define write_unlock_irq(x) spin_unlock_irq(x) ++# define write_unlock_irqrestore(x,flags) spin_unlock_irqrestore(x,flags) ++#endif /* !SPINLOCK */ ++ ++#ifndef SPINLOCK_23 ++# define spin_lock_bh(x) spin_lock_irq(x) ++# define spin_unlock_bh(x) spin_unlock_irq(x) ++ ++# define read_lock_bh(x) read_lock_irq(x) ++# define read_unlock_bh(x) read_unlock_irq(x) ++ ++# define write_lock_bh(x) write_lock_irq(x) ++# define write_unlock_bh(x) write_unlock_irq(x) ++#endif /* !SPINLOCK_23 */ ++ ++#ifndef HAVE_NETDEV_PRINTK ++#define netdev_printk(sevlevel, netdev, msglevel, format, 
arg...) \ ++ printk(sevlevel "%s: " format , netdev->name , ## arg) ++#endif ++ ++#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,0) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) ++#include "openswan/ipsec_kern24.h" ++#else ++#error "kernels before 2.4 are not supported at this time" ++#endif ++#endif ++ ++ ++#endif /* _OPENSWAN_KVERSIONS_H */ ++ ++/* ++ * $Log: ipsec_kversion.h,v $ ++ * Revision 1.15.2.21 2008-02-17 20:35:35 paul ++ * enable HAVE_NEW_SKB_LINEARIZE for Suse Linux SLES10 SP1 ++ * ++ * Revision 1.15.2.20 2007-11-16 06:16:10 paul ++ * Fix brackets on SKB_RESET_NFCT case ++ * ++ * Revision 1.15.2.19 2007-11-16 06:01:27 paul ++ * On 2.6.23+, sk->nfct is part of skbut only when CONFIG_NF_CONNTRACK or ++ * CONFIG_NF_CONNTRACK_MODUE is set, where previously this was handled with ++ * CONFIG_NETFILTER. ++ * ++ * Revision 1.15.2.18 2007-11-07 14:17:56 paul ++ * Xen modifies skb structures, so xen kernels < 2.6.18 need to have ++ * HAVE_NEW_SKB_LINEARIZE defined. ++ * ++ * Revision 1.15.2.17 2007-10-31 19:57:40 paul ++ * type of sock.sk_stamp changed from timeval to ktime [dhr] ++ * ++ * Revision 1.15.2.16 2007-10-30 22:17:02 paul ++ * Move the define for ktime_to_timeval() from "not 2.6.22" to "< 2.6.16", ++ * where it belongs. ++ * ++ * Revision 1.15.2.15 2007-10-30 21:44:00 paul ++ * added a backport definition for define skb_end_pointer [dhr] ++ * ++ * Revision 1.15.2.14 2007-10-28 00:26:03 paul ++ * Start of fix for 2.6.22+ kernels and skb_tail_pointer() ++ * ++ * Revision 1.15.2.13 2007/09/05 02:28:27 paul ++ * Patch by David McCullough for 2.6.22 compatibility (HAVE_KERNEL_TSTAMP, ++ * HAVE_DEV_NEXT and other header surgery) ++ * ++ * Revision 1.15.2.12 2007/08/10 01:40:49 paul ++ * Fix for sock_unregister for 2.6.19 by Sergeil ++ * ++ * Revision 1.15.2.11 2007/02/20 03:53:16 paul ++ * Added comment, made layout consistent with other checks. 
++ * ++ * Revision 1.15.2.10 2007/02/16 19:08:12 paul ++ * Fix for compiling on 2.6.20 (nfmark is now called mark in sk_buff) ++ * ++ * Revision 1.15.2.9 2006/07/29 05:00:40 paul ++ * Added HAVE_NEW_SKB_LINEARIZE for 2.6.18+ kernels where skb_linearize ++ * only takes 1 argument. ++ * ++ * Revision 1.15.2.8 2006/05/01 14:31:52 mcr ++ * FREESWAN->OPENSWAN in #ifdef. ++ * ++ * Revision 1.15.2.7 2006/01/11 02:02:59 mcr ++ * updated patches and DEFAULT_TTL code to work ++ * ++ * Revision 1.15.2.6 2006/01/03 19:25:02 ken ++ * Remove duplicated #ifdef for TTL fix - bad patch ++ * ++ * Revision 1.15.2.5 2006/01/03 18:06:33 ken ++ * Fix for missing sysctl default ttl ++ * ++ * Revision 1.15.2.4 2005/11/27 21:40:14 paul ++ * Pull down TTL fixes from head. this fixes "Unknown symbol sysctl_ip_default_ttl" ++ * in for klips as module. ++ * ++ * Revision 1.15.2.3 2005/11/22 04:11:52 ken ++ * Backport fixes for 2.6.14 kernels from HEAD ++ * ++ * Revision 1.15.2.2 2005/09/01 01:57:19 paul ++ * michael's fixes for 2.6.13 from head ++ * ++ * Revision 1.15.2.1 2005/08/27 23:13:48 paul ++ * Fix for: ++ * 7 weeks ago: [NET]: Remove unused security member in sk_buff ++ * changeset 4280: 328ea53f5fee ++ * parent 4279: beb0afb0e3f8 ++ * author: Thomas Graf ++ * date: Tue Jul 5 21:12:44 2005 ++ * files: include/linux/skbuff.h include/linux/tc_ematch/tc_em_meta.h net/core/skbuff.c net/ipv4/ip_output.c net/ipv6/ip6_output.c net/sched/em_meta.c ++ * ++ * This should fix compilation on 2.6.13(rc) kernels ++ * ++ * Revision 1.15 2005/07/19 20:02:15 mcr ++ * sk_alloc() interface change. ++ * ++ * Revision 1.14 2005/07/08 16:20:05 mcr ++ * fix for 2.6.12 disapperance of sk_zapped field -> sock_flags. ++ * ++ * Revision 1.13 2005/05/20 03:19:18 mcr ++ * modifications for use on 2.4.30 kernel, with backported ++ * printk_ratelimit(). all warnings removed. ++ * ++ * Revision 1.12 2005/04/13 22:46:21 mcr ++ * note that KLIPS does not work on Linux 2.0. 
++ * ++ * Revision 1.11 2004/09/13 02:22:26 mcr ++ * #define inet_protocol if necessary. ++ * ++ * Revision 1.10 2004/08/03 18:17:15 mcr ++ * in 2.6, use "net_device" instead of #define device->net_device. ++ * this probably breaks 2.0 compiles. ++ * ++ * Revision 1.9 2004/04/05 19:55:05 mcr ++ * Moved from linux/include/freeswan/ipsec_kversion.h,v ++ * ++ * Revision 1.8 2003/12/13 19:10:16 mcr ++ * refactored rcv and xmit code - same as FS 2.05. ++ * ++ * Revision 1.7 2003/07/31 22:48:08 mcr ++ * derive NET25-ness from presence of NETLINK_XFRM macro. ++ * ++ * Revision 1.6 2003/06/24 20:22:32 mcr ++ * added new global: ipsecdevices[] so that we can keep track of ++ * the ipsecX devices. They will be referenced with dev_hold(), ++ * so 2.2 may need this as well. ++ * ++ * Revision 1.5 2003/04/03 17:38:09 rgb ++ * Centralised ipsec_kfree_skb and ipsec_dev_{get,put}. ++ * ++ * Revision 1.4 2002/04/24 07:36:46 mcr ++ * Moved from ./klips/net/ipsec/ipsec_kversion.h,v ++ * ++ * Revision 1.3 2002/04/12 03:21:17 mcr ++ * three parameter version of ip_select_ident appears first ++ * in 2.4.2 (RH7.1) not 2.4.4. ++ * ++ * Revision 1.2 2002/03/08 21:35:22 rgb ++ * Defined LINUX_KERNEL_HAS_SNPRINTF to shut up compiler warnings after ++ * 2.4.9. (Andreas Piesk). ++ * ++ * Revision 1.1 2002/01/29 02:11:42 mcr ++ * removal of kversions.h - sources that needed it now use ipsec_param.h. ++ * updating of IPv6 structures to match latest in6.h version. ++ * removed dead code from freeswan.h that also duplicated kversions.h ++ * code. 
++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_life.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,112 @@ ++/* ++ * Definitions relevant to IPSEC lifetimes ++ * Copyright (C) 2001 Richard Guy Briggs ++ * and Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_life.h,v 1.4 2004-04-05 19:55:05 mcr Exp $ ++ * ++ * This file derived from ipsec_xform.h on 2001/9/18 by mcr. ++ * ++ */ ++ ++/* ++ * This file describes the book keeping fields for the ++ * IPsec Security Association Structure. ("ipsec_sa") ++ * ++ * This structure is never allocated directly by kernel code, ++ * (it is always a static/auto or is part of a structure) ++ * so it does not have a reference count. ++ * ++ */ ++ ++#ifndef _IPSEC_LIFE_H_ ++ ++/* ++ * _count is total count. ++ * _hard is hard limit (kill SA after this number) ++ * _soft is soft limit (try to renew SA after this number) ++ * _last is used in some special cases. 
++ * ++ */ ++ ++struct ipsec_lifetime64 ++{ ++ __u64 ipl_count; ++ __u64 ipl_soft; ++ __u64 ipl_hard; ++ __u64 ipl_last; ++}; ++ ++struct ipsec_lifetimes ++{ ++ /* number of bytes processed */ ++ struct ipsec_lifetime64 ipl_bytes; ++ ++ /* number of packets processed */ ++ struct ipsec_lifetime64 ipl_packets; ++ ++ /* time since SA was added */ ++ struct ipsec_lifetime64 ipl_addtime; ++ ++ /* time since SA was first used */ ++ struct ipsec_lifetime64 ipl_usetime; ++ ++ /* from rfc2367: ++ * For CURRENT, the number of different connections, ++ * endpoints, or flows that the association has been ++ * allocated towards. For HARD and SOFT, the number of ++ * these the association may be allocated towards ++ * before it expires. The concept of a connection, ++ * flow, or endpoint is system specific. ++ * ++ * mcr(2001-9-18) it is unclear what purpose these serve for FreeSWAN. ++ * They are maintained for PF_KEY compatibility. ++ */ ++ struct ipsec_lifetime64 ipl_allocations; ++}; ++ ++enum ipsec_life_alive { ++ ipsec_life_harddied = -1, ++ ipsec_life_softdied = 0, ++ ipsec_life_okay = 1 ++}; ++ ++enum ipsec_life_type { ++ ipsec_life_timebased = 1, ++ ipsec_life_countbased= 0 ++}; ++ ++#define _IPSEC_LIFE_H_ ++#endif /* _IPSEC_LIFE_H_ */ ++ ++ ++/* ++ * $Log: ipsec_life.h,v $ ++ * Revision 1.4 2004-04-05 19:55:05 mcr ++ * Moved from linux/include/freeswan/ipsec_life.h,v ++ * ++ * Revision 1.3 2002/04/24 07:36:46 mcr ++ * Moved from ./klips/net/ipsec/ipsec_life.h,v ++ * ++ * Revision 1.2 2001/11/26 09:16:14 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.1.2.1 2001/09/25 02:25:58 mcr ++ * lifetime structure created and common functions created. 
++ * ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_md5h.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,143 @@ ++/* ++ * RCSID $Id: ipsec_md5h.h,v 1.10 2004-09-08 17:21:35 ken Exp $ ++ */ ++ ++/* ++ * The rest of this file is Copyright RSA DSI. See the following comments ++ * for the full Copyright notice. ++ */ ++ ++#ifndef _IPSEC_MD5H_H_ ++#define _IPSEC_MD5H_H_ ++ ++/* GLOBAL.H - RSAREF types and constants ++ */ ++ ++/* PROTOTYPES should be set to one if and only if the compiler supports ++ function argument prototyping. ++ The following makes PROTOTYPES default to 0 if it has not already ++ been defined with C compiler flags. ++ */ ++#ifndef PROTOTYPES ++#define PROTOTYPES 1 ++#endif /* !PROTOTYPES */ ++ ++/* POINTER defines a generic pointer type */ ++typedef __u8 *POINTER; ++ ++/* UINT2 defines a two byte word */ ++typedef __u16 UINT2; ++ ++/* UINT4 defines a four byte word */ ++typedef __u32 UINT4; ++ ++/* PROTO_LIST is defined depending on how PROTOTYPES is defined above. ++ If using PROTOTYPES, then PROTO_LIST returns the list, otherwise it ++ returns an empty list. ++ */ ++ ++#if PROTOTYPES ++#define PROTO_LIST(list) list ++#else /* PROTOTYPES */ ++#define PROTO_LIST(list) () ++#endif /* PROTOTYPES */ ++ ++ ++/* MD5.H - header file for MD5C.C ++ */ ++ ++/* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All ++rights reserved. ++ ++License to copy and use this software is granted provided that it ++is identified as the "RSA Data Security, Inc. MD5 Message-Digest ++Algorithm" in all material mentioning or referencing this software ++or this function. ++ ++License is also granted to make and use derivative works provided ++that such works are identified as "derived from the RSA Data ++Security, Inc. MD5 Message-Digest Algorithm" in all material ++mentioning or referencing the derived work. ++ ++RSA Data Security, Inc. 
makes no representations concerning either ++the merchantability of this software or the suitability of this ++software for any particular purpose. It is provided "as is" ++without express or implied warranty of any kind. ++ ++These notices must be retained in any copies of any part of this ++documentation and/or software. ++ */ ++ ++/* MD5 context. */ ++typedef struct { ++ UINT4 state[4]; /* state (ABCD) */ ++ UINT4 count[2]; /* number of bits, modulo 2^64 (lsb first) */ ++ unsigned char buffer[64]; /* input buffer */ ++} MD5_CTX; ++ ++void osMD5Init PROTO_LIST ((void *)); ++void osMD5Update PROTO_LIST ++ ((void *, unsigned char *, __u32)); ++void osMD5Final PROTO_LIST ((unsigned char [16], void *)); ++ ++#endif /* _IPSEC_MD5H_H_ */ ++ ++/* ++ * $Log: ipsec_md5h.h,v $ ++ * Revision 1.10 2004-09-08 17:21:35 ken ++ * Rename MD5* -> osMD5 functions to prevent clashes with other symbols exported by kernel modules (CIFS in 2.6 initiated this) ++ * ++ * Revision 1.9 2004/04/05 19:55:05 mcr ++ * Moved from linux/include/freeswan/ipsec_md5h.h,v ++ * ++ * Revision 1.8 2002/09/10 01:45:09 mcr ++ * changed type of MD5_CTX and SHA1_CTX to void * so that ++ * the function prototypes would match, and could be placed ++ * into a pointer to a function. ++ * ++ * Revision 1.7 2002/04/24 07:36:46 mcr ++ * Moved from ./klips/net/ipsec/ipsec_md5h.h,v ++ * ++ * Revision 1.6 1999/12/13 13:59:13 rgb ++ * Quick fix to argument size to Update bugs. ++ * ++ * Revision 1.5 1999/12/07 18:16:23 rgb ++ * Fixed comments at end of #endif lines. ++ * ++ * Revision 1.4 1999/04/06 04:54:26 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.3 1999/01/22 06:19:58 rgb ++ * 64-bit clean-up. ++ * ++ * Revision 1.2 1998/11/30 13:22:54 rgb ++ * Rationalised all the klips kernel file headers. They are much shorter ++ * now and won't conflict under RH5.2. 
++ * ++ * Revision 1.1 1998/06/18 21:27:48 henry ++ * move sources from klips/src to klips/net/ipsec, to keep stupid ++ * kernel-build scripts happier in the presence of symlinks ++ * ++ * Revision 1.2 1998/04/23 20:54:03 rgb ++ * Fixed md5 and sha1 include file nesting issues, to be cleaned up when ++ * verified. ++ * ++ * Revision 1.1 1998/04/09 03:04:21 henry ++ * sources moved up from linux/net/ipsec ++ * these two include files modified not to include others except in kernel ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:03 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.4 1997/01/15 01:28:15 ji ++ * No changes. ++ * ++ * Revision 0.3 1996/11/20 14:48:53 ji ++ * Release update only. ++ * ++ * Revision 0.2 1996/11/02 00:18:33 ji ++ * First limited release. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_param.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,397 @@ ++/* ++ * @(#) Openswan tunable paramaters ++ * ++ * Copyright (C) 2001 Richard Guy Briggs ++ * and Michael Richardson ++ * Copyright (C) 2004 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_param.h,v 1.29.6.5 2008-02-18 16:27:19 paul Exp $ ++ * ++ */ ++ ++/* ++ * This file provides a set of #define's which may be tuned by various ++ * people/configurations. It keeps all compile-time tunables in one place. ++ * ++ * This file should be included before all other IPsec kernel-only files. 
++ * ++ */ ++ ++#ifndef _IPSEC_PARAM_H_ ++ ++#ifdef __KERNEL__ ++#include "ipsec_kversion.h" ++ ++/* Set number of ipsecX virtual devices here. */ ++/* This must be < exp(field width of IPSEC_DEV_FORMAT) */ ++/* It must also be reasonable so as not to overload the memory and CPU */ ++/* constraints of the host. */ ++#define IPSEC_NUM_IF 4 ++/* The field width must be < IF_NAM_SIZ - strlen("ipsec") - 1. */ ++/* With "ipsec" being 5 characters, that means 10 is the max field width */ ++/* but machine memory and CPU constraints are not likely to tollerate */ ++/* more than 3 digits. The default is one digit. */ ++/* Update: userland scripts get upset if they can't find "ipsec0", so */ ++/* for now, no "0"-padding should be used (which would have been helpful */ ++/* to make text-searches work */ ++#define IPSEC_DEV_FORMAT "ipsec%d" ++/* For, say, 500 virtual ipsec devices, I would recommend: */ ++/* #define IPSEC_NUM_IF 500 */ ++/* #define IPSEC_DEV_FORMAT "ipsec%03d" */ ++/* Note that the "interfaces=" line in /etc/ipsec.conf would be, um, challenging. */ ++ ++/* use dynamic ipsecX device allocation */ ++#ifndef CONFIG_KLIPS_DYNDEV ++#define CONFIG_KLIPS_DYNDEV 1 ++#endif /* CONFIG_KLIPS_DYNDEV */ ++ ++ ++#ifdef CONFIG_KLIPS_BIGGATE ++# define SADB_HASHMOD 8069 ++#else /* CONFIG_KLIPS_BIGGATE */ ++# define SADB_HASHMOD 257 ++#endif /* CONFIG_KLIPS_BIGGATE */ ++#endif /* __KERNEL__ */ ++ ++/* ++ * This is for the SA reference table. This number is related to the ++ * maximum number of SAs that KLIPS can concurrently deal with, plus enough ++ * space for keeping expired SAs around. ++ * ++ * TABLE_MAX_WIDTH is the number of bits that we will use. ++ * MAIN_TABLE_WIDTH is the number of bits used for the primary index table. 
++ * ++ */ ++#ifndef IPSEC_SA_REF_TABLE_IDX_WIDTH ++# define IPSEC_SA_REF_TABLE_IDX_WIDTH 16 ++#endif ++ ++#ifndef IPSEC_SA_REF_MAINTABLE_IDX_WIDTH ++# define IPSEC_SA_REF_MAINTABLE_IDX_WIDTH 4 ++#endif ++ ++#ifndef IPSEC_SA_REF_FREELIST_NUM_ENTRIES ++# define IPSEC_SA_REF_FREELIST_NUM_ENTRIES 256 ++#endif ++ ++#ifndef IPSEC_SA_REF_CODE ++# define IPSEC_SA_REF_CODE 1 ++#endif ++ ++#ifdef __KERNEL__ ++/* This is defined for 2.4, but not 2.2.... */ ++#ifndef ARPHRD_VOID ++# define ARPHRD_VOID 0xFFFF ++#endif ++ ++/* always turn on IPIP mode */ ++#ifndef CONFIG_KLIPS_IPIP ++#define CONFIG_KLIPS_IPIP 1 ++#endif ++ ++/* ++ * Worry about PROC_FS stuff ++ */ ++#if defined(PROC_FS_2325) ++/* kernel 2.4 */ ++# define IPSEC_PROC_LAST_ARG ,int *eof,void *data ++# define IPSEC_PROCFS_DEBUG_NO_STATIC ++# define IPSEC_PROC_SUBDIRS ++#else ++/* kernel <2.4 */ ++# define IPSEC_PROCFS_DEBUG_NO_STATIC DEBUG_NO_STATIC ++ ++# ifndef PROC_NO_DUMMY ++# define IPSEC_PROC_LAST_ARG , int dummy ++# else ++# define IPSEC_PROC_LAST_ARG ++# endif /* !PROC_NO_DUMMY */ ++#endif /* PROC_FS_2325 */ ++ ++#if !defined(LINUX_KERNEL_HAS_SNPRINTF) ++/* GNU CPP specific! */ ++# define snprintf(buf, len, fmt...) 
sprintf(buf, ##fmt) ++#endif /* !LINUX_KERNEL_HAS_SNPRINTF */ ++ ++#ifdef SPINLOCK ++# ifdef SPINLOCK_23 ++# include /* *lock* */ ++# else /* SPINLOCK_23 */ ++# include /* *lock* */ ++# endif /* SPINLOCK_23 */ ++#endif /* SPINLOCK */ ++ ++#ifndef KLIPS_FIXES_DES_PARITY ++# define KLIPS_FIXES_DES_PARITY 1 ++#endif /* !KLIPS_FIXES_DES_PARITY */ ++ ++/* we don't really want to print these unless there are really big problems */ ++#ifndef KLIPS_DIVULGE_CYPHER_KEY ++# define KLIPS_DIVULGE_CYPHER_KEY 0 ++#endif /* !KLIPS_DIVULGE_CYPHER_KEY */ ++ ++#ifndef KLIPS_DIVULGE_HMAC_KEY ++# define KLIPS_DIVULGE_HMAC_KEY 0 ++#endif /* !KLIPS_DIVULGE_HMAC_KEY */ ++ ++#ifndef IPSEC_DISALLOW_IPOPTIONS ++# define IPSEC_DISALLOW_IPOPTIONS 1 ++#endif /* !KLIPS_DIVULGE_HMAC_KEY */ ++ ++/* extra toggles for regression testing */ ++#ifdef CONFIG_KLIPS_REGRESS ++ ++/* ++ * should pfkey_acquire() become 100% lossy? ++ * ++ */ ++extern int sysctl_ipsec_regress_pfkey_lossage; ++#ifndef KLIPS_PFKEY_ACQUIRE_LOSSAGE ++# ifdef CONFIG_KLIPS_PFKEY_ACQUIRE_LOSSAGE ++# define KLIPS_PFKEY_ACQUIRE_LOSSAGE 100 ++# endif /* CONFIG_KLIPS_PFKEY_ACQUIRE_LOSSAGE */ ++#else ++#define KLIPS_PFKEY_ACQUIRE_LOSSAGE 0 ++#endif /* KLIPS_PFKEY_ACQUIRE_LOSSAGE */ ++ ++#else /* CONFIG_KLIPS_REGRESS */ ++#define KLIPS_PFKEY_ACQUIRE_LOSSAGE 0 ++ ++#endif /* CONFIG_KLIPS_REGRESS */ ++ ++ ++/* ++ * debugging routines. ++ */ ++#define KLIPS_ERROR(flag, format, args...) if(printk_ratelimit() || flag) printk(KERN_ERR "KLIPS " format, ## args) ++#ifdef CONFIG_KLIPS_DEBUG ++#include ++extern void ipsec_print_ip(struct iphdr *ip); ++ ++ #define KLIPS_PRINT(flag, format, args...) \ ++ ((flag) ? printk(KERN_INFO format , ## args) : 0) ++ #define KLIPS_PRINTMORE(flag, format, args...) \ ++ ((flag) ? printk(format , ## args) : 0) ++ #define KLIPS_IP_PRINT(flag, ip) \ ++ ((flag) ? ipsec_print_ip(ip) : 0) ++ #define KLIPS_SATOT(flag, sa, format, dst, dstlen) \ ++ ((flag) ? 
satot(sa, format, dst, dstlen) : 0) ++#else /* CONFIG_KLIPS_DEBUG */ ++ #define KLIPS_PRINT(flag, format, args...) do ; while(0) ++ #define KLIPS_PRINTMORE(flag, format, args...) do ; while(0) ++ #define KLIPS_IP_PRINT(flag, ip) do ; while(0) ++ #define KLIPS_SATOT(flag, sa, format, dst, dstlen) (0) ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ ++/* ++ * Stupid kernel API differences in APIs. Not only do some ++ * kernels not have ip_select_ident, but some have differing APIs, ++ * and SuSE has one with one parameter, but no way of checking to ++ * see what is really what. ++ */ ++ ++#ifdef SUSE_LINUX_2_4_19_IS_STUPID ++#define KLIPS_IP_SELECT_IDENT(iph, skb) ip_select_ident(iph) ++#else ++ ++/* simplest case, nothing */ ++#if !defined(IP_SELECT_IDENT) ++#define KLIPS_IP_SELECT_IDENT(iph, skb) do { iph->id = htons(ip_id_count++); } while(0) ++#endif ++ ++/* kernels > 2.3.37-ish */ ++#if defined(IP_SELECT_IDENT) && !defined(IP_SELECT_IDENT_NEW) ++#define KLIPS_IP_SELECT_IDENT(iph, skb) ip_select_ident(iph, skb->dst) ++#endif ++ ++/* kernels > 2.4.2 */ ++#if defined(IP_SELECT_IDENT) && defined(IP_SELECT_IDENT_NEW) ++#define KLIPS_IP_SELECT_IDENT(iph, skb) ip_select_ident(iph, skb->dst, NULL) ++#endif ++ ++#endif /* SUSE_LINUX_2_4_19_IS_STUPID */ ++ ++/* ++ * make klips fail test:east-espiv-01. 
++ * exploit is at testing/attacks/espiv ++ * ++ */ ++#define KLIPS_IMPAIRMENT_ESPIV_CBC_ATTACK 0 ++ ++ ++/* IP_FRAGMENT_LINEARIZE is set in freeswan.h if Kernel > 2.4.4 */ ++#ifndef IP_FRAGMENT_LINEARIZE ++# define IP_FRAGMENT_LINEARIZE 0 ++#endif /* IP_FRAGMENT_LINEARIZE */ ++#endif /* __KERNEL__ */ ++ ++#ifdef NEED_INET_PROTOCOL ++#define inet_protocol net_protocol ++#endif ++ ++#if defined(CONFIG_IPSEC_NAT_TRAVERSAL) && CONFIG_IPSEC_NAT_TRAVERSAL ++#define NAT_TRAVERSAL 1 ++#else ++/* let people either #undef, or #define = 0 it */ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++#undef CONFIG_IPSEC_NAT_TRAVERSAL ++#endif ++#endif ++ ++#ifndef IPSEC_DEFAULT_TTL ++#define IPSEC_DEFAULT_TTL 64 ++#endif ++ ++#define _IPSEC_PARAM_H_ ++#endif /* _IPSEC_PARAM_H_ */ ++ ++/* ++ * $Log: ipsec_param.h,v $ ++ * Revision 1.29.6.5 2008-02-18 16:27:19 paul ++ * include linux/ip.h for struct iphdr (bug 814) - patch by sedrez ++ * ++ * Revision 1.29.6.4 2007/09/05 02:30:06 paul ++ * KLIPS_SATOT macro. Patch by David McCullough ++ * ++ * Revision 1.29.6.3 2006/05/01 14:32:31 mcr ++ * added KLIPS_ERROR and make sure that things work without CONFIG_KLIPS_REGRESS. ++ * ++ * Revision 1.29.6.2 2005/11/27 21:40:14 paul ++ * Pull down TTL fixes from head. this fixes "Unknown symbol sysctl_ip_default_ttl" ++ * in for klips as module. ++ * ++ * Revision 1.29.6.1 2005/08/12 16:24:18 ken ++ * Pull in NAT-T compile logic from HEAD ++ * ++ * Revision 1.29 2005/01/26 00:50:35 mcr ++ * adjustment of confusion of CONFIG_IPSEC_NAT vs CONFIG_KLIPS_NAT, ++ * and make sure that NAT_TRAVERSAL is set as well to match ++ * userspace compiles of code. ++ * ++ * Revision 1.28 2004/09/13 15:50:15 mcr ++ * spell NEED_INET properly, not NET_INET. ++ * ++ * Revision 1.27 2004/09/13 02:21:45 mcr ++ * always turn on IPIP mode. ++ * #define inet_protocol if necessary. ++ * ++ * Revision 1.26 2004/08/17 03:25:43 mcr ++ * freeswan->openswan. ++ * ++ * Revision 1.25 2004/07/10 19:08:41 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. 
++ * ++ * Revision 1.24 2004/04/05 19:55:06 mcr ++ * Moved from linux/include/freeswan/ipsec_param.h,v ++ * ++ * Revision 1.23 2003/12/13 19:10:16 mcr ++ * refactored rcv and xmit code - same as FS 2.05. ++ * ++ * Revision 1.22 2003/10/31 02:27:05 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.21.4.1 2003/10/29 01:10:19 mcr ++ * elimited "struct sa_id" ++ * ++ * Revision 1.21 2003/04/03 17:38:18 rgb ++ * Centralised ipsec_kfree_skb and ipsec_dev_{get,put}. ++ * Change indentation for readability. ++ * ++ * Revision 1.20 2003/03/14 08:09:26 rgb ++ * Fixed up CONFIG_IPSEC_DYNDEV definitions. ++ * ++ * Revision 1.19 2003/01/30 02:31:43 rgb ++ * ++ * Rename SAref table macro names for clarity. ++ * ++ * Revision 1.18 2002/09/30 19:06:26 rgb ++ * Reduce default table to 16 bits width. ++ * ++ * Revision 1.17 2002/09/20 15:40:29 rgb ++ * Define switch to activate new SAref code. ++ * Prefix macros with "IPSEC_". ++ * Rework saref freelist. ++ * Restrict some bits to kernel context for use to klips utils. ++ * ++ * Revision 1.16 2002/09/20 05:00:31 rgb ++ * Define switch to divulge hmac keys for debugging. ++ * Added IPOPTIONS switch. ++ * ++ * Revision 1.15 2002/09/19 02:34:24 mcr ++ * define IPSEC_PROC_SUBDIRS if we are 2.4, and use that in ipsec_proc.c ++ * to decide if we are to create /proc/net/ipsec/. ++ * ++ * Revision 1.14 2002/08/30 01:20:54 mcr ++ * reorganized 2.0/2.2/2.4 procfs support macro so match ++ * 2.4 values/typedefs. ++ * ++ * Revision 1.13 2002/07/28 22:03:28 mcr ++ * added some documentation to SA_REF_* ++ * turned on fix for ESPIV attack, now that we have the attack code. ++ * ++ * Revision 1.12 2002/07/26 08:48:31 rgb ++ * Added SA ref table code. ++ * ++ * Revision 1.11 2002/07/23 02:57:45 rgb ++ * Define ARPHRD_VOID for < 2.4 kernels. ++ * ++ * Revision 1.10 2002/05/27 21:37:28 rgb ++ * Set the defaults sanely for those adventurous enough to try more than 1 ++ * digit of ipsec devices. 
++ * ++ * Revision 1.9 2002/05/27 18:56:07 rgb ++ * Convert to dynamic ipsec device allocation. ++ * ++ * Revision 1.8 2002/04/24 07:36:47 mcr ++ * Moved from ./klips/net/ipsec/ipsec_param.h,v ++ * ++ * Revision 1.7 2002/04/20 00:12:25 rgb ++ * Added esp IV CBC attack fix, disabled. ++ * ++ * Revision 1.6 2002/01/29 02:11:42 mcr ++ * removal of kversions.h - sources that needed it now use ipsec_param.h. ++ * updating of IPv6 structures to match latest in6.h version. ++ * removed dead code from freeswan.h that also duplicated kversions.h ++ * code. ++ * ++ * Revision 1.5 2002/01/28 19:22:01 mcr ++ * by default, turn off LINEARIZE option ++ * (let kversions.h turn it on) ++ * ++ * Revision 1.4 2002/01/20 20:19:36 mcr ++ * renamed option to IP_FRAGMENT_LINEARIZE. ++ * ++ * Revision 1.3 2002/01/12 02:57:25 mcr ++ * first regression test causes acquire messages to be lost ++ * 100% of the time. This is to help testing of pluto. ++ * ++ * Revision 1.2 2001/11/26 09:16:14 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.1.2.3 2001/10/23 04:40:16 mcr ++ * added #define for DIVULGING session keys in debug output. ++ * ++ * Revision 1.1.2.2 2001/10/22 20:53:25 mcr ++ * added a define to control forcing of DES parity. ++ * ++ * Revision 1.1.2.1 2001/09/25 02:20:19 mcr ++ * many common kernel configuration questions centralized. ++ * more things remain that should be moved from freeswan.h. 
++ * ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_policy.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,217 @@ ++#ifndef _IPSEC_POLICY_H ++/* ++ * policy interface file between pluto and applications ++ * Copyright (C) 2003 Michael Richardson ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: ipsec_policy.h,v 1.7.6.1 2005-07-26 01:53:07 ken Exp $ ++ */ ++#define _IPSEC_POLICY_H /* seen it, no need to see it again */ ++ ++ ++/* ++ * this file defines an interface between an application (or rather an ++ * application library) and a key/policy daemon. It provides for inquiries ++ * as to the current state of a connected socket, as well as for general ++ * questions. ++ * ++ * In general, the interface is defined as a series of functional interfaces, ++ * and the policy messages should be internal. However, because this is in ++ * fact an ABI between pieces of the system that may get compiled and revised ++ * seperately, this ABI must be public and revision controlled. ++ * ++ * It is expected that the daemon will always support previous versions. 
++ */ ++ ++#define IPSEC_POLICY_MSG_REVISION (unsigned)200305061 ++ ++enum ipsec_policy_command { ++ IPSEC_CMD_QUERY_FD = 1, ++ IPSEC_CMD_QUERY_HOSTPAIR = 2, ++ IPSEC_CMD_QUERY_DSTONLY = 3, ++}; ++ ++struct ipsec_policy_msg_head { ++ u_int32_t ipm_version; ++ u_int32_t ipm_msg_len; ++ u_int32_t ipm_msg_type; ++ u_int32_t ipm_msg_seq; ++}; ++ ++enum ipsec_privacy_quality { ++ IPSEC_PRIVACY_NONE = 0, ++ IPSEC_PRIVACY_INTEGRAL = 4, /* not private at all. AH-like */ ++ IPSEC_PRIVACY_UNKNOWN = 8, /* something is claimed, but details unavail */ ++ IPSEC_PRIVACY_ROT13 = 12, /* trivially breakable, i.e. 1DES */ ++ IPSEC_PRIVACY_GAK = 16, /* known eavesdroppers */ ++ IPSEC_PRIVACY_PRIVATE = 32, /* secure for at least a decade */ ++ IPSEC_PRIVACY_STRONG = 64, /* ridiculously secure */ ++ IPSEC_PRIVACY_TORTOISE = 192, /* even stronger, but very slow */ ++ IPSEC_PRIVACY_OTP = 224, /* some kind of *true* one time pad */ ++}; ++ ++enum ipsec_bandwidth_quality { ++ IPSEC_QOS_UNKNOWN = 0, /* unknown bandwidth */ ++ IPSEC_QOS_INTERACTIVE = 16, /* reasonably moderate jitter, moderate fast. ++ Good enough for telnet/ssh. 
*/ ++ IPSEC_QOS_VOIP = 32, /* faster crypto, predicable jitter */ ++ IPSEC_QOS_FTP = 64, /* higher throughput crypto, perhaps hardware ++ offloaded, but latency/jitter may be bad */ ++ IPSEC_QOS_WIRESPEED = 128, /* expect to be able to fill your pipe */ ++}; ++ ++/* moved from programs/pluto/constants.h */ ++/* IPsec AH transform values ++ * RFC2407 The Internet IP security Domain of Interpretation for ISAKMP 4.4.3 ++ * and in http://www.iana.org/assignments/isakmp-registry ++ */ ++enum ipsec_authentication_algo { ++ AH_MD5=2, ++ AH_SHA=3, ++ AH_DES=4, ++ AH_SHA2_256=5, ++ AH_SHA2_384=6, ++ AH_SHA2_512=7 ++}; ++ ++/* IPsec ESP transform values ++ * RFC2407 The Internet IP security Domain of Interpretation for ISAKMP 4.4.4 ++ * and from http://www.iana.org/assignments/isakmp-registry ++ */ ++ ++enum ipsec_cipher_algo { ++ ESP_reserved=0, ++ ESP_DES_IV64=1, ++ ESP_DES=2, ++ ESP_3DES=3, ++ ESP_RC5=4, ++ ESP_IDEA=5, ++ ESP_CAST=6, ++ ESP_BLOWFISH=7, ++ ESP_3IDEA=8, ++ ESP_DES_IV32=9, ++ ESP_RC4=10, ++ ESP_NULL=11, ++ ESP_AES=12, /* 128 bit AES */ ++}; ++ ++/* IPCOMP transform values ++ * RFC2407 The Internet IP security Domain of Interpretation for ISAKMP 4.4.5 ++ */ ++ ++enum ipsec_comp_algo { ++ IPCOMP_OUI= 1, ++ IPCOMP_DEFLATE= 2, ++ IPCOMP_LZS= 3, ++ IPCOMP_V42BIS= 4 ++}; ++ ++/* Identification type values ++ * RFC 2407 The Internet IP security Domain of Interpretation for ISAKMP 4.6.2.1 ++ */ ++ ++enum ipsec_id_type { ++ ID_IMPOSSIBLE= (-2), /* private to Pluto */ ++ ID_MYID= (-1), /* private to Pluto */ ++ ID_NONE= 0, /* private to Pluto */ ++ ID_IPV4_ADDR= 1, ++ ID_FQDN= 2, ++ ID_USER_FQDN= 3, ++ ID_IPV4_ADDR_SUBNET= 4, ++ ID_IPV6_ADDR= 5, ++ ID_IPV6_ADDR_SUBNET= 6, ++ ID_IPV4_ADDR_RANGE= 7, ++ ID_IPV6_ADDR_RANGE= 8, ++ ID_DER_ASN1_DN= 9, ++ ID_DER_ASN1_GN= 10, ++ ID_KEY_ID= 11 ++}; ++ ++/* Certificate type values ++ * RFC 2408 ISAKMP, chapter 3.9 ++ */ ++enum ipsec_cert_type { ++ CERT_NONE= 0, /* none, or guess from file contents */ ++ CERT_PKCS7_WRAPPED_X509= 
1, /* self-signed certificate from disk */ ++ CERT_PGP= 2, ++ CERT_DNS_SIGNED_KEY= 3, /* KEY RR from DNS */ ++ CERT_X509_SIGNATURE= 4, ++ CERT_X509_KEY_EXCHANGE= 5, ++ CERT_KERBEROS_TOKENS= 6, ++ CERT_CRL= 7, ++ CERT_ARL= 8, ++ CERT_SPKI= 9, ++ CERT_X509_ATTRIBUTE= 10, ++ CERT_RAW_RSA= 11, /* raw RSA from config file */ ++}; ++ ++/* a SIG record in ASCII */ ++struct ipsec_dns_sig { ++ char fqdn[256]; ++ char dns_sig[768]; /* empty string if not signed */ ++}; ++ ++struct ipsec_raw_key { ++ char id_name[256]; ++ char fs_keyid[8]; ++}; ++ ++struct ipsec_identity { ++ enum ipsec_id_type ii_type; ++ enum ipsec_cert_type ii_format; ++ union { ++ struct ipsec_dns_sig ipsec_dns_signed; ++ /* some thing for PGP */ ++ /* some thing for PKIX */ ++ struct ipsec_raw_key ipsec_raw_key; ++ } ii_credential; ++}; ++ ++#define IPSEC_MAX_CREDENTIALS 32 ++ ++struct ipsec_policy_cmd_query { ++ struct ipsec_policy_msg_head head; ++ ++ /* Query section */ ++ ip_address query_local; /* us */ ++ ip_address query_remote; /* them */ ++ u_int8_t proto; /* TCP, ICMP, etc. 
*/ ++ u_short src_port, dst_port; ++ ++ /* Answer section */ ++ enum ipsec_privacy_quality strength; ++ enum ipsec_bandwidth_quality bandwidth; ++ enum ipsec_authentication_algo auth_detail; ++ enum ipsec_cipher_algo esp_detail; ++ enum ipsec_comp_algo comp_detail; ++ ++ int credential_count; ++ ++ struct ipsec_identity credentials[IPSEC_MAX_CREDENTIALS]; ++}; ++ ++#define IPSEC_POLICY_SOCKET "/var/run/pluto/pluto.info" ++ ++/* prototypes */ ++extern err_t ipsec_policy_lookup(int fd, struct ipsec_policy_cmd_query *result); ++extern err_t ipsec_policy_init(void); ++extern err_t ipsec_policy_final(void); ++extern err_t ipsec_policy_readmsg(int policysock, ++ unsigned char *buf, size_t buflen); ++extern err_t ipsec_policy_sendrecv(unsigned char *buf, size_t buflen); ++extern err_t ipsec_policy_cgilookup(struct ipsec_policy_cmd_query *result); ++ ++ ++extern const char *ipsec_policy_version_code(void); ++extern const char *ipsec_policy_version_string(void); ++ ++#endif /* _IPSEC_POLICY_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_proto.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,199 @@ ++/* ++ * @(#) prototypes for FreeSWAN functions ++ * ++ * Copyright (C) 2001 Richard Guy Briggs ++ * and Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * RCSID $Id: ipsec_proto.h,v 1.14 2005-04-29 04:50:03 mcr Exp $ ++ * ++ */ ++ ++#ifndef _IPSEC_PROTO_H_ ++ ++#include "ipsec_param.h" ++ ++/* ++ * This file is a kernel only file that declares prototypes for ++ * all intra-module function calls and global data structures. ++ * ++ * Include this file last. ++ * ++ */ ++ ++/* forward references */ ++enum ipsec_direction; ++enum ipsec_life_type; ++struct ipsec_lifetime64; ++struct ident; ++struct sockaddr_encap; ++struct ipsec_sa; ++ ++/* ipsec_init.c */ ++extern struct prng ipsec_prng; ++ ++/* ipsec_sa.c */ ++extern struct ipsec_sa *ipsec_sadb_hash[SADB_HASHMOD]; ++extern spinlock_t tdb_lock; ++extern int ipsec_sadb_init(void); ++extern int ipsec_sadb_cleanup(__u8); ++ ++extern struct ipsec_sa *ipsec_sa_alloc(int*error); ++ ++ ++extern struct ipsec_sa *ipsec_sa_getbyid(ip_said *); ++extern int ipsec_sa_put(struct ipsec_sa *); ++extern /* void */ int ipsec_sa_del(struct ipsec_sa *); ++extern /* void */ int ipsec_sa_delchain(struct ipsec_sa *); ++extern /* void */ int ipsec_sa_add(struct ipsec_sa *); ++ ++extern int ipsec_sa_init(struct ipsec_sa *ipsp); ++extern int ipsec_sa_wipe(struct ipsec_sa *ipsp); ++ ++/* debug declarations */ ++ ++/* ipsec_proc.c */ ++extern int ipsec_proc_init(void); ++extern void ipsec_proc_cleanup(void); ++ ++/* ipsec_rcv.c */ ++extern int ipsec_rcv(struct sk_buff *skb); ++extern int klips26_rcv_encap(struct sk_buff *skb, __u16 encap_type); ++ ++/* ipsec_xmit.c */ ++struct ipsec_xmit_state; ++extern enum ipsec_xmit_value ipsec_xmit_sanity_check_dev(struct ipsec_xmit_state *ixs); ++extern enum ipsec_xmit_value ipsec_xmit_sanity_check_skb(struct ipsec_xmit_state *ixs); ++extern void ipsec_print_ip(struct iphdr *ip); ++ ++ ++ ++/* ipsec_radij.c */ ++extern int ipsec_makeroute(struct sockaddr_encap *ea, ++ struct sockaddr_encap *em, ++ ip_said said, ++ uint32_t pid, ++ struct sk_buff *skb, ++ struct ident *ident_s, ++ struct ident *ident_d); ++ ++extern int ipsec_breakroute(struct 
sockaddr_encap *ea, ++ struct sockaddr_encap *em, ++ struct sk_buff **first, ++ struct sk_buff **last); ++ ++int ipsec_radijinit(void); ++int ipsec_cleareroutes(void); ++int ipsec_radijcleanup(void); ++ ++/* ipsec_life.c */ ++extern enum ipsec_life_alive ipsec_lifetime_check(struct ipsec_lifetime64 *il64, ++ const char *lifename, ++ const char *saname, ++ enum ipsec_life_type ilt, ++ enum ipsec_direction idir, ++ struct ipsec_sa *ips); ++ ++ ++extern int ipsec_lifetime_format(char *buffer, ++ int buflen, ++ char *lifename, ++ enum ipsec_life_type timebaselife, ++ struct ipsec_lifetime64 *lifetime); ++ ++extern void ipsec_lifetime_update_hard(struct ipsec_lifetime64 *lifetime, ++ __u64 newvalue); ++ ++extern void ipsec_lifetime_update_soft(struct ipsec_lifetime64 *lifetime, ++ __u64 newvalue); ++ ++/* ipsec_snprintf.c */ ++extern int ipsec_snprintf(char * buf, ssize_t size, const char *fmt, ...); ++extern void ipsec_dmp_block(char *s, caddr_t bb, int len); ++ ++ ++/* ipsec_alg.c */ ++extern int ipsec_alg_init(void); ++ ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ ++extern int debug_xform; ++extern int debug_eroute; ++extern int debug_spi; ++extern int debug_netlink; ++ ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ ++ ++ ++#define _IPSEC_PROTO_H ++#endif /* _IPSEC_PROTO_H_ */ ++ ++/* ++ * $Log: ipsec_proto.h,v $ ++ * Revision 1.14 2005-04-29 04:50:03 mcr ++ * prototypes for xmit and alg code. ++ * ++ * Revision 1.13 2005/04/17 03:46:07 mcr ++ * added prototypes for ipsec_rcv() routines. ++ * ++ * Revision 1.12 2005/04/14 20:28:37 mcr ++ * added additional prototypes. ++ * ++ * Revision 1.11 2005/04/14 01:16:28 mcr ++ * add prototypes for snprintf. ++ * ++ * Revision 1.10 2005/04/13 22:47:28 mcr ++ * make sure that forward references are available. ++ * ++ * Revision 1.9 2004/07/10 19:08:41 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. 
++ * ++ * Revision 1.8 2004/04/05 19:55:06 mcr ++ * Moved from linux/include/freeswan/ipsec_proto.h,v ++ * ++ * Revision 1.7 2003/10/31 02:27:05 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.6.30.1 2003/10/29 01:10:19 mcr ++ * elimited "struct sa_id" ++ * ++ * Revision 1.6 2002/05/23 07:13:48 rgb ++ * Added ipsec_sa_put() for releasing an ipsec_sa refcount. ++ * ++ * Revision 1.5 2002/05/14 02:36:40 rgb ++ * Converted reference from ipsec_sa_put to ipsec_sa_add to avoid confusion ++ * with "put" usage in the kernel. ++ * ++ * Revision 1.4 2002/04/24 07:36:47 mcr ++ * Moved from ./klips/net/ipsec/ipsec_proto.h,v ++ * ++ * Revision 1.3 2002/04/20 00:12:25 rgb ++ * Added esp IV CBC attack fix, disabled. ++ * ++ * Revision 1.2 2001/11/26 09:16:15 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.1.2.1 2001/09/25 02:21:01 mcr ++ * ipsec_proto.h created to keep prototypes rather than deal with ++ * cyclic dependancies of structures and prototypes in .h files. ++ * ++ * ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_radij.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,179 @@ ++/* ++ * @(#) Definitions relevant to the IPSEC <> radij tree interfacing ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * RCSID $Id: ipsec_radij.h,v 1.22 2004-07-10 19:08:41 mcr Exp $ ++ */ ++ ++#ifndef _IPSEC_RADIJ_H ++ ++#include ++ ++int ipsec_walk(char *); ++ ++int ipsec_rj_walker_procprint(struct radij_node *, void *); ++int ipsec_rj_walker_delete(struct radij_node *, void *); ++ ++/* This structure is used to pass information between ++ * ipsec_eroute_get_info and ipsec_rj_walker_procprint ++ * (through rj_walktree) and between calls of ipsec_rj_walker_procprint. ++ */ ++struct wsbuf ++{ ++ /* from caller of ipsec_eroute_get_info: */ ++ char *const buffer; /* start of buffer provided */ ++ const int length; /* length of buffer provided */ ++ const off_t offset; /* file position of first character of interest */ ++ /* accumulated by ipsec_rj_walker_procprint: */ ++ int len; /* number of character filled into buffer */ ++ off_t begin; /* file position contained in buffer[0] (<=offset) */ ++}; ++ ++extern struct radij_node_head *rnh; ++extern spinlock_t eroute_lock; ++ ++struct eroute * ipsec_findroute(struct sockaddr_encap *); ++ ++#define O1(x) (int)(((x)>>24)&0xff) ++#define O2(x) (int)(((x)>>16)&0xff) ++#define O3(x) (int)(((x)>>8)&0xff) ++#define O4(x) (int)(((x))&0xff) ++ ++#ifdef CONFIG_KLIPS_DEBUG ++extern int debug_radij; ++void rj_dumptrees(void); ++ ++#define DB_RJ_DUMPTREES 0x0001 ++#define DB_RJ_FINDROUTE 0x0002 ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++#define _IPSEC_RADIJ_H ++#endif ++ ++/* ++ * $Log: ipsec_radij.h,v $ ++ * Revision 1.22 2004-07-10 19:08:41 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.21 2004/04/29 11:06:42 ken ++ * Last bits from 2.06 procfs updates ++ * ++ * Revision 1.20 2004/04/06 02:49:08 mcr ++ * pullup of algo code from alg-branch. 
++ * ++ * Revision 1.19 2004/04/05 19:55:06 mcr ++ * Moved from linux/include/freeswan/ipsec_radij.h,v ++ * ++ * Revision 1.18 2002/04/24 07:36:47 mcr ++ * Moved from ./klips/net/ipsec/ipsec_radij.h,v ++ * ++ * Revision 1.17 2001/11/26 09:23:49 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.16.2.1 2001/09/25 02:21:17 mcr ++ * ipsec_proto.h created to keep prototypes rather than deal with ++ * cyclic dependancies of structures and prototypes in .h files. ++ * ++ * Revision 1.16 2001/09/15 16:24:04 rgb ++ * Re-inject first and last HOLD packet when an eroute REPLACE is done. ++ * ++ * Revision 1.15 2001/09/14 16:58:37 rgb ++ * Added support for storing the first and last packets through a HOLD. ++ * ++ * Revision 1.14 2001/09/08 21:13:32 rgb ++ * Added pfkey ident extension support for ISAKMPd. (NetCelo) ++ * ++ * Revision 1.13 2001/06/14 19:35:09 rgb ++ * Update copyright date. ++ * ++ * Revision 1.12 2001/05/27 06:12:11 rgb ++ * Added structures for pid, packet count and last access time to eroute. ++ * Added packet count to beginning of /proc/net/ipsec_eroute. ++ * ++ * Revision 1.11 2000/09/08 19:12:56 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * ++ * Revision 1.10 1999/11/17 15:53:39 rgb ++ * Changed all occurrences of #include "../../../lib/freeswan.h" ++ * to #include which works due to -Ilibfreeswan in the ++ * klips/net/ipsec/Makefile. ++ * ++ * Revision 1.9 1999/10/01 00:01:23 rgb ++ * Added eroute structure locking. ++ * ++ * Revision 1.8 1999/04/11 00:28:59 henry ++ * GPL boilerplate ++ * ++ * Revision 1.7 1999/04/06 04:54:26 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.6 1999/01/22 06:23:26 rgb ++ * Cruft clean-out. ++ * ++ * Revision 1.5 1998/10/25 02:42:08 rgb ++ * Change return type on ipsec_breakroute and ipsec_makeroute and add an ++ * argument to be able to transmit more infomation about errors. 
++ * ++ * Revision 1.4 1998/10/19 14:44:29 rgb ++ * Added inclusion of freeswan.h. ++ * sa_id structure implemented and used: now includes protocol. ++ * ++ * Revision 1.3 1998/07/28 00:03:31 rgb ++ * Comment out temporary inet_nto4u() kluge. ++ * ++ * Revision 1.2 1998/07/14 18:22:00 rgb ++ * Add function to clear the eroute table. ++ * ++ * Revision 1.1 1998/06/18 21:27:49 henry ++ * move sources from klips/src to klips/net/ipsec, to keep stupid ++ * kernel-build scripts happier in the presence of symlinks ++ * ++ * Revision 1.5 1998/05/25 20:30:38 rgb ++ * Remove temporary ipsec_walk, rj_deltree and rj_delnodes functions. ++ * ++ * Rename ipsec_rj_walker (ipsec_walk) to ipsec_rj_walker_procprint and ++ * add ipsec_rj_walker_delete. ++ * ++ * Revision 1.4 1998/05/21 13:02:56 rgb ++ * Imported definitions from ipsec_radij.c and radij.c to support /proc 3k ++ * limit fix. ++ * ++ * Revision 1.3 1998/04/21 21:29:09 rgb ++ * Rearrange debug switches to change on the fly debug output from user ++ * space. Only kernel changes checked in at this time. radij.c was also ++ * changed to temporarily remove buggy debugging code in rj_delete causing ++ * an OOPS and hence, netlink device open errors. ++ * ++ * Revision 1.2 1998/04/14 17:30:39 rgb ++ * Fix up compiling errors for radij tree memory reclamation. ++ * ++ * Revision 1.1 1998/04/09 03:06:10 henry ++ * sources moved up from linux/net/ipsec ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:04 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.4 1997/01/15 01:28:15 ji ++ * No changes. ++ * ++ * Revision 0.3 1996/11/20 14:39:04 ji ++ * Minor cleanups. ++ * Rationalized debugging code. ++ * ++ * Revision 0.2 1996/11/02 00:18:33 ji ++ * First limited release. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_rcv.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,199 @@ ++/* ++ * ++ * Copyright (C) 1996, 1997 John Ioannidis. 
++ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_rcv.h,v 1.28.2.2 2006-10-06 21:39:26 paul Exp $ ++ */ ++ ++#ifndef IPSEC_RCV_H ++#define IPSEC_RCV_H ++ ++#include "openswan/ipsec_auth.h" ++ ++#define DB_RX_PKTRX 0x0001 ++#define DB_RX_PKTRX2 0x0002 ++#define DB_RX_DMP 0x0004 ++#define DB_RX_IPSA 0x0010 ++#define DB_RX_XF 0x0020 ++#define DB_RX_IPAD 0x0040 ++#define DB_RX_INAU 0x0080 ++#define DB_RX_OINFO 0x0100 ++#define DB_RX_OINFO2 0x0200 ++#define DB_RX_OH 0x0400 ++#define DB_RX_REPLAY 0x0800 ++ ++#ifdef __KERNEL__ ++/* struct options; */ ++ ++#define __NO_VERSION__ ++#ifndef AUTOCONF_INCLUDED ++#include /* for CONFIG_IP_FORWARD */ ++#endif ++#ifdef CONFIG_MODULES ++#include ++#endif ++#include ++#include ++ ++#define IPSEC_BIRTH_TEMPLATE_MAXLEN 256 ++ ++struct ipsec_birth_reply { ++ int packet_template_len; ++ unsigned char packet_template[IPSEC_BIRTH_TEMPLATE_MAXLEN]; ++}; ++ ++extern struct ipsec_birth_reply ipsec_ipv4_birth_packet; ++extern struct ipsec_birth_reply ipsec_ipv6_birth_packet; ++ ++enum ipsec_rcv_value { ++ IPSEC_RCV_LASTPROTO=1, ++ IPSEC_RCV_OK=0, ++ IPSEC_RCV_BADPROTO=-1, ++ IPSEC_RCV_BADLEN=-2, ++ IPSEC_RCV_ESP_BADALG=-3, ++ IPSEC_RCV_3DES_BADBLOCKING=-4, ++ IPSEC_RCV_ESP_DECAPFAIL=-5, ++ IPSEC_RCV_DECAPFAIL=-6, ++ IPSEC_RCV_SAIDNOTFOUND=-7, ++ IPSEC_RCV_IPCOMPALONE=-8, ++ IPSEC_RCV_IPCOMPFAILED=-10, ++ IPSEC_RCV_SAIDNOTLIVE=-11, ++ IPSEC_RCV_FAILEDINBOUND=-12, ++ 
IPSEC_RCV_LIFETIMEFAILED=-13, ++ IPSEC_RCV_BADAUTH=-14, ++ IPSEC_RCV_REPLAYFAILED=-15, ++ IPSEC_RCV_AUTHFAILED=-16, ++ IPSEC_RCV_REPLAYROLLED=-17, ++ IPSEC_RCV_BAD_DECRYPT=-18 ++}; ++ ++struct ipsec_rcv_state { ++ struct sk_buff *skb; ++ struct net_device_stats *stats; ++ struct iphdr *ipp; /* the IP header */ ++ struct ipsec_sa *ipsp; /* current SA being processed */ ++ int len; /* length of packet */ ++ int ilen; /* length of inner payload (-authlen) */ ++ int authlen; /* how big is the auth data at end */ ++ int hard_header_len; /* layer 2 size */ ++ int iphlen; /* how big is IP header */ ++ struct auth_alg *authfuncs; ++ ip_said said; ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ __u8 next_header; ++ __u8 hash[AH_AMAX]; ++ char ipsaddr_txt[ADDRTOA_BUF]; ++ char ipdaddr_txt[ADDRTOA_BUF]; ++ __u8 *octx; ++ __u8 *ictx; ++ int ictx_len; ++ int octx_len; ++ union { ++ struct { ++ struct esphdr *espp; ++ } espstuff; ++ struct { ++ struct ahhdr *ahp; ++ } ahstuff; ++ struct { ++ struct ipcomphdr *compp; ++ } ipcompstuff; ++ } protostuff; ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ __u8 natt_type; ++ __u16 natt_sport; ++ __u16 natt_dport; ++ int natt_len; ++#endif ++}; ++ ++extern int ++#ifdef PROTO_HANDLER_SINGLE_PARM ++ipsec_rcv(struct sk_buff *skb); ++#else /* PROTO_HANDLER_SINGLE_PARM */ ++ipsec_rcv(struct sk_buff *skb, ++ unsigned short xlen); ++#endif /* PROTO_HANDLER_SINGLE_PARM */ ++ ++#ifdef CONFIG_KLIPS_DEBUG ++extern int debug_rcv; ++#define ipsec_rcv_dmp(_x,_y, _z) if (debug_rcv && sysctl_ipsec_debug_verbose) ipsec_dmp_block(_x,_y,_z) ++#else ++#define ipsec_rcv_dmp(_x,_y, _z) do {} while(0) ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++extern int sysctl_ipsec_inbound_policy_check; ++#endif /* __KERNEL__ */ ++ ++extern int klips26_rcv_encap(struct sk_buff *skb, __u16 encap_type); ++ ++ ++#endif /* IPSEC_RCV_H */ ++ ++/* ++ * $Log: ipsec_rcv.h,v $ ++ * Revision 1.28.2.2 2006-10-06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not 
++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.28.2.1 2006/07/10 15:52:20 paul ++ * Fix for bug #642 by Bart Trojanowski ++ * ++ * Revision 1.28 2005/05/11 00:59:45 mcr ++ * do not call debug routines if !defined KLIPS_DEBUG. ++ * ++ * Revision 1.27 2005/04/29 04:59:46 mcr ++ * use ipsec_dmp_block. ++ * ++ * Revision 1.26 2005/04/13 22:48:35 mcr ++ * added comments, and removed some log. ++ * removed Linux 2.0 support. ++ * ++ * Revision 1.25 2005/04/08 18:25:37 mcr ++ * prototype klips26 encap receive function ++ * ++ * Revision 1.24 2004/08/20 21:45:37 mcr ++ * CONFIG_KLIPS_NAT_TRAVERSAL is not used in an attempt to ++ * be 26sec compatible. But, some defines where changed. ++ * ++ * Revision 1.23 2004/08/03 18:17:40 mcr ++ * in 2.6, use "net_device" instead of #define device->net_device. ++ * this probably breaks 2.0 compiles. ++ * ++ * Revision 1.22 2004/07/10 19:08:41 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.21 2004/04/06 02:49:08 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.20 2004/04/05 19:55:06 mcr ++ * Moved from linux/include/freeswan/ipsec_rcv.h,v ++ * ++ * Revision 1.19 2003/12/15 18:13:09 mcr ++ * when compiling with NAT traversal, don't assume that the ++ * kernel has been patched, unless CONFIG_IPSEC_NAT_NON_ESP ++ * is set. ++ * ++ * history elided 2005-04-12. 
++ * ++ * Local Variables: ++ * c-basic-offset:8 ++ * c-style:linux ++ * End: ++ * ++ */ ++ ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_sa.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,361 @@ ++/* ++ * @(#) Definitions of IPsec Security Association (ipsec_sa) ++ * ++ * Copyright (C) 2001, 2002, 2003 ++ * Richard Guy Briggs ++ * and Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_sa.h,v 1.23.2.1 2007-09-05 02:31:15 paul Exp $ ++ * ++ * This file derived from ipsec_xform.h on 2001/9/18 by mcr. ++ * ++ */ ++ ++/* ++ * This file describes the IPsec Security Association Structure. ++ * ++ * This structure keeps track of a single transform that may be done ++ * to a set of packets. It can describe applying the transform or ++ * apply the reverse. (e.g. compression vs expansion). However, it ++ * only describes one at a time. To describe both, two structures would ++ * be used, but since the sides of the transform are performed ++ * on different machines typically it is usual to have only one side ++ * of each association. ++ * ++ */ ++ ++#ifndef _IPSEC_SA_H_ ++ ++#ifdef __KERNEL__ ++#include "openswan/ipsec_stats.h" ++#include "openswan/ipsec_life.h" ++#include "openswan/ipsec_eroute.h" ++#endif /* __KERNEL__ */ ++#include "openswan/ipsec_param.h" ++ ++#include "pfkeyv2.h" ++ ++ ++/* SAs are held in a table. ++ * Entries in this table are referenced by IPsecSAref_t values. ++ * IPsecSAref_t values are conceptually subscripts. 
Because ++ * we want to allocate the table piece-meal, the subscripting ++ * is implemented with two levels, a bit like paged virtual memory. ++ * This representation mechanism is known as an Iliffe Vector. ++ * ++ * The Main table (AKA the refTable) consists of 2^IPSEC_SA_REF_MAINTABLE_IDX_WIDTH ++ * pointers to subtables. ++ * Each subtable has 2^IPSEC_SA_REF_SUBTABLE_IDX_WIDTH entries, each of which ++ * is a pointer to an SA. ++ * ++ * An IPsecSAref_t contains either an exceptional value (signified by the ++ * high-order bit being on) or a reference to a table entry. A table entry ++ * reference has the subtable subscript in the low-order ++ * IPSEC_SA_REF_SUBTABLE_IDX_WIDTH bits and the Main table subscript ++ * in the next lowest IPSEC_SA_REF_MAINTABLE_IDX_WIDTH bits. ++ * ++ * The Maintable entry for an IPsecSAref_t x, a pointer to its subtable, is ++ * IPsecSAref2table(x). It is of type struct IPsecSArefSubTable *. ++ * ++ * The pointer to the SA for x is IPsecSAref2SA(x). It is of type ++ * struct ipsec_sa*. The macro definition clearly shows the two-level ++ * access needed to find the SA pointer. ++ * ++ * The Maintable is allocated when IPsec is initialized. ++ * Each subtable is allocated when needed, but the first is allocated ++ * when IPsec is initialized. ++ * ++ * IPsecSAref_t is designed to be smaller than an NFmark so that ++ * they can be stored in NFmarks and still leave a few bits for other ++ * purposes. The spare bits are in the low order of the NFmark ++ * but in the high order of the IPsecSAref_t, so conversion is required. ++ * We pick the upper bits of NFmark on the theory that they are less likely to ++ * interfere with more pedestrian uses of nfmark. 
++ */ ++ ++ ++typedef unsigned short int IPsecRefTableUnusedCount; ++ ++#define IPSEC_SA_REF_TABLE_NUM_ENTRIES (1 << IPSEC_SA_REF_TABLE_IDX_WIDTH) ++ ++#ifdef __KERNEL__ ++#if ((IPSEC_SA_REF_TABLE_IDX_WIDTH - (1 + IPSEC_SA_REF_MAINTABLE_IDX_WIDTH)) < 0) ++#error "IPSEC_SA_REF_TABLE_IDX_WIDTH("IPSEC_SA_REF_TABLE_IDX_WIDTH") MUST be < 1 + IPSEC_SA_REF_MAINTABLE_IDX_WIDTH("IPSEC_SA_REF_MAINTABLE_IDX_WIDTH")" ++#endif ++ ++#define IPSEC_SA_REF_SUBTABLE_IDX_WIDTH (IPSEC_SA_REF_TABLE_IDX_WIDTH - IPSEC_SA_REF_MAINTABLE_IDX_WIDTH) ++ ++#define IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES (1 << IPSEC_SA_REF_MAINTABLE_IDX_WIDTH) ++#define IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES (1 << IPSEC_SA_REF_SUBTABLE_IDX_WIDTH) ++ ++#ifdef CONFIG_NETFILTER ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) ++#define nfmark mark ++#endif ++#define IPSEC_SA_REF_HOST_FIELD(x) ((struct sk_buff*)(x))->nfmark ++#define IPSEC_SA_REF_HOST_FIELD_TYPE typeof(IPSEC_SA_REF_HOST_FIELD(NULL)) ++#else /* CONFIG_NETFILTER */ ++/* just make it work for now, it doesn't matter, since there is no nfmark */ ++#define IPSEC_SA_REF_HOST_FIELD_TYPE unsigned long ++#endif /* CONFIG_NETFILTER */ ++#define IPSEC_SA_REF_HOST_FIELD_WIDTH (8 * sizeof(IPSEC_SA_REF_HOST_FIELD_TYPE)) ++#define IPSEC_SA_REF_FIELD_WIDTH (8 * sizeof(IPsecSAref_t)) ++ ++#define IPSEC_SA_REF_MASK (IPSEC_SAREF_NULL >> (IPSEC_SA_REF_FIELD_WIDTH - IPSEC_SA_REF_TABLE_IDX_WIDTH)) ++#define IPSEC_SA_REF_TABLE_MASK ((IPSEC_SAREF_NULL >> (IPSEC_SA_REF_FIELD_WIDTH - IPSEC_SA_REF_MAINTABLE_IDX_WIDTH)) << IPSEC_SA_REF_SUBTABLE_IDX_WIDTH) ++#define IPSEC_SA_REF_ENTRY_MASK (IPSEC_SAREF_NULL >> (IPSEC_SA_REF_FIELD_WIDTH - IPSEC_SA_REF_SUBTABLE_IDX_WIDTH)) ++ ++#define IPsecSAref2table(x) (((x) & IPSEC_SA_REF_TABLE_MASK) >> IPSEC_SA_REF_SUBTABLE_IDX_WIDTH) ++#define IPsecSAref2entry(x) ((x) & IPSEC_SA_REF_ENTRY_MASK) ++#define IPsecSArefBuild(x,y) (((x) << IPSEC_SA_REF_SUBTABLE_IDX_WIDTH) + (y)) ++ ++#define IPsecSAref2SA(x) 
(ipsec_sadb.refTable[IPsecSAref2table(x)]->entry[IPsecSAref2entry(x)]) ++#define IPsecSA2SAref(x) ((x)->ips_ref) ++ ++#define EMT_INBOUND 0x01 /* SA direction, 1=inbound */ ++ ++/* 'struct ipsec_sa' should be 64bit aligned when allocated. */ ++struct ipsec_sa ++{ ++ IPsecSAref_t ips_ref; /* reference table entry number */ ++ atomic_t ips_refcount; /* reference count for this struct */ ++ struct ipsec_sa *ips_hnext; /* next in hash chain */ ++ struct ipsec_sa *ips_inext; /* pointer to next xform */ ++ struct ipsec_sa *ips_onext; /* pointer to prev xform */ ++ ++ struct ifnet *ips_rcvif; /* related rcv encap interface */ ++ ++ ip_said ips_said; /* SA ID */ ++ ++ __u32 ips_seq; /* seq num of msg that initiated this SA */ ++ __u32 ips_pid; /* PID of process that initiated this SA */ ++ __u8 ips_authalg; /* auth algorithm for this SA */ ++ __u8 ips_encalg; /* enc algorithm for this SA */ ++ ++ struct ipsec_stats ips_errs; ++ ++ __u8 ips_replaywin; /* replay window size */ ++ enum sadb_sastate ips_state; /* state of SA */ ++ __u32 ips_replaywin_lastseq; /* last pkt sequence num */ ++ __u64 ips_replaywin_bitmap; /* bitmap of received pkts */ ++ __u32 ips_replaywin_maxdiff; /* max pkt sequence difference */ ++ ++ __u32 ips_flags; /* generic xform flags */ ++ ++ ++ struct ipsec_lifetimes ips_life; /* lifetime records */ ++ ++ /* selector information */ ++ __u8 ips_transport_protocol; /* protocol for this SA, if ports are involved */ ++ struct sockaddr*ips_addr_s; /* src sockaddr */ ++ struct sockaddr*ips_addr_d; /* dst sockaddr */ ++ struct sockaddr*ips_addr_p; /* proxy sockaddr */ ++ __u16 ips_addr_s_size; ++ __u16 ips_addr_d_size; ++ __u16 ips_addr_p_size; ++ ip_address ips_flow_s; ++ ip_address ips_flow_d; ++ ip_address ips_mask_s; ++ ip_address ips_mask_d; ++ ++ __u16 ips_key_bits_a; /* size of authkey in bits */ ++ __u16 ips_auth_bits; /* size of authenticator in bits */ ++ __u16 ips_key_bits_e; /* size of enckey in bits */ ++ __u16 ips_iv_bits; /* size of IV in bits 
*/ ++ __u8 ips_iv_size; ++ __u16 ips_key_a_size; ++ __u16 ips_key_e_size; ++ ++ caddr_t ips_key_a; /* authentication key */ ++ caddr_t ips_key_e; /* encryption key */ ++ caddr_t ips_iv; /* Initialisation Vector */ ++ ++ struct ident ips_ident_s; /* identity src */ ++ struct ident ips_ident_d; /* identity dst */ ++ ++ /* these are included even if CONFIG_KLIPS_IPCOMP is off */ ++ __u16 ips_comp_adapt_tries; /* ipcomp self-adaption tries */ ++ __u16 ips_comp_adapt_skip; /* ipcomp self-adaption to-skip */ ++ __u64 ips_comp_ratio_cbytes; /* compressed bytes */ ++ __u64 ips_comp_ratio_dbytes; /* decompressed (or uncompressed) bytes */ ++ ++ /* these are included even if CONFIG_IPSEC_NAT_TRAVERSAL is off */ ++ __u8 ips_natt_type; ++ __u8 ips_natt_reserved[3]; ++ __u16 ips_natt_sport; ++ __u16 ips_natt_dport; ++ ++ struct sockaddr *ips_natt_oa; ++ __u16 ips_natt_oa_size; ++ __u16 ips_natt_reserved2; ++ ++#if 0 ++ __u32 ips_sens_dpd; ++ __u8 ips_sens_sens_level; ++ __u8 ips_sens_sens_len; ++ __u64* ips_sens_sens_bitmap; ++ __u8 ips_sens_integ_level; ++ __u8 ips_sens_integ_len; ++ __u64* ips_sens_integ_bitmap; ++#endif ++ struct ipsec_alg_enc *ips_alg_enc; ++ struct ipsec_alg_auth *ips_alg_auth; ++ IPsecSAref_t ips_ref_rel; ++}; ++ ++struct IPsecSArefSubTable ++{ ++ struct ipsec_sa* entry[IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES]; ++}; ++ ++struct ipsec_sadb { ++ struct IPsecSArefSubTable* refTable[IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES]; ++ IPsecSAref_t refFreeList[IPSEC_SA_REF_FREELIST_NUM_ENTRIES]; ++ int refFreeListHead; ++ int refFreeListTail; ++ IPsecSAref_t refFreeListCont; ++ IPsecSAref_t said_hash[SADB_HASHMOD]; ++ spinlock_t sadb_lock; ++}; ++ ++extern struct ipsec_sadb ipsec_sadb; ++ ++extern int ipsec_SAref_recycle(void); ++extern int ipsec_SArefSubTable_alloc(unsigned table); ++extern int ipsec_saref_freelist_init(void); ++extern int ipsec_sadb_init(void); ++extern struct ipsec_sa *ipsec_sa_alloc(int*error); /* pass in error var by pointer */ ++extern IPsecSAref_t 
ipsec_SAref_alloc(int*erorr); /* pass in error var by pointer */ ++extern int ipsec_sa_free(struct ipsec_sa* ips); ++extern int ipsec_sa_put(struct ipsec_sa *ips); ++extern int ipsec_sa_add(struct ipsec_sa *ips); ++extern int ipsec_sa_del(struct ipsec_sa *ips); ++extern int ipsec_sa_delchain(struct ipsec_sa *ips); ++extern int ipsec_sadb_cleanup(__u8 proto); ++extern int ipsec_sadb_free(void); ++extern int ipsec_sa_wipe(struct ipsec_sa *ips); ++#endif /* __KERNEL__ */ ++ ++enum ipsec_direction { ++ ipsec_incoming = 1, ++ ipsec_outgoing = 2 ++}; ++ ++#define _IPSEC_SA_H_ ++#endif /* _IPSEC_SA_H_ */ ++ ++/* ++ * $Log: ipsec_sa.h,v $ ++ * Revision 1.23.2.1 2007-09-05 02:31:15 paul ++ * Fix an nfmark -> mark occurance. Patch by David McCullough ++ * ++ * Revision 1.23 2005/05/11 01:18:59 mcr ++ * do not change structure based upon options, to avoid ++ * too many #ifdef. ++ * ++ * Revision 1.22 2005/04/14 01:17:09 mcr ++ * change sadb_state to an enum. ++ * ++ * Revision 1.21 2004/08/20 21:45:37 mcr ++ * CONFIG_KLIPS_NAT_TRAVERSAL is not used in an attempt to ++ * be 26sec compatible. But, some defines where changed. ++ * ++ * Revision 1.20 2004/07/10 19:08:41 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.19 2004/04/05 19:55:06 mcr ++ * Moved from linux/include/freeswan/ipsec_sa.h,v ++ * ++ * Revision 1.18 2004/04/05 19:41:05 mcr ++ * merged alg-branch code. ++ * ++ * Revision 1.17.2.1 2003/12/22 15:25:52 jjo ++ * . Merged algo-0.8.1-rc11-test1 into alg-branch ++ * ++ * Revision 1.17 2003/12/10 01:20:06 mcr ++ * NAT-traversal patches to KLIPS. ++ * ++ * Revision 1.16 2003/10/31 02:27:05 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.15.4.1 2003/10/29 01:10:19 mcr ++ * elimited "struct sa_id" ++ * ++ * Revision 1.15 2003/05/11 00:53:09 mcr ++ * IPsecSAref_t and macros were moved to freeswan.h. ++ * ++ * Revision 1.14 2003/02/12 19:31:55 rgb ++ * Fixed bug in "file seen" machinery. ++ * Updated copyright year. 
++ * ++ * Revision 1.13 2003/01/30 02:31:52 rgb ++ * ++ * Re-wrote comments describing SAref system for accuracy. ++ * Rename SAref table macro names for clarity. ++ * Convert IPsecSAref_t from signed to unsigned to fix apparent SAref exhaustion bug. ++ * Transmit error code through to caller from callee for better diagnosis of problems. ++ * Enclose all macro arguments in parens to avoid any possible obscrure bugs. ++ * ++ * Revision 1.12 2002/10/07 18:31:19 rgb ++ * Change comment to reflect the flexible nature of the main and sub-table widths. ++ * Added a counter for the number of unused entries in each subtable. ++ * Further break up host field type macro to host field. ++ * Move field width sanity checks to ipsec_sa.c ++ * Define a mask for an entire saref. ++ * ++ * Revision 1.11 2002/09/20 15:40:33 rgb ++ * Re-write most of the SAref macros and types to eliminate any pointer references to Entrys. ++ * Fixed SAref/nfmark macros. ++ * Rework saref freeslist. ++ * Place all ipsec sadb globals into one struct. ++ * Restrict some bits to kernel context for use to klips utils. ++ * ++ * Revision 1.10 2002/09/20 05:00:34 rgb ++ * Update copyright date. ++ * ++ * Revision 1.9 2002/09/17 17:19:29 mcr ++ * make it compile even if there is no netfilter - we lost ++ * functionality, but it works, especially on 2.2. ++ * ++ * Revision 1.8 2002/07/28 22:59:53 mcr ++ * clarified/expanded one comment. ++ * ++ * Revision 1.7 2002/07/26 08:48:31 rgb ++ * Added SA ref table code. ++ * ++ * Revision 1.6 2002/05/31 17:27:48 rgb ++ * Comment fix. ++ * ++ * Revision 1.5 2002/05/27 18:55:03 rgb ++ * Remove final vistiges of tdb references via IPSEC_KLIPS1_COMPAT. ++ * ++ * Revision 1.4 2002/05/23 07:13:36 rgb ++ * Convert "usecount" to "refcount" to remove ambiguity. ++ * ++ * Revision 1.3 2002/04/24 07:36:47 mcr ++ * Moved from ./klips/net/ipsec/ipsec_sa.h,v ++ * ++ * Revision 1.2 2001/11/26 09:16:15 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. 
++ * ++ * Revision 1.1.2.1 2001/09/25 02:24:58 mcr ++ * struct tdb -> struct ipsec_sa. ++ * sa(tdb) manipulation functions renamed and moved to ipsec_sa.c ++ * ipsec_xform.c removed. header file still contains useful things. ++ * ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_sha1.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,79 @@ ++/* ++ * RCSID $Id: ipsec_sha1.h,v 1.8 2004-04-05 19:55:07 mcr Exp $ ++ */ ++ ++/* ++ * Here is the original comment from the distribution: ++ ++SHA-1 in C ++By Steve Reid ++100% Public Domain ++ ++ * Adapted for use by the IPSEC code by John Ioannidis ++ */ ++ ++ ++#ifndef _IPSEC_SHA1_H_ ++#define _IPSEC_SHA1_H_ ++ ++typedef struct ++{ ++ __u32 state[5]; ++ __u32 count[2]; ++ __u8 buffer[64]; ++} SHA1_CTX; ++ ++void SHA1Transform(__u32 state[5], __u8 buffer[64]); ++void SHA1Init(void *context); ++void SHA1Update(void *context, unsigned char *data, __u32 len); ++void SHA1Final(unsigned char digest[20], void *context); ++ ++ ++#endif /* _IPSEC_SHA1_H_ */ ++ ++/* ++ * $Log: ipsec_sha1.h,v $ ++ * Revision 1.8 2004-04-05 19:55:07 mcr ++ * Moved from linux/include/freeswan/ipsec_sha1.h,v ++ * ++ * Revision 1.7 2002/09/10 01:45:09 mcr ++ * changed type of MD5_CTX and SHA1_CTX to void * so that ++ * the function prototypes would match, and could be placed ++ * into a pointer to a function. ++ * ++ * Revision 1.6 2002/04/24 07:36:47 mcr ++ * Moved from ./klips/net/ipsec/ipsec_sha1.h,v ++ * ++ * Revision 1.5 1999/12/13 13:59:13 rgb ++ * Quick fix to argument size to Update bugs. ++ * ++ * Revision 1.4 1999/12/07 18:16:23 rgb ++ * Fixed comments at end of #endif lines. ++ * ++ * Revision 1.3 1999/04/06 04:54:27 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.2 1998/11/30 13:22:54 rgb ++ * Rationalised all the klips kernel file headers. 
They are much shorter ++ * now and won't conflict under RH5.2. ++ * ++ * Revision 1.1 1998/06/18 21:27:50 henry ++ * move sources from klips/src to klips/net/ipsec, to keep stupid ++ * kernel-build scripts happier in the presence of symlinks ++ * ++ * Revision 1.2 1998/04/23 20:54:05 rgb ++ * Fixed md5 and sha1 include file nesting issues, to be cleaned up when ++ * verified. ++ * ++ * Revision 1.1 1998/04/09 03:04:21 henry ++ * sources moved up from linux/net/ipsec ++ * these two include files modified not to include others except in kernel ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:04 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.4 1997/01/15 01:28:15 ji ++ * New transform ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_stats.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,76 @@ ++/* ++ * @(#) definition of ipsec_stats structure ++ * ++ * Copyright (C) 2001 Richard Guy Briggs ++ * and Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_stats.h,v 1.7 2005-04-14 01:17:45 mcr Exp $ ++ * ++ */ ++ ++/* ++ * This file describes the errors/statistics that FreeSWAN collects. 
++ */ ++ ++#ifndef _IPSEC_STATS_H_ ++ ++struct ipsec_stats { ++ __u32 ips_alg_errs; /* number of algorithm errors */ ++ __u32 ips_auth_errs; /* # of authentication errors */ ++ __u32 ips_encsize_errs; /* # of encryption size errors*/ ++ __u32 ips_encpad_errs; /* # of encryption pad errors*/ ++ __u32 ips_replaywin_errs; /* # of pkt sequence errors */ ++}; ++ ++#define _IPSEC_STATS_H_ ++#endif /* _IPSEC_STATS_H_ */ ++ ++/* ++ * $Log: ipsec_stats.h,v $ ++ * Revision 1.7 2005-04-14 01:17:45 mcr ++ * add prototypes for snprintf. ++ * ++ * Revision 1.6 2004/04/05 19:55:07 mcr ++ * Moved from linux/include/freeswan/ipsec_stats.h,v ++ * ++ * Revision 1.5 2004/04/05 19:41:05 mcr ++ * merged alg-branch code. ++ * ++ * Revision 1.4 2004/03/28 20:27:19 paul ++ * Included tested and confirmed fixes mcr made and dhr verified for ++ * snprint statements. Changed one other snprintf to use ipsec_snprintf ++ * so it wouldnt break compatibility with 2.0/2.2 kernels. Verified with ++ * dhr. (thanks dhr!) ++ * ++ * Revision 1.4 2004/03/24 01:58:31 mcr ++ * sprintf->snprintf for formatting into proc buffer. ++ * ++ * Revision 1.3.34.1 2004/04/05 04:30:46 mcr ++ * patches for alg-branch to compile/work with 2.x openswan ++ * ++ * Revision 1.3 2002/04/24 07:36:47 mcr ++ * Moved from ./klips/net/ipsec/ipsec_stats.h,v ++ * ++ * Revision 1.2 2001/11/26 09:16:16 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.1.2.1 2001/09/25 02:27:00 mcr ++ * statistics moved to seperate structure. ++ * ++ * ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_tunnel.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,280 @@ ++/* ++ * IPSEC tunneling code ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Richard Guy Briggs. 
++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_tunnel.h,v 1.33 2005-06-04 16:06:05 mcr Exp $ ++ */ ++ ++ ++#ifdef NET_21 ++# define DEV_QUEUE_XMIT(skb, device, pri) {\ ++ skb->dev = device; \ ++ neigh_compat_output(skb); \ ++ /* skb->dst->output(skb); */ \ ++ } ++# define ICMP_SEND(skb_in, type, code, info, dev) \ ++ icmp_send(skb_in, type, code, htonl(info)) ++# define IP_SEND(skb, dev) \ ++ ip_send(skb); ++#else /* NET_21 */ ++# define DEV_QUEUE_XMIT(skb, device, pri) {\ ++ dev_queue_xmit(skb, device, pri); \ ++ } ++# define ICMP_SEND(skb_in, type, code, info, dev) \ ++ icmp_send(skb_in, type, code, info, dev) ++# define IP_SEND(skb, dev) \ ++ if(ntohs(iph->tot_len) > physmtu) { \ ++ ip_fragment(NULL, skb, dev, 0); \ ++ ipsec_kfree_skb(skb); \ ++ } else { \ ++ dev_queue_xmit(skb, dev, SOPRI_NORMAL); \ ++ } ++#endif /* NET_21 */ ++ ++ ++/* ++ * Heavily based on drivers/net/new_tunnel.c. 
Lots ++ * of ideas also taken from the 2.1.x version of drivers/net/shaper.c ++ */ ++ ++struct ipsectunnelconf ++{ ++ __u32 cf_cmd; ++ union ++ { ++ char cfu_name[12]; ++ } cf_u; ++#define cf_name cf_u.cfu_name ++}; ++ ++#define IPSEC_SET_DEV (SIOCDEVPRIVATE) ++#define IPSEC_DEL_DEV (SIOCDEVPRIVATE + 1) ++#define IPSEC_CLR_DEV (SIOCDEVPRIVATE + 2) ++ ++#ifdef __KERNEL__ ++#include ++#ifndef KERNEL_VERSION ++# define KERNEL_VERSION(x,y,z) (((x)<<16)+((y)<<8)+(z)) ++#endif ++struct ipsecpriv ++{ ++ struct sk_buff_head sendq; ++ struct net_device *dev; ++ struct wait_queue *wait_queue; ++ char locked; ++ int (*hard_start_xmit) (struct sk_buff *skb, ++ struct net_device *dev); ++ int (*hard_header) (struct sk_buff *skb, ++ struct net_device *dev, ++ unsigned short type, ++ void *daddr, ++ void *saddr, ++ unsigned len); ++#ifdef NET_21 ++ int (*rebuild_header)(struct sk_buff *skb); ++#else /* NET_21 */ ++ int (*rebuild_header)(void *buff, struct net_device *dev, ++ unsigned long raddr, struct sk_buff *skb); ++#endif /* NET_21 */ ++ int (*set_mac_address)(struct net_device *dev, void *addr); ++#ifndef NET_21 ++ void (*header_cache_bind)(struct hh_cache **hhp, struct net_device *dev, ++ unsigned short htype, __u32 daddr); ++#endif /* !NET_21 */ ++ void (*header_cache_update)(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr); ++ struct net_device_stats *(*get_stats)(struct net_device *dev); ++ struct net_device_stats mystats; ++ int mtu; /* What is the desired MTU? 
*/ ++}; ++ ++extern char ipsec_tunnel_c_version[]; ++ ++extern struct net_device *ipsecdevices[IPSEC_NUM_IF]; ++ ++int ipsec_tunnel_init_devices(void); ++ ++/* void */ int ipsec_tunnel_cleanup_devices(void); ++ ++extern /* void */ int ipsec_init(void); ++ ++extern int ipsec_tunnel_start_xmit(struct sk_buff *skb, struct net_device *dev); ++extern struct net_device *ipsec_get_device(int inst); ++ ++#ifdef CONFIG_KLIPS_DEBUG ++extern int debug_tunnel; ++extern int sysctl_ipsec_debug_verbose; ++#endif /* CONFIG_KLIPS_DEBUG */ ++#endif /* __KERNEL__ */ ++ ++#ifdef CONFIG_KLIPS_DEBUG ++#define DB_TN_INIT 0x0001 ++#define DB_TN_PROCFS 0x0002 ++#define DB_TN_XMIT 0x0010 ++#define DB_TN_OHDR 0x0020 ++#define DB_TN_CROUT 0x0040 ++#define DB_TN_OXFS 0x0080 ++#define DB_TN_REVEC 0x0100 ++#define DB_TN_ENCAP 0x0200 ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++/* ++ * $Log: ipsec_tunnel.h,v $ ++ * Revision 1.33 2005-06-04 16:06:05 mcr ++ * better patch for nat-t rcv-device code. ++ * ++ * Revision 1.32 2005/05/21 03:18:35 mcr ++ * added additional debug flag tunnelling. ++ * ++ * Revision 1.31 2004/08/03 18:18:02 mcr ++ * in 2.6, use "net_device" instead of #define device->net_device. ++ * this probably breaks 2.0 compiles. ++ * ++ * Revision 1.30 2004/07/10 19:08:41 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.29 2004/04/05 19:55:07 mcr ++ * Moved from linux/include/freeswan/ipsec_tunnel.h,v ++ * ++ * Revision 1.28 2003/06/24 20:22:32 mcr ++ * added new global: ipsecdevices[] so that we can keep track of ++ * the ipsecX devices. They will be referenced with dev_hold(), ++ * so 2.2 may need this as well. ++ * ++ * Revision 1.27 2003/04/03 17:38:09 rgb ++ * Centralised ipsec_kfree_skb and ipsec_dev_{get,put}. ++ * ++ * Revision 1.26 2003/02/12 19:32:20 rgb ++ * Updated copyright year. ++ * ++ * Revision 1.25 2002/05/27 18:56:07 rgb ++ * Convert to dynamic ipsec device allocation. 
++ * ++ * Revision 1.24 2002/04/24 07:36:48 mcr ++ * Moved from ./klips/net/ipsec/ipsec_tunnel.h,v ++ * ++ * Revision 1.23 2001/11/06 19:50:44 rgb ++ * Moved IP_SEND, ICMP_SEND, DEV_QUEUE_XMIT macros to ipsec_tunnel.h for ++ * use also by pfkey_v2_parser.c ++ * ++ * Revision 1.22 2001/09/15 16:24:05 rgb ++ * Re-inject first and last HOLD packet when an eroute REPLACE is done. ++ * ++ * Revision 1.21 2001/06/14 19:35:10 rgb ++ * Update copyright date. ++ * ++ * Revision 1.20 2000/09/15 11:37:02 rgb ++ * Merge in heavily modified Svenning Soerensen's ++ * IPCOMP zlib deflate code. ++ * ++ * Revision 1.19 2000/09/08 19:12:56 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * ++ * Revision 1.18 2000/07/28 13:50:54 rgb ++ * Changed enet_statistics to net_device_stats and added back compatibility ++ * for pre-2.1.19. ++ * ++ * Revision 1.17 1999/11/19 01:12:15 rgb ++ * Purge unneeded proc_info prototypes, now that static linking uses ++ * dynamic proc_info registration. ++ * ++ * Revision 1.16 1999/11/18 18:51:00 rgb ++ * Changed all device registrations for static linking to ++ * dynamic to reduce the number and size of patches. ++ * ++ * Revision 1.15 1999/11/18 04:14:21 rgb ++ * Replaced all kernel version macros to shorter, readable form. ++ * Added CONFIG_PROC_FS compiler directives in case it is shut off. ++ * Added Marc Boucher's 2.3.25 proc patches. ++ * ++ * Revision 1.14 1999/05/25 02:50:10 rgb ++ * Fix kernel version macros for 2.0.x static linking. ++ * ++ * Revision 1.13 1999/05/25 02:41:06 rgb ++ * Add ipsec_klipsdebug support for static linking. ++ * ++ * Revision 1.12 1999/05/05 22:02:32 rgb ++ * Add a quick and dirty port to 2.2 kernels by Marc Boucher . ++ * ++ * Revision 1.11 1999/04/29 15:19:50 rgb ++ * Add return values to init and cleanup functions. ++ * ++ * Revision 1.10 1999/04/16 16:02:39 rgb ++ * Bump up macro to 4 ipsec I/Fs. ++ * ++ * Revision 1.9 1999/04/15 15:37:25 rgb ++ * Forward check changes from POST1_00 branch. 
++ * ++ * Revision 1.5.2.1 1999/04/02 04:26:14 rgb ++ * Backcheck from HEAD, pre1.0. ++ * ++ * Revision 1.8 1999/04/11 00:29:01 henry ++ * GPL boilerplate ++ * ++ * Revision 1.7 1999/04/06 04:54:28 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.6 1999/03/31 05:44:48 rgb ++ * Keep PMTU reduction private. ++ * ++ * Revision 1.5 1999/02/10 22:31:20 rgb ++ * Change rebuild_header member to reflect generality of link layer. ++ * ++ * Revision 1.4 1998/12/01 13:22:04 rgb ++ * Added support for debug printing of version info. ++ * ++ * Revision 1.3 1998/07/29 20:42:46 rgb ++ * Add a macro for clearing all tunnel devices. ++ * Rearrange structures and declarations for sharing with userspace. ++ * ++ * Revision 1.2 1998/06/25 20:01:45 rgb ++ * Make prototypes available for ipsec_init and ipsec proc_dir_entries ++ * for static linking. ++ * ++ * Revision 1.1 1998/06/18 21:27:50 henry ++ * move sources from klips/src to klips/net/ipsec, to keep stupid ++ * kernel-build scripts happier in the presence of symlinks ++ * ++ * Revision 1.3 1998/05/18 21:51:50 rgb ++ * Added macros for num of I/F's and a procfs debug switch. ++ * ++ * Revision 1.2 1998/04/21 21:29:09 rgb ++ * Rearrange debug switches to change on the fly debug output from user ++ * space. Only kernel changes checked in at this time. radij.c was also ++ * changed to temporarily remove buggy debugging code in rj_delete causing ++ * an OOPS and hence, netlink device open errors. ++ * ++ * Revision 1.1 1998/04/09 03:06:13 henry ++ * sources moved up from linux/net/ipsec ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:05 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.5 1997/06/03 04:24:48 ji ++ * Added transport mode. ++ * Changed the way routing is done. ++ * Lots of bug fixes. ++ * ++ * Revision 0.4 1997/01/15 01:28:15 ji ++ * No changes. ++ * ++ * Revision 0.3 1996/11/20 14:39:04 ji ++ * Minor cleanups. 
++ * Rationalized debugging code. ++ * ++ * Revision 0.2 1996/11/02 00:18:33 ji ++ * First limited release. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_xform.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,257 @@ ++/* ++ * Definitions relevant to IPSEC transformations ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs. ++ * COpyright (C) 2003 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_xform.h,v 1.41 2004-07-10 19:08:41 mcr Exp $ ++ */ ++ ++#ifndef _IPSEC_XFORM_H_ ++ ++#include ++ ++#define XF_NONE 0 /* No transform set */ ++#define XF_IP4 1 /* IPv4 inside IPv4 */ ++#define XF_AHMD5 2 /* AH MD5 */ ++#define XF_AHSHA 3 /* AH SHA */ ++#define XF_ESP3DES 5 /* ESP DES3-CBC */ ++#define XF_AHHMACMD5 6 /* AH-HMAC-MD5 with opt replay prot */ ++#define XF_AHHMACSHA1 7 /* AH-HMAC-SHA1 with opt replay prot */ ++#define XF_ESP3DESMD5 9 /* triple DES, HMAC-MD-5, 128-bits of authentication */ ++#define XF_ESP3DESMD596 10 /* triple DES, HMAC-MD-5, 96-bits of authentication */ ++#define XF_ESPNULLMD596 12 /* NULL, HMAC-MD-5 with 96-bits of authentication */ ++#define XF_ESPNULLSHA196 13 /* NULL, HMAC-SHA-1 with 96-bits of authentication */ ++#define XF_ESP3DESSHA196 14 /* triple DES, HMAC-SHA-1, 96-bits of authentication */ ++#define XF_IP6 15 /* IPv6 inside IPv6 */ ++#define XF_COMPDEFLATE 16 /* IPCOMP deflate */ ++ ++#define XF_CLR 126 /* Clear SA table */ ++#define 
XF_DEL 127 /* Delete SA */ ++ ++/* IPsec AH transform values ++ * RFC 2407 ++ * draft-ietf-ipsec-doi-tc-mib-02.txt ++ */ ++ ++#define AH_NONE 0 ++#define AH_MD5 2 ++#define AH_SHA 3 ++/* draft-ietf-ipsec-ciph-aes-cbc-03.txt */ ++#define AH_SHA2_256 5 ++#define AH_SHA2_384 6 ++#define AH_SHA2_512 7 ++#define AH_RIPEMD 8 ++#define AH_MAX 15 ++ ++/* IPsec ESP transform values */ ++ ++#define ESP_NONE 0 ++#define ESP_DES 2 ++#define ESP_3DES 3 ++#define ESP_RC5 4 ++#define ESP_IDEA 5 ++#define ESP_CAST 6 ++#define ESP_BLOWFISH 7 ++#define ESP_3IDEA 8 ++#define ESP_RC4 10 ++#define ESP_NULL 11 ++#define ESP_AES 12 ++ ++/* as draft-ietf-ipsec-ciph-aes-cbc-02.txt */ ++#define ESP_MARS 249 ++#define ESP_RC6 250 ++#define ESP_SERPENT 252 ++#define ESP_TWOFISH 253 ++ ++/* IPCOMP transform values */ ++ ++#define IPCOMP_NONE 0 ++#define IPCOMP_OUI 1 ++#define IPCOMP_DEFLAT 2 ++#define IPCOMP_LZS 3 ++#define IPCOMP_V42BIS 4 ++ ++#define XFT_AUTH 0x0001 ++#define XFT_CONF 0x0100 ++ ++/* available if CONFIG_KLIPS_DEBUG is defined */ ++#define DB_XF_INIT 0x0001 ++ ++#define PROTO2TXT(x) \ ++ (x) == IPPROTO_AH ? "AH" : \ ++ (x) == IPPROTO_ESP ? "ESP" : \ ++ (x) == IPPROTO_IPIP ? "IPIP" : \ ++ (x) == IPPROTO_COMP ? "COMP" : \ ++ "UNKNOWN_proto" ++static inline const char *enc_name_id (unsigned id) { ++ static char buf[16]; ++ snprintf(buf, sizeof(buf), "_ID%d", id); ++ return buf; ++} ++static inline const char *auth_name_id (unsigned id) { ++ static char buf[16]; ++ snprintf(buf, sizeof(buf), "_ID%d", id); ++ return buf; ++} ++#define IPS_XFORM_NAME(x) \ ++ PROTO2TXT((x)->ips_said.proto), \ ++ (x)->ips_said.proto == IPPROTO_COMP ? \ ++ ((x)->ips_encalg == SADB_X_CALG_DEFLATE ? \ ++ "_DEFLATE" : "_UNKNOWN_comp") : \ ++ (x)->ips_encalg == ESP_NONE ? "" : \ ++ (x)->ips_encalg == ESP_3DES ? "_3DES" : \ ++ (x)->ips_encalg == ESP_AES ? "_AES" : \ ++ (x)->ips_encalg == ESP_SERPENT ? "_SERPENT" : \ ++ (x)->ips_encalg == ESP_TWOFISH ? 
"_TWOFISH" : \ ++ enc_name_id(x->ips_encalg)/* "_UNKNOWN_encr" */, \ ++ (x)->ips_authalg == AH_NONE ? "" : \ ++ (x)->ips_authalg == AH_MD5 ? "_HMAC_MD5" : \ ++ (x)->ips_authalg == AH_SHA ? "_HMAC_SHA1" : \ ++ (x)->ips_authalg == AH_SHA2_256 ? "_HMAC_SHA2_256" : \ ++ (x)->ips_authalg == AH_SHA2_384 ? "_HMAC_SHA2_384" : \ ++ (x)->ips_authalg == AH_SHA2_512 ? "_HMAC_SHA2_512" : \ ++ auth_name_id(x->ips_authalg) /* "_UNKNOWN_auth" */ \ ++ ++#ifdef __KERNEL__ ++struct ipsec_rcv_state; ++struct ipsec_xmit_state; ++ ++struct xform_functions { ++ enum ipsec_rcv_value (*rcv_checks)(struct ipsec_rcv_state *irs, ++ struct sk_buff *skb); ++ enum ipsec_rcv_value (*rcv_decrypt)(struct ipsec_rcv_state *irs); ++ ++ enum ipsec_rcv_value (*rcv_setup_auth)(struct ipsec_rcv_state *irs, ++ struct sk_buff *skb, ++ __u32 *replay, ++ unsigned char **authenticator); ++ enum ipsec_rcv_value (*rcv_calc_auth)(struct ipsec_rcv_state *irs, ++ struct sk_buff *skb); ++ ++ enum ipsec_xmit_value (*xmit_setup)(struct ipsec_xmit_state *ixs); ++ enum ipsec_xmit_value (*xmit_encrypt)(struct ipsec_xmit_state *ixs); ++ ++ enum ipsec_xmit_value (*xmit_setup_auth)(struct ipsec_xmit_state *ixs, ++ struct sk_buff *skb, ++ __u32 *replay, ++ unsigned char **authenticator); ++ enum ipsec_xmit_value (*xmit_calc_auth)(struct ipsec_xmit_state *ixs, ++ struct sk_buff *skb); ++ int xmit_headroom; ++ int xmit_needtailroom; ++}; ++ ++#endif /* __KERNEL__ */ ++ ++#ifdef CONFIG_KLIPS_DEBUG ++extern void ipsec_dmp(char *s, caddr_t bb, int len); ++#else /* CONFIG_KLIPS_DEBUG */ ++#define ipsec_dmp(_x, _y, _z) ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ ++#define _IPSEC_XFORM_H_ ++#endif /* _IPSEC_XFORM_H_ */ ++ ++/* ++ * $Log: ipsec_xform.h,v $ ++ * Revision 1.41 2004-07-10 19:08:41 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.40 2004/04/06 02:49:08 mcr ++ * pullup of algo code from alg-branch. 
++ * ++ * Revision 1.39 2004/04/05 19:55:07 mcr ++ * Moved from linux/include/freeswan/ipsec_xform.h,v ++ * ++ * Revision 1.38 2004/04/05 19:41:05 mcr ++ * merged alg-branch code. ++ * ++ * Revision 1.37 2003/12/13 19:10:16 mcr ++ * refactored rcv and xmit code - same as FS 2.05. ++ * ++ * Revision 1.36.34.1 2003/12/22 15:25:52 jjo ++ * Merged algo-0.8.1-rc11-test1 into alg-branch ++ * ++ * Revision 1.36 2002/04/24 07:36:48 mcr ++ * Moved from ./klips/net/ipsec/ipsec_xform.h,v ++ * ++ * Revision 1.35 2001/11/26 09:23:51 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.33.2.1 2001/09/25 02:24:58 mcr ++ * struct tdb -> struct ipsec_sa. ++ * sa(tdb) manipulation functions renamed and moved to ipsec_sa.c ++ * ipsec_xform.c removed. header file still contains useful things. ++ * ++ * Revision 1.34 2001/11/06 19:47:17 rgb ++ * Changed lifetime_packets to uint32 from uint64. ++ * ++ * Revision 1.33 2001/09/08 21:13:34 rgb ++ * Added pfkey ident extension support for ISAKMPd. (NetCelo) ++ * ++ * Revision 1.32 2001/07/06 07:40:01 rgb ++ * Reformatted for readability. ++ * Added inbound policy checking fields for use with IPIP SAs. ++ * ++ * Revision 1.31 2001/06/14 19:35:11 rgb ++ * Update copyright date. ++ * ++ * Revision 1.30 2001/05/30 08:14:03 rgb ++ * Removed vestiges of esp-null transforms. ++ * ++ * Revision 1.29 2001/01/30 23:42:47 rgb ++ * Allow pfkey msgs from pid other than user context required for ACQUIRE ++ * and subsequent ADD or UDATE. ++ * ++ * Revision 1.28 2000/11/06 04:30:40 rgb ++ * Add Svenning's adaptive content compression. ++ * ++ * Revision 1.27 2000/09/19 00:38:25 rgb ++ * Fixed algorithm name bugs introduced for ipcomp. ++ * ++ * Revision 1.26 2000/09/17 21:36:48 rgb ++ * Added proto2txt macro. ++ * ++ * Revision 1.25 2000/09/17 18:56:47 rgb ++ * Added IPCOMP support. ++ * ++ * Revision 1.24 2000/09/12 19:34:12 rgb ++ * Defined XF_IP6 from Gerhard for ipv6 tunnel support. 
++ * ++ * Revision 1.23 2000/09/12 03:23:14 rgb ++ * Cleaned out now unused tdb_xform and tdb_xdata members of struct tdb. ++ * ++ * Revision 1.22 2000/09/08 19:12:56 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * ++ * Revision 1.21 2000/09/01 18:32:43 rgb ++ * Added (disabled) sensitivity members to tdb struct. ++ * ++ * Revision 1.20 2000/08/30 05:31:01 rgb ++ * Removed all the rest of the references to tdb_spi, tdb_proto, tdb_dst. ++ * Kill remainder of tdb_xform, tdb_xdata, xformsw. ++ * ++ * Revision 1.19 2000/08/01 14:51:52 rgb ++ * Removed _all_ remaining traces of DES. ++ * ++ * Revision 1.18 2000/01/21 06:17:45 rgb ++ * Tidied up spacing. ++ * ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/ipsec_xmit.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,198 @@ ++/* ++ * IPSEC tunneling code ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * RCSID $Id: ipsec_xmit.h,v 1.14 2005-05-11 01:00:26 mcr Exp $ ++ */ ++ ++#include "openswan/ipsec_sa.h" ++ ++enum ipsec_xmit_value ++{ ++ IPSEC_XMIT_STOLEN=2, ++ IPSEC_XMIT_PASS=1, ++ IPSEC_XMIT_OK=0, ++ IPSEC_XMIT_ERRMEMALLOC=-1, ++ IPSEC_XMIT_ESP_BADALG=-2, ++ IPSEC_XMIT_BADPROTO=-3, ++ IPSEC_XMIT_ESP_PUSHPULLERR=-4, ++ IPSEC_XMIT_BADLEN=-5, ++ IPSEC_XMIT_AH_BADALG=-6, ++ IPSEC_XMIT_SAIDNOTFOUND=-7, ++ IPSEC_XMIT_SAIDNOTLIVE=-8, ++ IPSEC_XMIT_REPLAYROLLED=-9, ++ IPSEC_XMIT_LIFETIMEFAILED=-10, ++ IPSEC_XMIT_CANNOTFRAG=-11, ++ IPSEC_XMIT_MSSERR=-12, ++ IPSEC_XMIT_ERRSKBALLOC=-13, ++ IPSEC_XMIT_ENCAPFAIL=-14, ++ IPSEC_XMIT_NODEV=-15, ++ IPSEC_XMIT_NOPRIVDEV=-16, ++ IPSEC_XMIT_NOPHYSDEV=-17, ++ IPSEC_XMIT_NOSKB=-18, ++ IPSEC_XMIT_NOIPV6=-19, ++ IPSEC_XMIT_NOIPOPTIONS=-20, ++ IPSEC_XMIT_TTLEXPIRED=-21, ++ IPSEC_XMIT_BADHHLEN=-22, ++ IPSEC_XMIT_PUSHPULLERR=-23, ++ IPSEC_XMIT_ROUTEERR=-24, ++ IPSEC_XMIT_RECURSDETECT=-25, ++ IPSEC_XMIT_IPSENDFAILURE=-26, ++ IPSEC_XMIT_ESPUDP=-27, ++ IPSEC_XMIT_ESPUDP_BADTYPE=-28, ++}; ++ ++struct ipsec_xmit_state ++{ ++ struct sk_buff *skb; /* working skb pointer */ ++ struct net_device *dev; /* working dev pointer */ ++ struct ipsecpriv *prv; /* Our device' private space */ ++ struct sk_buff *oskb; /* Original skb pointer */ ++ struct net_device_stats *stats; /* This device's statistics */ ++ struct iphdr *iph; /* Our new IP header */ ++ __u32 newdst; /* The other SG's IP address */ ++ __u32 orgdst; /* Original IP destination address */ ++ __u32 orgedst; /* 1st SG's IP address */ ++ __u32 newsrc; /* The new source SG's IP address */ ++ __u32 orgsrc; /* Original IP source address */ ++ __u32 innersrc; /* Innermost IP source address */ ++ int iphlen; /* IP header length */ ++ int pyldsz; /* upper protocol payload size */ ++ int headroom; ++ int tailroom; ++ int authlen; ++ int max_headroom; /* The extra header space needed */ ++ int max_tailroom; /* The extra stuffing needed */ ++ int ll_headroom; /* The extra link layer 
hard_header space needed */ ++ int tot_headroom; /* The total header space needed */ ++ int tot_tailroom; /* The totalstuffing needed */ ++ __u8 *saved_header; /* saved copy of the hard header */ ++ unsigned short sport, dport; ++ ++ struct sockaddr_encap matcher; /* eroute search key */ ++ struct eroute *eroute; ++ struct ipsec_sa *ipsp, *ipsq; /* ipsec_sa pointers */ ++ char sa_txt[SATOT_BUF]; ++ size_t sa_len; ++ int hard_header_stripped; /* has the hard header been removed yet? */ ++ int hard_header_len; ++ struct net_device *physdev; ++/* struct device *virtdev; */ ++ short physmtu; ++ short cur_mtu; /* copy of prv->mtu, cause prv may == NULL */ ++ short mtudiff; ++#ifdef NET_21 ++ struct rtable *route; ++#endif /* NET_21 */ ++ ip_said outgoing_said; ++#ifdef NET_21 ++ int pass; ++#endif /* NET_21 */ ++ int error; ++ uint32_t eroute_pid; ++ struct ipsec_sa ips; ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ uint8_t natt_type; ++ uint8_t natt_head; ++ uint16_t natt_sport; ++ uint16_t natt_dport; ++#endif ++}; ++ ++enum ipsec_xmit_value ++ipsec_xmit_sanity_check_dev(struct ipsec_xmit_state *ixs); ++ ++enum ipsec_xmit_value ++ipsec_xmit_sanity_check_skb(struct ipsec_xmit_state *ixs); ++ ++enum ipsec_xmit_value ++ipsec_xmit_encap_bundle(struct ipsec_xmit_state *ixs); ++ ++extern void ipsec_extract_ports(struct iphdr * iph, struct sockaddr_encap * er); ++ ++ ++extern int ipsec_xmit_trap_count; ++extern int ipsec_xmit_trap_sendcount; ++ ++#ifdef CONFIG_KLIPS_DEBUG ++extern int debug_tunnel; ++ ++#define debug_xmit debug_tunnel ++ ++#define ipsec_xmit_dmp(_x,_y, _z) if (debug_xmit && sysctl_ipsec_debug_verbose) ipsec_dmp_block(_x,_y,_z) ++#else ++#define ipsec_xmit_dmp(_x,_y, _z) do {} while(0) ++ ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++extern int sysctl_ipsec_debug_verbose; ++extern int sysctl_ipsec_icmp; ++extern int sysctl_ipsec_tos; ++ ++ ++/* ++ * $Log: ipsec_xmit.h,v $ ++ * Revision 1.14 2005-05-11 01:00:26 mcr ++ * do not call debug routines if !defined KLIPS_DEBUG. 
++ * ++ * Revision 1.13 2005/04/29 05:01:38 mcr ++ * use ipsec_dmp_block. ++ * added cur_mtu to ixs instead of using ixs->dev. ++ * ++ * Revision 1.12 2004/08/20 21:45:37 mcr ++ * CONFIG_KLIPS_NAT_TRAVERSAL is not used in an attempt to ++ * be 26sec compatible. But, some defines where changed. ++ * ++ * Revision 1.11 2004/08/03 18:18:21 mcr ++ * in 2.6, use "net_device" instead of #define device->net_device. ++ * this probably breaks 2.0 compiles. ++ * ++ * Revision 1.10 2004/07/10 19:08:41 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.9 2004/04/06 02:49:08 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.8 2004/04/05 19:55:07 mcr ++ * Moved from linux/include/freeswan/ipsec_xmit.h,v ++ * ++ * Revision 1.7 2004/02/03 03:11:40 mcr ++ * new xmit type if the UDP encapsulation is wrong. ++ * ++ * Revision 1.6 2003/12/13 19:10:16 mcr ++ * refactored rcv and xmit code - same as FS 2.05. ++ * ++ * Revision 1.5 2003/12/10 01:20:06 mcr ++ * NAT-traversal patches to KLIPS. ++ * ++ * Revision 1.4 2003/12/06 16:37:04 mcr ++ * 1.4.7a X.509 patch applied. ++ * ++ * Revision 1.3 2003/10/31 02:27:05 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.2.4.2 2003/10/29 01:10:19 mcr ++ * elimited "struct sa_id" ++ * ++ * Revision 1.2.4.1 2003/09/21 13:59:38 mcr ++ * pre-liminary X.509 patch - does not yet pass tests. ++ * ++ * Revision 1.2 2003/06/20 01:42:13 mcr ++ * added counters to measure how many ACQUIREs we send to pluto, ++ * and how many are successfully sent. ++ * ++ * Revision 1.1 2003/02/12 19:31:03 rgb ++ * Refactored from ipsec_tunnel.c ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/passert.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,75 @@ ++/* ++ * sanitize a string into a printable format. ++ * ++ * Copyright (C) 1998-2002 D. Hugh Redelmeier. 
++ * Copyright (C) 2003 Michael Richardson ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: passert.h,v 1.7.8.1 2007-09-05 02:32:24 paul Exp $ ++ */ ++ ++#include "openswan.h" ++ ++#ifndef _OPENSWAN_PASSERT_H ++#define _OPENSWAN_PASSERT_H ++/* our versions of assert: log result */ ++ ++#ifdef DEBUG ++ ++typedef void (*openswan_passert_fail_t)(const char *pred_str, ++ const char *file_str, ++ unsigned long line_no) NEVER_RETURNS; ++ ++extern openswan_passert_fail_t openswan_passert_fail; ++ ++extern void pexpect_log(const char *pred_str ++ , const char *file_str, unsigned long line_no); ++ ++# define impossible() do { \ ++ if(openswan_passert_fail) { \ ++ (*openswan_passert_fail)("impossible", __FILE__, __LINE__); \ ++ }} while(0) ++ ++extern void switch_fail(int n ++ , const char *file_str, unsigned long line_no) NEVER_RETURNS; ++ ++# define bad_case(n) switch_fail((int) n, __FILE__, __LINE__) ++ ++# define passert(pred) do { \ ++ if (!(pred)) \ ++ if(openswan_passert_fail) { \ ++ (*openswan_passert_fail)(#pred, __FILE__, __LINE__); \ ++ } \ ++ } while(0) ++ ++# define pexpect(pred) do { \ ++ if (!(pred)) \ ++ pexpect_log(#pred, __FILE__, __LINE__); \ ++ } while(0) ++ ++/* assert that an err_t is NULL; evaluate exactly once */ ++# define happy(x) { \ ++ err_t ugh = x; \ ++ if (ugh != NULL) \ ++ if(openswan_passert_fail) { (*openswan_passert_fail)(ugh, __FILE__, __LINE__); } \ ++ } ++ ++#else /*!DEBUG*/ ++ ++# define impossible() abort() ++# define 
bad_case(n) abort() ++# define passert(pred) { } /* do nothing */ ++# define happy(x) { (void) x; } /* evaluate non-judgementally */ ++ ++#endif /*!DEBUG*/ ++ ++#endif /* _OPENSWAN_PASSERT_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/pfkey_debug.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,54 @@ ++/* ++ * sanitize a string into a printable format. ++ * ++ * Copyright (C) 1998-2002 D. Hugh Redelmeier. ++ * Copyright (C) 2003 Michael Richardson ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: pfkey_debug.h,v 1.3 2004-04-05 19:55:07 mcr Exp $ ++ */ ++ ++#ifndef _FREESWAN_PFKEY_DEBUG_H ++#define _FREESWAN_PFKEY_DEBUG_H ++ ++#ifdef __KERNEL__ ++ ++/* note, kernel version ignores pfkey levels */ ++# define DEBUGGING(level,args...) \ ++ KLIPS_PRINT(debug_pfkey, "klips_debug:" args) ++ ++# define ERROR(args...) printk(KERN_ERR "klips:" args) ++ ++#else ++ ++extern unsigned int pfkey_lib_debug; ++ ++extern void (*pfkey_debug_func)(const char *message, ...) PRINTF_LIKE(1); ++extern void (*pfkey_error_func)(const char *message, ...) PRINTF_LIKE(1); ++ ++#define DEBUGGING(level,args...) if(pfkey_lib_debug & level) { \ ++ if(pfkey_debug_func != NULL) { \ ++ (*pfkey_debug_func)("pfkey_lib_debug:" args); \ ++ } else { \ ++ printf("pfkey_lib_debug:" args); \ ++ } } ++ ++#define ERROR(args...) 
if(pfkey_error_func != NULL) { \ ++ (*pfkey_error_func)("pfkey_lib_debug:" args); \ ++ } ++ ++# define MALLOC(size) malloc(size) ++# define FREE(obj) free(obj) ++ ++#endif ++ ++#endif +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/openswan/radij.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,280 @@ ++/* ++ * RCSID $Id: radij.h,v 1.13 2004-04-05 19:55:08 mcr Exp $ ++ */ ++ ++/* ++ * This file is defived from ${SRC}/sys/net/radix.h of BSD 4.4lite ++ * ++ * Variable and procedure names have been modified so that they don't ++ * conflict with the original BSD code, as a small number of modifications ++ * have been introduced and we may want to reuse this code in BSD. ++ * ++ * The `j' in `radij' is pronounced as a voiceless guttural (like a Greek ++ * chi or a German ch sound (as `doch', not as in `milch'), or even a ++ * spanish j as in Juan. It is not as far back in the throat like ++ * the corresponding Hebrew sound, nor is it a soft breath like the English h. ++ * It has nothing to do with the Dutch ij sound. ++ * ++ * Here is the appropriate copyright notice: ++ */ ++ ++/* ++ * Copyright (c) 1988, 1989, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * This product includes software developed by the University of ++ * California, Berkeley and its contributors. ++ * 4. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)radix.h 8.1 (Berkeley) 6/10/93 ++ */ ++ ++#ifndef _RADIJ_H_ ++#define _RADIJ_H_ ++ ++/* ++#define RJ_DEBUG ++*/ ++ ++#ifdef __KERNEL__ ++ ++#ifndef __P ++#ifdef __STDC__ ++#define __P(x) x ++#else ++#define __P(x) () ++#endif ++#endif ++ ++/* ++ * Radix search tree node layout. 
++ */ ++ ++struct radij_node ++{ ++ struct radij_mask *rj_mklist; /* list of masks contained in subtree */ ++ struct radij_node *rj_p; /* parent */ ++ short rj_b; /* bit offset; -1-index(netmask) */ ++ char rj_bmask; /* node: mask for bit test*/ ++ u_char rj_flags; /* enumerated next */ ++#define RJF_NORMAL 1 /* leaf contains normal route */ ++#define RJF_ROOT 2 /* leaf is root leaf for tree */ ++#define RJF_ACTIVE 4 /* This node is alive (for rtfree) */ ++ union { ++ struct { /* leaf only data: */ ++ caddr_t rj_Key; /* object of search */ ++ caddr_t rj_Mask; /* netmask, if present */ ++ struct radij_node *rj_Dupedkey; ++ } rj_leaf; ++ struct { /* node only data: */ ++ int rj_Off; /* where to start compare */ ++ struct radij_node *rj_L;/* progeny */ ++ struct radij_node *rj_R;/* progeny */ ++ }rj_node; ++ } rj_u; ++#ifdef RJ_DEBUG ++ int rj_info; ++ struct radij_node *rj_twin; ++ struct radij_node *rj_ybro; ++#endif ++}; ++ ++#define rj_dupedkey rj_u.rj_leaf.rj_Dupedkey ++#define rj_key rj_u.rj_leaf.rj_Key ++#define rj_mask rj_u.rj_leaf.rj_Mask ++#define rj_off rj_u.rj_node.rj_Off ++#define rj_l rj_u.rj_node.rj_L ++#define rj_r rj_u.rj_node.rj_R ++ ++/* ++ * Annotations to tree concerning potential routes applying to subtrees. ++ */ ++ ++extern struct radij_mask { ++ short rm_b; /* bit offset; -1-index(netmask) */ ++ char rm_unused; /* cf. rj_bmask */ ++ u_char rm_flags; /* cf. 
rj_flags */ ++ struct radij_mask *rm_mklist; /* more masks to try */ ++ caddr_t rm_mask; /* the mask */ ++ int rm_refs; /* # of references to this struct */ ++} *rj_mkfreelist; ++ ++#define MKGet(m) {\ ++ if (rj_mkfreelist) {\ ++ m = rj_mkfreelist; \ ++ rj_mkfreelist = (m)->rm_mklist; \ ++ } else \ ++ R_Malloc(m, struct radij_mask *, sizeof (*(m))); }\ ++ ++#define MKFree(m) { (m)->rm_mklist = rj_mkfreelist; rj_mkfreelist = (m);} ++ ++struct radij_node_head { ++ struct radij_node *rnh_treetop; ++ int rnh_addrsize; /* permit, but not require fixed keys */ ++ int rnh_pktsize; /* permit, but not require fixed keys */ ++#if 0 ++ struct radij_node *(*rnh_addaddr) /* add based on sockaddr */ ++ __P((void *v, void *mask, ++ struct radij_node_head *head, struct radij_node nodes[])); ++#endif ++ int (*rnh_addaddr) /* add based on sockaddr */ ++ __P((void *v, void *mask, ++ struct radij_node_head *head, struct radij_node nodes[])); ++ struct radij_node *(*rnh_addpkt) /* add based on packet hdr */ ++ __P((void *v, void *mask, ++ struct radij_node_head *head, struct radij_node nodes[])); ++#if 0 ++ struct radij_node *(*rnh_deladdr) /* remove based on sockaddr */ ++ __P((void *v, void *mask, struct radij_node_head *head)); ++#endif ++ int (*rnh_deladdr) /* remove based on sockaddr */ ++ __P((void *v, void *mask, struct radij_node_head *head, struct radij_node **node)); ++ struct radij_node *(*rnh_delpkt) /* remove based on packet hdr */ ++ __P((void *v, void *mask, struct radij_node_head *head)); ++ struct radij_node *(*rnh_matchaddr) /* locate based on sockaddr */ ++ __P((void *v, struct radij_node_head *head)); ++ struct radij_node *(*rnh_matchpkt) /* locate based on packet hdr */ ++ __P((void *v, struct radij_node_head *head)); ++ int (*rnh_walktree) /* traverse tree */ ++ __P((struct radij_node_head *head, int (*f)(struct radij_node *rn, void *w), void *w)); ++ struct radij_node rnh_nodes[3]; /* empty tree for common case */ ++}; ++ ++ ++#define Bcmp(a, b, n) 
memcmp(((caddr_t)(b)), ((caddr_t)(a)), (unsigned)(n)) ++#define Bcopy(a, b, n) memmove(((caddr_t)(b)), ((caddr_t)(a)), (unsigned)(n)) ++#define Bzero(p, n) memset((caddr_t)(p), 0, (unsigned)(n)) ++#define R_Malloc(p, t, n) ((p = (t) kmalloc((size_t)(n), GFP_ATOMIC)), Bzero((p),(n))) ++#define Free(p) kfree((caddr_t)p); ++ ++void rj_init __P((void)); ++int rj_inithead __P((void **, int)); ++int rj_refines __P((void *, void *)); ++int rj_walktree __P((struct radij_node_head *head, int (*f)(struct radij_node *rn, void *w), void *w)); ++struct radij_node ++ *rj_addmask __P((void *, int, int)) /* , rgb */ ; ++int /* * */ rj_addroute __P((void *, void *, struct radij_node_head *, ++ struct radij_node [2])) /* , rgb */ ; ++int /* * */ rj_delete __P((void *, void *, struct radij_node_head *, struct radij_node **)) /* , rgb */ ; ++struct radij_node /* rgb */ ++ *rj_insert __P((void *, struct radij_node_head *, int *, ++ struct radij_node [2])), ++ *rj_match __P((void *, struct radij_node_head *)), ++ *rj_newpair __P((void *, int, struct radij_node[2])), ++ *rj_search __P((void *, struct radij_node *)), ++ *rj_search_m __P((void *, struct radij_node *, void *)); ++ ++void rj_deltree(struct radij_node_head *); ++void rj_delnodes(struct radij_node *); ++void rj_free_mkfreelist(void); ++int radijcleartree(void); ++int radijcleanup(void); ++ ++extern struct radij_node_head *mask_rjhead; ++extern int maj_keylen; ++#endif /* __KERNEL__ */ ++ ++#endif /* _RADIJ_H_ */ ++ ++ ++/* ++ * $Log: radij.h,v $ ++ * Revision 1.13 2004-04-05 19:55:08 mcr ++ * Moved from linux/include/freeswan/radij.h,v ++ * ++ * Revision 1.12 2002/04/24 07:36:48 mcr ++ * Moved from ./klips/net/ipsec/radij.h,v ++ * ++ * Revision 1.11 2001/09/20 15:33:00 rgb ++ * Min/max cleanup. ++ * ++ * Revision 1.10 1999/11/18 04:09:20 rgb ++ * Replaced all kernel version macros to shorter, readable form. ++ * ++ * Revision 1.9 1999/05/05 22:02:33 rgb ++ * Add a quick and dirty port to 2.2 kernels by Marc Boucher . 
++ * ++ * Revision 1.8 1999/04/29 15:24:58 rgb ++ * Add check for existence of macros min/max. ++ * ++ * Revision 1.7 1999/04/11 00:29:02 henry ++ * GPL boilerplate ++ * ++ * Revision 1.6 1999/04/06 04:54:29 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.5 1999/01/22 06:30:32 rgb ++ * 64-bit clean-up. ++ * ++ * Revision 1.4 1998/11/30 13:22:55 rgb ++ * Rationalised all the klips kernel file headers. They are much shorter ++ * now and won't conflict under RH5.2. ++ * ++ * Revision 1.3 1998/10/25 02:43:27 rgb ++ * Change return type on rj_addroute and rj_delete and add and argument ++ * to the latter to be able to transmit more infomation about errors. ++ * ++ * Revision 1.2 1998/07/14 18:09:51 rgb ++ * Add a routine to clear eroute table. ++ * Added #ifdef __KERNEL__ directives to restrict scope of header. ++ * ++ * Revision 1.1 1998/06/18 21:30:22 henry ++ * move sources from klips/src to klips/net/ipsec to keep stupid kernel ++ * build scripts happier about symlinks ++ * ++ * Revision 1.4 1998/05/25 20:34:16 rgb ++ * Remove temporary ipsec_walk, rj_deltree and rj_delnodes functions. ++ * ++ * Rename ipsec_rj_walker (ipsec_walk) to ipsec_rj_walker_procprint and ++ * add ipsec_rj_walker_delete. ++ * ++ * Recover memory for eroute table on unload of module. ++ * ++ * Revision 1.3 1998/04/22 16:51:37 rgb ++ * Tidy up radij debug code from recent rash of modifications to debug code. ++ * ++ * Revision 1.2 1998/04/14 17:30:38 rgb ++ * Fix up compiling errors for radij tree memory reclamation. ++ * ++ * Revision 1.1 1998/04/09 03:06:16 henry ++ * sources moved up from linux/net/ipsec ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:04 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.4 1997/01/15 01:28:15 ji ++ * No changes. ++ * ++ * Revision 0.3 1996/11/20 14:44:45 ji ++ * Release update only. ++ * ++ * Revision 0.2 1996/11/02 00:18:33 ji ++ * First limited release. 
++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/pfkey.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,529 @@ ++/* ++ * FreeS/WAN specific PF_KEY headers ++ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: pfkey.h,v 1.49 2005-05-11 00:57:29 mcr Exp $ ++ */ ++ ++#ifndef __NET_IPSEC_PF_KEY_H ++#define __NET_IPSEC_PF_KEY_H ++#ifdef __KERNEL__ ++extern struct proto_ops pfkey_proto_ops; ++typedef struct sock pfkey_sock; ++extern int debug_pfkey; ++ ++extern /* void */ int pfkey_init(void); ++extern /* void */ int pfkey_cleanup(void); ++ ++struct socket_list ++{ ++ struct socket *socketp; ++ struct socket_list *next; ++}; ++extern int pfkey_list_insert_socket(struct socket*, struct socket_list**); ++extern int pfkey_list_remove_socket(struct socket*, struct socket_list**); ++extern struct socket_list *pfkey_open_sockets; ++extern struct socket_list *pfkey_registered_sockets[]; ++ ++struct ipsec_alg_supported ++{ ++ uint16_t ias_exttype; ++ uint8_t ias_id; ++ uint8_t ias_ivlen; ++ uint16_t ias_keyminbits; ++ uint16_t ias_keymaxbits; ++ char *ias_name; ++}; ++ ++extern struct supported_list *pfkey_supported_list[]; ++struct supported_list ++{ ++ struct ipsec_alg_supported *supportedp; ++ struct supported_list *next; ++}; ++extern int pfkey_list_insert_supported(struct ipsec_alg_supported*, struct supported_list**); ++extern int pfkey_list_remove_supported(struct ipsec_alg_supported*, struct supported_list**); ++ 
++struct sockaddr_key ++{ ++ uint16_t key_family; /* PF_KEY */ ++ uint16_t key_pad; /* not used */ ++ uint32_t key_pid; /* process ID */ ++}; ++ ++struct pfkey_extracted_data ++{ ++ struct ipsec_sa* ips; ++ struct ipsec_sa* ips2; ++ struct eroute *eroute; ++}; ++ ++/* forward reference */ ++struct sadb_ext; ++struct sadb_msg; ++struct sockaddr; ++struct sadb_comb; ++struct sadb_sadb; ++struct sadb_alg; ++ ++extern int ++pfkey_alloc_eroute(struct eroute** eroute); ++ ++extern int ++pfkey_sa_process(struct sadb_ext *pfkey_ext, ++ struct pfkey_extracted_data* extr); ++ ++extern int ++pfkey_lifetime_process(struct sadb_ext *pfkey_ext, ++ struct pfkey_extracted_data* extr); ++ ++extern int ++pfkey_address_process(struct sadb_ext *pfkey_ext, ++ struct pfkey_extracted_data* extr); ++ ++extern int ++pfkey_key_process(struct sadb_ext *pfkey_ext, ++ struct pfkey_extracted_data* extr); ++ ++extern int ++pfkey_ident_process(struct sadb_ext *pfkey_ext, ++ struct pfkey_extracted_data* extr); ++ ++extern int ++pfkey_sens_process(struct sadb_ext *pfkey_ext, ++ struct pfkey_extracted_data* extr); ++ ++extern int ++pfkey_prop_process(struct sadb_ext *pfkey_ext, ++ struct pfkey_extracted_data* extr); ++ ++extern int ++pfkey_supported_process(struct sadb_ext *pfkey_ext, ++ struct pfkey_extracted_data* extr); ++ ++extern int ++pfkey_spirange_process(struct sadb_ext *pfkey_ext, ++ struct pfkey_extracted_data* extr); ++ ++extern int ++pfkey_x_kmprivate_process(struct sadb_ext *pfkey_ext, ++ struct pfkey_extracted_data* extr); ++ ++extern int ++pfkey_x_satype_process(struct sadb_ext *pfkey_ext, ++ struct pfkey_extracted_data* extr); ++ ++extern int ++pfkey_x_debug_process(struct sadb_ext *pfkey_ext, ++ struct pfkey_extracted_data* extr); ++ ++extern int pfkey_upmsg(struct socket *, struct sadb_msg *); ++extern int pfkey_expire(struct ipsec_sa *, int); ++extern int pfkey_acquire(struct ipsec_sa *); ++#else /* ! 
__KERNEL__ */ ++ ++extern void (*pfkey_debug_func)(const char *message, ...); ++extern void (*pfkey_error_func)(const char *message, ...); ++extern void pfkey_print(struct sadb_msg *msg, FILE *out); ++ ++ ++#endif /* __KERNEL__ */ ++ ++extern uint8_t satype2proto(uint8_t satype); ++extern uint8_t proto2satype(uint8_t proto); ++extern char* satype2name(uint8_t satype); ++extern char* proto2name(uint8_t proto); ++ ++struct key_opt ++{ ++ uint32_t key_pid; /* process ID */ ++ struct sock *sk; ++}; ++ ++#define key_pid(sk) ((struct key_opt*)&((sk)->sk_protinfo))->key_pid ++ ++/* XXX-mcr this is not an alignment, this is because the count is in 64-bit ++ * words. ++ */ ++#define IPSEC_PFKEYv2_ALIGN (sizeof(uint64_t)/sizeof(uint8_t)) ++#define BITS_PER_OCTET 8 ++#define OCTETBITS 8 ++#define PFKEYBITS 64 ++#define DIVUP(x,y) ((x + y -1) / y) /* divide, rounding upwards */ ++#define ALIGN_N(x,y) (DIVUP(x,y) * y) /* align on y boundary */ ++ ++#define IPSEC_PFKEYv2_LEN(x) ((x) * IPSEC_PFKEYv2_ALIGN) ++#define IPSEC_PFKEYv2_WORDS(x) ((x) / IPSEC_PFKEYv2_ALIGN) ++ ++ ++#define PFKEYv2_MAX_MSGSIZE 4096 ++ ++/* ++ * PF_KEYv2 permitted and required extensions in and out bitmaps ++ */ ++struct pf_key_ext_parsers_def { ++ int (*parser)(struct sadb_ext*); ++ char *parser_name; ++}; ++ ++ ++#define SADB_EXTENSIONS_MAX 31 ++extern unsigned int extensions_bitmaps[2/*in/out*/][2/*perm/req*/][SADB_EXTENSIONS_MAX]; ++#define EXT_BITS_IN 0 ++#define EXT_BITS_OUT 1 ++#define EXT_BITS_PERM 0 ++#define EXT_BITS_REQ 1 ++ ++extern void pfkey_extensions_init(struct sadb_ext *extensions[]); ++extern void pfkey_extensions_free(struct sadb_ext *extensions[]); ++extern void pfkey_msg_free(struct sadb_msg **pfkey_msg); ++ ++extern int pfkey_msg_parse(struct sadb_msg *pfkey_msg, ++ struct pf_key_ext_parsers_def *ext_parsers[], ++ struct sadb_ext **extensions, ++ int dir); ++ ++extern int pfkey_register_reply(int satype, struct sadb_msg *sadb_msg); ++ ++/* ++ * PF_KEYv2 build function prototypes ++ 
*/ ++ ++int ++pfkey_msg_hdr_build(struct sadb_ext** pfkey_ext, ++ uint8_t msg_type, ++ uint8_t satype, ++ uint8_t msg_errno, ++ uint32_t seq, ++ uint32_t pid); ++ ++int ++pfkey_sa_ref_build(struct sadb_ext ** pfkey_ext, ++ uint16_t exttype, ++ uint32_t spi, /* in network order */ ++ uint8_t replay_window, ++ uint8_t sa_state, ++ uint8_t auth, ++ uint8_t encrypt, ++ uint32_t flags, ++ uint32_t/*IPsecSAref_t*/ ref); ++ ++int ++pfkey_sa_build(struct sadb_ext ** pfkey_ext, ++ uint16_t exttype, ++ uint32_t spi, /* in network order */ ++ uint8_t replay_window, ++ uint8_t sa_state, ++ uint8_t auth, ++ uint8_t encrypt, ++ uint32_t flags); ++ ++int ++pfkey_lifetime_build(struct sadb_ext ** pfkey_ext, ++ uint16_t exttype, ++ uint32_t allocations, ++ uint64_t bytes, ++ uint64_t addtime, ++ uint64_t usetime, ++ uint32_t packets); ++ ++int ++pfkey_address_build(struct sadb_ext** pfkey_ext, ++ uint16_t exttype, ++ uint8_t proto, ++ uint8_t prefixlen, ++ struct sockaddr* address); ++ ++int ++pfkey_key_build(struct sadb_ext** pfkey_ext, ++ uint16_t exttype, ++ uint16_t key_bits, ++ char* key); ++ ++int ++pfkey_ident_build(struct sadb_ext** pfkey_ext, ++ uint16_t exttype, ++ uint16_t ident_type, ++ uint64_t ident_id, ++ uint8_t ident_len, ++ char* ident_string); ++ ++#ifdef __KERNEL__ ++extern int pfkey_nat_t_new_mapping(struct ipsec_sa *, struct sockaddr *, __u16); ++extern int pfkey_x_nat_t_type_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr); ++extern int pfkey_x_nat_t_port_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr); ++#endif /* __KERNEL__ */ ++int ++pfkey_x_nat_t_type_build(struct sadb_ext** pfkey_ext, ++ uint8_t type); ++int ++pfkey_x_nat_t_port_build(struct sadb_ext** pfkey_ext, ++ uint16_t exttype, ++ uint16_t port); ++ ++int ++pfkey_sens_build(struct sadb_ext** pfkey_ext, ++ uint32_t dpd, ++ uint8_t sens_level, ++ uint8_t sens_len, ++ uint64_t* sens_bitmap, ++ uint8_t integ_level, ++ uint8_t integ_len, ++ uint64_t* 
integ_bitmap); ++ ++int pfkey_x_protocol_build(struct sadb_ext **, uint8_t); ++ ++ ++int ++pfkey_prop_build(struct sadb_ext** pfkey_ext, ++ uint8_t replay, ++ unsigned int comb_num, ++ struct sadb_comb* comb); ++ ++int ++pfkey_supported_build(struct sadb_ext** pfkey_ext, ++ uint16_t exttype, ++ unsigned int alg_num, ++ struct sadb_alg* alg); ++ ++int ++pfkey_spirange_build(struct sadb_ext** pfkey_ext, ++ uint16_t exttype, ++ uint32_t min, ++ uint32_t max); ++ ++int ++pfkey_x_kmprivate_build(struct sadb_ext** pfkey_ext); ++ ++int ++pfkey_x_satype_build(struct sadb_ext** pfkey_ext, ++ uint8_t satype); ++ ++int ++pfkey_x_debug_build(struct sadb_ext** pfkey_ext, ++ uint32_t tunnel, ++ uint32_t netlink, ++ uint32_t xform, ++ uint32_t eroute, ++ uint32_t spi, ++ uint32_t radij, ++ uint32_t esp, ++ uint32_t ah, ++ uint32_t rcv, ++ uint32_t pfkey, ++ uint32_t ipcomp, ++ uint32_t verbose); ++ ++int ++pfkey_msg_build(struct sadb_msg** pfkey_msg, ++ struct sadb_ext* extensions[], ++ int dir); ++ ++/* in pfkey_v2_debug.c - routines to decode numbers -> strings */ ++const char * ++pfkey_v2_sadb_ext_string(int extnum); ++ ++const char * ++pfkey_v2_sadb_type_string(int sadb_type); ++ ++ ++#endif /* __NET_IPSEC_PF_KEY_H */ ++ ++/* ++ * $Log: pfkey.h,v $ ++ * Revision 1.49 2005-05-11 00:57:29 mcr ++ * rename struct supported -> struct ipsec_alg_supported. ++ * make pfkey.h more standalone. ++ * ++ * Revision 1.48 2005/05/01 03:12:50 mcr ++ * include name of algorithm in datastructure. ++ * ++ * Revision 1.47 2004/08/21 00:44:14 mcr ++ * simplify definition of nat_t related prototypes. ++ * ++ * Revision 1.46 2004/08/04 16:27:22 mcr ++ * 2.6 sk_ options. ++ * ++ * Revision 1.45 2004/04/06 02:49:00 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.44 2003/12/10 01:20:01 mcr ++ * NAT-traversal patches to KLIPS. ++ * ++ * Revision 1.43 2003/10/31 02:26:44 mcr ++ * pulled up port-selector patches. 
++ * ++ * Revision 1.42.2.2 2003/10/29 01:09:32 mcr ++ * added debugging for pfkey library. ++ * ++ * Revision 1.42.2.1 2003/09/21 13:59:34 mcr ++ * pre-liminary X.509 patch - does not yet pass tests. ++ * ++ * Revision 1.42 2003/08/25 22:08:19 mcr ++ * removed pfkey_proto_init() from pfkey.h for 2.6 support. ++ * ++ * Revision 1.41 2003/05/07 17:28:57 mcr ++ * new function pfkey_debug_func added for us in debugging from ++ ++ * pfkey library. ++ * ++ * Revision 1.40 2003/01/30 02:31:34 rgb ++ * ++ * Convert IPsecSAref_t from signed to unsigned to fix apparent SAref exhaustion bug. ++ * ++ * Revision 1.39 2002/09/20 15:40:21 rgb ++ * Switch from pfkey_alloc_ipsec_sa() to ipsec_sa_alloc(). ++ * Added ref parameter to pfkey_sa_build(). ++ * Cleaned out unused cruft. ++ * ++ * Revision 1.38 2002/05/14 02:37:24 rgb ++ * Change all references to tdb, TDB or Tunnel Descriptor Block to ips, ++ * ipsec_sa or ipsec_sa. ++ * Added function prototypes for the functions moved to ++ * pfkey_v2_ext_process.c. ++ * ++ * Revision 1.37 2002/04/24 07:36:49 mcr ++ * Moved from ./lib/pfkey.h,v ++ * ++ * Revision 1.36 2002/01/20 20:34:49 mcr ++ * added pfkey_v2_sadb_type_string to decode sadb_type to string. ++ * ++ * Revision 1.35 2001/11/27 05:27:47 mcr ++ * pfkey parses are now maintained by a structure ++ * that includes their name for debug purposes. ++ * ++ * Revision 1.34 2001/11/26 09:23:53 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.33 2001/11/06 19:47:47 rgb ++ * Added packet parameter to lifetime and comb structures. ++ * ++ * Revision 1.32 2001/09/08 21:13:34 rgb ++ * Added pfkey ident extension support for ISAKMPd. (NetCelo) ++ * ++ * Revision 1.31 2001/06/14 19:35:16 rgb ++ * Update copyright date. ++ * ++ * Revision 1.30 2001/02/27 07:04:52 rgb ++ * Added satype2name prototype. ++ * ++ * Revision 1.29 2001/02/26 19:59:33 rgb ++ * Ditch unused sadb_satype2proto[], replaced by satype2proto(). 
++ * ++ * Revision 1.28 2000/10/10 20:10:19 rgb ++ * Added support for debug_ipcomp and debug_verbose to klipsdebug. ++ * ++ * Revision 1.27 2000/09/21 04:20:45 rgb ++ * Fixed array size off-by-one error. (Thanks Svenning!) ++ * ++ * Revision 1.26 2000/09/12 03:26:05 rgb ++ * Added pfkey_acquire prototype. ++ * ++ * Revision 1.25 2000/09/08 19:21:28 rgb ++ * Fix pfkey_prop_build() parameter to be only single indirection. ++ * ++ * Revision 1.24 2000/09/01 18:46:42 rgb ++ * Added a supported algorithms array lists, one per satype and registered ++ * existing algorithms. ++ * Fixed pfkey_list_{insert,remove}_{socket,support}() to allow change to ++ * list. ++ * ++ * Revision 1.23 2000/08/27 01:55:26 rgb ++ * Define OCTETBITS and PFKEYBITS to avoid using 'magic' numbers in code. ++ * ++ * Revision 1.22 2000/08/20 21:39:23 rgb ++ * Added kernel prototypes for kernel funcitions pfkey_upmsg() and ++ * pfkey_expire(). ++ * ++ * Revision 1.21 2000/08/15 17:29:23 rgb ++ * Fixes from SZI to untested pfkey_prop_build(). ++ * ++ * Revision 1.20 2000/05/10 20:14:19 rgb ++ * Fleshed out sensitivity, proposal and supported extensions. ++ * ++ * Revision 1.19 2000/03/16 14:07:23 rgb ++ * Renamed ALIGN macro to avoid fighting with others in kernel. ++ * ++ * Revision 1.18 2000/01/22 23:24:06 rgb ++ * Added prototypes for proto2satype(), satype2proto() and proto2name(). ++ * ++ * Revision 1.17 2000/01/21 06:26:59 rgb ++ * Converted from double tdb arguments to one structure (extr) ++ * containing pointers to all temporary information structures. ++ * Added klipsdebug switching capability. ++ * Dropped unused argument to pfkey_x_satype_build(). ++ * ++ * Revision 1.16 1999/12/29 21:17:41 rgb ++ * Changed pfkey_msg_build() I/F to include a struct sadb_msg** ++ * parameter for cleaner manipulation of extensions[] and to guard ++ * against potential memory leaks. ++ * Changed the I/F to pfkey_msg_free() for the same reason. 
++ * ++ * Revision 1.15 1999/12/09 23:12:54 rgb ++ * Added macro for BITS_PER_OCTET. ++ * Added argument to pfkey_sa_build() to do eroutes. ++ * ++ * Revision 1.14 1999/12/08 20:33:25 rgb ++ * Changed sa_family_t to uint16_t for 2.0.xx compatibility. ++ * ++ * Revision 1.13 1999/12/07 19:53:40 rgb ++ * Removed unused first argument from extension parsers. ++ * Changed __u* types to uint* to avoid use of asm/types.h and ++ * sys/types.h in userspace code. ++ * Added function prototypes for pfkey message and extensions ++ * initialisation and cleanup. ++ * ++ * Revision 1.12 1999/12/01 22:19:38 rgb ++ * Change pfkey_sa_build to accept an SPI in network byte order. ++ * ++ * Revision 1.11 1999/11/27 11:55:26 rgb ++ * Added extern sadb_satype2proto to enable moving protocol lookup table ++ * to lib/pfkey_v2_parse.c. ++ * Delete unused, moved typedefs. ++ * Add argument to pfkey_msg_parse() for direction. ++ * Consolidated the 4 1-d extension bitmap arrays into one 4-d array. ++ * ++ * Revision 1.10 1999/11/23 22:29:21 rgb ++ * This file has been moved in the distribution from klips/net/ipsec to ++ * lib. ++ * Add macros for dealing with alignment and rounding up more opaquely. ++ * The uint_t type defines have been moved to freeswan.h to avoid ++ * chicken-and-egg problems. ++ * Add macros for dealing with alignment and rounding up more opaque. ++ * Added prototypes for using extention header bitmaps. ++ * Added prototypes of all the build functions. ++ * ++ * Revision 1.9 1999/11/20 21:59:48 rgb ++ * Moved socketlist type declarations and prototypes for shared use. ++ * Slightly modified scope of sockaddr_key declaration. ++ * ++ * Revision 1.8 1999/11/17 14:34:25 rgb ++ * Protect sa_family_t from being used in userspace with GLIBC<2. ++ * ++ * Revision 1.7 1999/10/27 19:40:35 rgb ++ * Add a maximum PFKEY packet size macro. ++ * ++ * Revision 1.6 1999/10/26 16:58:58 rgb ++ * Created a sockaddr_key and key_opt socket extension structures. 
++ * ++ * Revision 1.5 1999/06/10 05:24:41 rgb ++ * Renamed variables to reduce confusion. ++ * ++ * Revision 1.4 1999/04/29 15:21:11 rgb ++ * Add pfkey support to debugging. ++ * Add return values to init and cleanup functions. ++ * ++ * Revision 1.3 1999/04/15 17:58:07 rgb ++ * Add RCSID labels. ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/pfkeyv2.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,472 @@ ++/* ++ * RCSID $Id: pfkeyv2.h,v 1.31 2005-04-14 01:14:54 mcr Exp $ ++ */ ++ ++/* ++RFC 2367 PF_KEY Key Management API July 1998 ++ ++ ++Appendix D: Sample Header File ++ ++This file defines structures and symbols for the PF_KEY Version 2 ++key management interface. It was written at the U.S. Naval Research ++Laboratory. This file is in the public domain. The authors ask that ++you leave this credit intact on any copies of this file. ++*/ ++#ifndef __PFKEY_V2_H ++#define __PFKEY_V2_H 1 ++ ++#define PF_KEY_V2 2 ++#define PFKEYV2_REVISION 199806L ++ ++#define SADB_RESERVED 0 ++#define SADB_GETSPI 1 ++#define SADB_UPDATE 2 ++#define SADB_ADD 3 ++#define SADB_DELETE 4 ++#define SADB_GET 5 ++#define SADB_ACQUIRE 6 ++#define SADB_REGISTER 7 ++#define SADB_EXPIRE 8 ++#define SADB_FLUSH 9 ++#define SADB_DUMP 10 ++#define SADB_X_PROMISC 11 ++#define SADB_X_PCHANGE 12 ++#define SADB_X_GRPSA 13 ++#define SADB_X_ADDFLOW 14 ++#define SADB_X_DELFLOW 15 ++#define SADB_X_DEBUG 16 ++#define SADB_X_NAT_T_NEW_MAPPING 17 ++#define SADB_MAX 17 ++ ++struct sadb_msg { ++ uint8_t sadb_msg_version; ++ uint8_t sadb_msg_type; ++ uint8_t sadb_msg_errno; ++ uint8_t sadb_msg_satype; ++ uint16_t sadb_msg_len; ++ uint16_t sadb_msg_reserved; ++ uint32_t sadb_msg_seq; ++ uint32_t sadb_msg_pid; ++}; ++ ++struct sadb_ext { ++ uint16_t sadb_ext_len; ++ uint16_t sadb_ext_type; ++}; ++ ++struct sadb_sa { ++ uint16_t sadb_sa_len; ++ uint16_t sadb_sa_exttype; ++ uint32_t sadb_sa_spi; ++ uint8_t sadb_sa_replay; ++ uint8_t sadb_sa_state; ++ uint8_t sadb_sa_auth; ++ uint8_t sadb_sa_encrypt; ++ 
uint32_t sadb_sa_flags; ++ uint32_t /*IPsecSAref_t*/ sadb_x_sa_ref; /* 32 bits */ ++ uint8_t sadb_x_reserved[4]; ++}; ++ ++struct sadb_sa_v1 { ++ uint16_t sadb_sa_len; ++ uint16_t sadb_sa_exttype; ++ uint32_t sadb_sa_spi; ++ uint8_t sadb_sa_replay; ++ uint8_t sadb_sa_state; ++ uint8_t sadb_sa_auth; ++ uint8_t sadb_sa_encrypt; ++ uint32_t sadb_sa_flags; ++}; ++ ++struct sadb_lifetime { ++ uint16_t sadb_lifetime_len; ++ uint16_t sadb_lifetime_exttype; ++ uint32_t sadb_lifetime_allocations; ++ uint64_t sadb_lifetime_bytes; ++ uint64_t sadb_lifetime_addtime; ++ uint64_t sadb_lifetime_usetime; ++ uint32_t sadb_x_lifetime_packets; ++ uint32_t sadb_x_lifetime_reserved; ++}; ++ ++struct sadb_address { ++ uint16_t sadb_address_len; ++ uint16_t sadb_address_exttype; ++ uint8_t sadb_address_proto; ++ uint8_t sadb_address_prefixlen; ++ uint16_t sadb_address_reserved; ++}; ++ ++struct sadb_key { ++ uint16_t sadb_key_len; ++ uint16_t sadb_key_exttype; ++ uint16_t sadb_key_bits; ++ uint16_t sadb_key_reserved; ++}; ++ ++struct sadb_ident { ++ uint16_t sadb_ident_len; ++ uint16_t sadb_ident_exttype; ++ uint16_t sadb_ident_type; ++ uint16_t sadb_ident_reserved; ++ uint64_t sadb_ident_id; ++}; ++ ++struct sadb_sens { ++ uint16_t sadb_sens_len; ++ uint16_t sadb_sens_exttype; ++ uint32_t sadb_sens_dpd; ++ uint8_t sadb_sens_sens_level; ++ uint8_t sadb_sens_sens_len; ++ uint8_t sadb_sens_integ_level; ++ uint8_t sadb_sens_integ_len; ++ uint32_t sadb_sens_reserved; ++}; ++ ++struct sadb_prop { ++ uint16_t sadb_prop_len; ++ uint16_t sadb_prop_exttype; ++ uint8_t sadb_prop_replay; ++ uint8_t sadb_prop_reserved[3]; ++}; ++ ++struct sadb_comb { ++ uint8_t sadb_comb_auth; ++ uint8_t sadb_comb_encrypt; ++ uint16_t sadb_comb_flags; ++ uint16_t sadb_comb_auth_minbits; ++ uint16_t sadb_comb_auth_maxbits; ++ uint16_t sadb_comb_encrypt_minbits; ++ uint16_t sadb_comb_encrypt_maxbits; ++ uint32_t sadb_comb_reserved; ++ uint32_t sadb_comb_soft_allocations; ++ uint32_t sadb_comb_hard_allocations; ++ 
uint64_t sadb_comb_soft_bytes; ++ uint64_t sadb_comb_hard_bytes; ++ uint64_t sadb_comb_soft_addtime; ++ uint64_t sadb_comb_hard_addtime; ++ uint64_t sadb_comb_soft_usetime; ++ uint64_t sadb_comb_hard_usetime; ++ uint32_t sadb_x_comb_soft_packets; ++ uint32_t sadb_x_comb_hard_packets; ++}; ++ ++struct sadb_supported { ++ uint16_t sadb_supported_len; ++ uint16_t sadb_supported_exttype; ++ uint32_t sadb_supported_reserved; ++}; ++ ++struct sadb_alg { ++ uint8_t sadb_alg_id; ++ uint8_t sadb_alg_ivlen; ++ uint16_t sadb_alg_minbits; ++ uint16_t sadb_alg_maxbits; ++ uint16_t sadb_alg_reserved; ++}; ++ ++struct sadb_spirange { ++ uint16_t sadb_spirange_len; ++ uint16_t sadb_spirange_exttype; ++ uint32_t sadb_spirange_min; ++ uint32_t sadb_spirange_max; ++ uint32_t sadb_spirange_reserved; ++}; ++ ++struct sadb_x_kmprivate { ++ uint16_t sadb_x_kmprivate_len; ++ uint16_t sadb_x_kmprivate_exttype; ++ uint32_t sadb_x_kmprivate_reserved; ++}; ++ ++struct sadb_x_satype { ++ uint16_t sadb_x_satype_len; ++ uint16_t sadb_x_satype_exttype; ++ uint8_t sadb_x_satype_satype; ++ uint8_t sadb_x_satype_reserved[3]; ++}; ++ ++struct sadb_x_policy { ++ uint16_t sadb_x_policy_len; ++ uint16_t sadb_x_policy_exttype; ++ uint16_t sadb_x_policy_type; ++ uint8_t sadb_x_policy_dir; ++ uint8_t sadb_x_policy_reserved; ++ uint32_t sadb_x_policy_id; ++ uint32_t sadb_x_policy_reserved2; ++}; ++ ++struct sadb_x_debug { ++ uint16_t sadb_x_debug_len; ++ uint16_t sadb_x_debug_exttype; ++ uint32_t sadb_x_debug_tunnel; ++ uint32_t sadb_x_debug_netlink; ++ uint32_t sadb_x_debug_xform; ++ uint32_t sadb_x_debug_eroute; ++ uint32_t sadb_x_debug_spi; ++ uint32_t sadb_x_debug_radij; ++ uint32_t sadb_x_debug_esp; ++ uint32_t sadb_x_debug_ah; ++ uint32_t sadb_x_debug_rcv; ++ uint32_t sadb_x_debug_pfkey; ++ uint32_t sadb_x_debug_ipcomp; ++ uint32_t sadb_x_debug_verbose; ++ uint8_t sadb_x_debug_reserved[4]; ++}; ++ ++struct sadb_x_nat_t_type { ++ uint16_t sadb_x_nat_t_type_len; ++ uint16_t sadb_x_nat_t_type_exttype; ++ 
uint8_t sadb_x_nat_t_type_type; ++ uint8_t sadb_x_nat_t_type_reserved[3]; ++}; ++struct sadb_x_nat_t_port { ++ uint16_t sadb_x_nat_t_port_len; ++ uint16_t sadb_x_nat_t_port_exttype; ++ uint16_t sadb_x_nat_t_port_port; ++ uint16_t sadb_x_nat_t_port_reserved; ++}; ++ ++/* ++ * A protocol structure for passing through the transport level ++ * protocol. It contains more fields than are actually used/needed ++ * but it is this way to be compatible with the structure used in ++ * OpenBSD (http://www.openbsd.org/cgi-bin/cvsweb/src/sys/net/pfkeyv2.h) ++ */ ++struct sadb_protocol { ++ uint16_t sadb_protocol_len; ++ uint16_t sadb_protocol_exttype; ++ uint8_t sadb_protocol_proto; ++ uint8_t sadb_protocol_direction; ++ uint8_t sadb_protocol_flags; ++ uint8_t sadb_protocol_reserved2; ++}; ++ ++#define SADB_EXT_RESERVED 0 ++#define SADB_EXT_SA 1 ++#define SADB_EXT_LIFETIME_CURRENT 2 ++#define SADB_EXT_LIFETIME_HARD 3 ++#define SADB_EXT_LIFETIME_SOFT 4 ++#define SADB_EXT_ADDRESS_SRC 5 ++#define SADB_EXT_ADDRESS_DST 6 ++#define SADB_EXT_ADDRESS_PROXY 7 ++#define SADB_EXT_KEY_AUTH 8 ++#define SADB_EXT_KEY_ENCRYPT 9 ++#define SADB_EXT_IDENTITY_SRC 10 ++#define SADB_EXT_IDENTITY_DST 11 ++#define SADB_EXT_SENSITIVITY 12 ++#define SADB_EXT_PROPOSAL 13 ++#define SADB_EXT_SUPPORTED_AUTH 14 ++#define SADB_EXT_SUPPORTED_ENCRYPT 15 ++#define SADB_EXT_SPIRANGE 16 ++#define SADB_X_EXT_KMPRIVATE 17 ++#define SADB_X_EXT_SATYPE2 18 ++#ifdef KERNEL26_HAS_KAME_DUPLICATES ++#define SADB_X_EXT_POLICY 18 ++#endif ++#define SADB_X_EXT_SA2 19 ++#define SADB_X_EXT_ADDRESS_DST2 20 ++#define SADB_X_EXT_ADDRESS_SRC_FLOW 21 ++#define SADB_X_EXT_ADDRESS_DST_FLOW 22 ++#define SADB_X_EXT_ADDRESS_SRC_MASK 23 ++#define SADB_X_EXT_ADDRESS_DST_MASK 24 ++#define SADB_X_EXT_DEBUG 25 ++#define SADB_X_EXT_PROTOCOL 26 ++#define SADB_X_EXT_NAT_T_TYPE 27 ++#define SADB_X_EXT_NAT_T_SPORT 28 ++#define SADB_X_EXT_NAT_T_DPORT 29 ++#define SADB_X_EXT_NAT_T_OA 30 ++#define SADB_EXT_MAX 30 ++ ++/* SADB_X_DELFLOW required over 
and above SADB_X_SAFLAGS_CLEARFLOW */ ++#define SADB_X_EXT_ADDRESS_DELFLOW \ ++ ( (1< ++# define ZEXPORT WINAPI ++# ifdef WIN32 ++# define ZEXPORTVA WINAPIV ++# else ++# define ZEXPORTVA FAR _cdecl _export ++# endif ++# endif ++# if defined (__BORLANDC__) ++# if (__BORLANDC__ >= 0x0500) && defined (WIN32) ++# include ++# define ZEXPORT __declspec(dllexport) WINAPI ++# define ZEXPORTRVA __declspec(dllexport) WINAPIV ++# else ++# if defined (_Windows) && defined (__DLL__) ++# define ZEXPORT _export ++# define ZEXPORTVA _export ++# endif ++# endif ++# endif ++#endif ++ ++#if defined (__BEOS__) ++# if defined (ZLIB_DLL) ++# define ZEXTERN extern __declspec(dllexport) ++# else ++# define ZEXTERN extern __declspec(dllimport) ++# endif ++#endif ++ ++#ifndef ZEXPORT ++# define ZEXPORT ++#endif ++#ifndef ZEXPORTVA ++# define ZEXPORTVA ++#endif ++#ifndef ZEXTERN ++# define ZEXTERN extern ++#endif ++ ++#ifndef FAR ++# define FAR ++#endif ++ ++#if !defined(MACOS) && !defined(TARGET_OS_MAC) ++typedef unsigned char Byte; /* 8 bits */ ++#endif ++typedef unsigned int uInt; /* 16 bits or more */ ++typedef unsigned long uLong; /* 32 bits or more */ ++ ++#ifdef SMALL_MEDIUM ++ /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ ++# define Bytef Byte FAR ++#else ++ typedef Byte FAR Bytef; ++#endif ++typedef char FAR charf; ++typedef int FAR intf; ++typedef uInt FAR uIntf; ++typedef uLong FAR uLongf; ++ ++#ifdef STDC ++ typedef void FAR *voidpf; ++ typedef void *voidp; ++#else ++ typedef Byte FAR *voidpf; ++ typedef Byte *voidp; ++#endif ++ ++#ifdef HAVE_UNISTD_H ++# include /* for off_t */ ++# include /* for SEEK_* and off_t */ ++# define z_off_t off_t ++#endif ++#ifndef SEEK_SET ++# define SEEK_SET 0 /* Seek from beginning of file. */ ++# define SEEK_CUR 1 /* Seek from current position. 
*/ ++# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ ++#endif ++#ifndef z_off_t ++# define z_off_t long ++#endif ++ ++/* MVS linker does not support external names larger than 8 bytes */ ++#if defined(__MVS__) ++# pragma map(deflateInit_,"DEIN") ++# pragma map(deflateInit2_,"DEIN2") ++# pragma map(deflateEnd,"DEEND") ++# pragma map(inflateInit_,"ININ") ++# pragma map(inflateInit2_,"ININ2") ++# pragma map(inflateEnd,"INEND") ++# pragma map(inflateSync,"INSY") ++# pragma map(inflateSetDictionary,"INSEDI") ++# pragma map(inflate_blocks,"INBL") ++# pragma map(inflate_blocks_new,"INBLNE") ++# pragma map(inflate_blocks_free,"INBLFR") ++# pragma map(inflate_blocks_reset,"INBLRE") ++# pragma map(inflate_codes_free,"INCOFR") ++# pragma map(inflate_codes,"INCO") ++# pragma map(inflate_fast,"INFA") ++# pragma map(inflate_flush,"INFLU") ++# pragma map(inflate_mask,"INMA") ++# pragma map(inflate_set_dictionary,"INSEDI2") ++# pragma map(ipcomp_inflate_copyright,"INCOPY") ++# pragma map(inflate_trees_bits,"INTRBI") ++# pragma map(inflate_trees_dynamic,"INTRDY") ++# pragma map(inflate_trees_fixed,"INTRFI") ++# pragma map(inflate_trees_free,"INTRFR") ++#endif ++ ++#endif /* _ZCONF_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/zlib/zlib.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,893 @@ ++/* zlib.h -- interface of the 'zlib' general purpose compression library ++ version 1.1.4, March 11th, 2002 ++ ++ Copyright (C) 1995-2002 Jean-loup Gailly and Mark Adler ++ ++ This software is provided 'as-is', without any express or implied ++ warranty. In no event will the authors be held liable for any damages ++ arising from the use of this software. ++ ++ Permission is granted to anyone to use this software for any purpose, ++ including commercial applications, and to alter it and redistribute it ++ freely, subject to the following restrictions: ++ ++ 1. 
The origin of this software must not be misrepresented; you must not ++ claim that you wrote the original software. If you use this software ++ in a product, an acknowledgment in the product documentation would be ++ appreciated but is not required. ++ 2. Altered source versions must be plainly marked as such, and must not be ++ misrepresented as being the original software. ++ 3. This notice may not be removed or altered from any source distribution. ++ ++ Jean-loup Gailly Mark Adler ++ jloup@gzip.org madler@alumni.caltech.edu ++ ++ ++ The data format used by the zlib library is described by RFCs (Request for ++ Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt ++ (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format). ++*/ ++ ++#ifndef _ZLIB_H ++#define _ZLIB_H ++ ++#include "zconf.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define ZLIB_VERSION "1.1.4" ++ ++/* ++ The 'zlib' compression library provides in-memory compression and ++ decompression functions, including integrity checks of the uncompressed ++ data. This version of the library supports only one compression method ++ (deflation) but other algorithms will be added later and will have the same ++ stream interface. ++ ++ Compression can be done in a single step if the buffers are large ++ enough (for example if an input file is mmap'ed), or can be done by ++ repeated calls of the compression function. In the latter case, the ++ application must provide more input and/or consume the output ++ (providing more output space) before each call. ++ ++ The library also supports reading and writing files in gzip (.gz) format ++ with an interface similar to that of stdio. ++ ++ The library does not install any signal handler. The decoder checks ++ the consistency of the compressed data, so the library should never ++ crash even in case of corrupted input. 
++*/ ++ ++typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); ++typedef void (*free_func) OF((voidpf opaque, voidpf address)); ++ ++struct internal_state; ++ ++typedef struct z_stream_s { ++ Bytef *next_in; /* next input byte */ ++ uInt avail_in; /* number of bytes available at next_in */ ++ uLong total_in; /* total nb of input bytes read so far */ ++ ++ Bytef *next_out; /* next output byte should be put there */ ++ uInt avail_out; /* remaining free space at next_out */ ++ uLong total_out; /* total nb of bytes output so far */ ++ ++ const char *msg; /* last error message, NULL if no error */ ++ struct internal_state FAR *state; /* not visible by applications */ ++ ++ alloc_func zalloc; /* used to allocate the internal state */ ++ free_func zfree; /* used to free the internal state */ ++ voidpf opaque; /* private data object passed to zalloc and zfree */ ++ ++ int data_type; /* best guess about the data type: ascii or binary */ ++ uLong adler; /* adler32 value of the uncompressed data */ ++ uLong reserved; /* reserved for future use */ ++} z_stream; ++ ++typedef z_stream FAR *z_streamp; ++ ++/* ++ The application must update next_in and avail_in when avail_in has ++ dropped to zero. It must update next_out and avail_out when avail_out ++ has dropped to zero. The application must initialize zalloc, zfree and ++ opaque before calling the init function. All other fields are set by the ++ compression library and must not be updated by the application. ++ ++ The opaque value provided by the application will be passed as the first ++ parameter for calls of zalloc and zfree. This can be useful for custom ++ memory management. The compression library attaches no meaning to the ++ opaque value. ++ ++ zalloc must return Z_NULL if there is not enough memory for the object. ++ If zlib is used in a multi-threaded application, zalloc and zfree must be ++ thread safe. 
++ ++ On 16-bit systems, the functions zalloc and zfree must be able to allocate ++ exactly 65536 bytes, but will not be required to allocate more than this ++ if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, ++ pointers returned by zalloc for objects of exactly 65536 bytes *must* ++ have their offset normalized to zero. The default allocation function ++ provided by this library ensures this (see zutil.c). To reduce memory ++ requirements and avoid any allocation of 64K objects, at the expense of ++ compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). ++ ++ The fields total_in and total_out can be used for statistics or ++ progress reports. After compression, total_in holds the total size of ++ the uncompressed data and may be saved for use in the decompressor ++ (particularly if the decompressor wants to decompress everything in ++ a single step). ++*/ ++ ++ /* constants */ ++ ++#define Z_NO_FLUSH 0 ++#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */ ++#define Z_SYNC_FLUSH 2 ++#define Z_FULL_FLUSH 3 ++#define Z_FINISH 4 ++/* Allowed flush values; see deflate() below for details */ ++ ++#define Z_OK 0 ++#define Z_STREAM_END 1 ++#define Z_NEED_DICT 2 ++#define Z_ERRNO (-1) ++#define Z_STREAM_ERROR (-2) ++#define Z_DATA_ERROR (-3) ++#define Z_MEM_ERROR (-4) ++#define Z_BUF_ERROR (-5) ++#define Z_VERSION_ERROR (-6) ++/* Return codes for the compression/decompression functions. Negative ++ * values are errors, positive values are used for special but normal events. 
++ */ ++ ++#define Z_NO_COMPRESSION 0 ++#define Z_BEST_SPEED 1 ++#define Z_BEST_COMPRESSION 9 ++#define Z_DEFAULT_COMPRESSION (-1) ++/* compression levels */ ++ ++#define Z_FILTERED 1 ++#define Z_HUFFMAN_ONLY 2 ++#define Z_DEFAULT_STRATEGY 0 ++/* compression strategy; see deflateInit2() below for details */ ++ ++#define Z_BINARY 0 ++#define Z_ASCII 1 ++#define Z_UNKNOWN 2 ++/* Possible values of the data_type field */ ++ ++#define Z_DEFLATED 8 ++/* The deflate compression method (the only one supported in this version) */ ++ ++#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ ++ ++#define zlib_version zlibVersion() ++/* for compatibility with versions < 1.0.2 */ ++ ++ /* basic functions */ ++ ++ZEXTERN const char * ZEXPORT zlibVersion OF((void)); ++/* The application can compare zlibVersion and ZLIB_VERSION for consistency. ++ If the first character differs, the library code actually used is ++ not compatible with the zlib.h header file used by the application. ++ This check is automatically made by deflateInit and inflateInit. ++ */ ++ ++/* ++ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); ++ ++ Initializes the internal stream state for compression. The fields ++ zalloc, zfree and opaque must be initialized before by the caller. ++ If zalloc and zfree are set to Z_NULL, deflateInit updates them to ++ use default allocation functions. ++ ++ The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: ++ 1 gives best speed, 9 gives best compression, 0 gives no compression at ++ all (the input data is simply copied a block at a time). ++ Z_DEFAULT_COMPRESSION requests a default compromise between speed and ++ compression (currently equivalent to level 6). 
++ ++ deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not ++ enough memory, Z_STREAM_ERROR if level is not a valid compression level, ++ Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible ++ with the version assumed by the caller (ZLIB_VERSION). ++ msg is set to null if there is no error message. deflateInit does not ++ perform any compression: this will be done by deflate(). ++*/ ++ ++ ++ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); ++/* ++ deflate compresses as much data as possible, and stops when the input ++ buffer becomes empty or the output buffer becomes full. It may introduce some ++ output latency (reading input without producing any output) except when ++ forced to flush. ++ ++ The detailed semantics are as follows. deflate performs one or both of the ++ following actions: ++ ++ - Compress more input starting at next_in and update next_in and avail_in ++ accordingly. If not all input can be processed (because there is not ++ enough room in the output buffer), next_in and avail_in are updated and ++ processing will resume at this point for the next call of deflate(). ++ ++ - Provide more output starting at next_out and update next_out and avail_out ++ accordingly. This action is forced if the parameter flush is non zero. ++ Forcing flush frequently degrades the compression ratio, so this parameter ++ should be set only when necessary (in interactive applications). ++ Some output may be provided even if flush is not set. ++ ++ Before the call of deflate(), the application should ensure that at least ++ one of the actions is possible, by providing more input and/or consuming ++ more output, and updating avail_in or avail_out accordingly; avail_out ++ should never be zero before the call. The application can consume the ++ compressed output when it wants, for example when the output buffer is full ++ (avail_out == 0), or after each call of deflate(). 
If deflate returns Z_OK ++ and with zero avail_out, it must be called again after making room in the ++ output buffer because there might be more output pending. ++ ++ If the parameter flush is set to Z_SYNC_FLUSH, all pending output is ++ flushed to the output buffer and the output is aligned on a byte boundary, so ++ that the decompressor can get all input data available so far. (In particular ++ avail_in is zero after the call if enough output space has been provided ++ before the call.) Flushing may degrade compression for some compression ++ algorithms and so it should be used only when necessary. ++ ++ If flush is set to Z_FULL_FLUSH, all output is flushed as with ++ Z_SYNC_FLUSH, and the compression state is reset so that decompression can ++ restart from this point if previous compressed data has been damaged or if ++ random access is desired. Using Z_FULL_FLUSH too often can seriously degrade ++ the compression. ++ ++ If deflate returns with avail_out == 0, this function must be called again ++ with the same value of the flush parameter and more output space (updated ++ avail_out), until the flush is complete (deflate returns with non-zero ++ avail_out). ++ ++ If the parameter flush is set to Z_FINISH, pending input is processed, ++ pending output is flushed and deflate returns with Z_STREAM_END if there ++ was enough output space; if deflate returns with Z_OK, this function must be ++ called again with Z_FINISH and more output space (updated avail_out) but no ++ more input data, until it returns with Z_STREAM_END or an error. After ++ deflate has returned Z_STREAM_END, the only possible operations on the ++ stream are deflateReset or deflateEnd. ++ ++ Z_FINISH can be used immediately after deflateInit if all the compression ++ is to be done in a single step. In this case, avail_out must be at least ++ 0.1% larger than avail_in plus 12 bytes. If deflate does not return ++ Z_STREAM_END, then it must be called again as described above. 
++ ++ deflate() sets strm->adler to the adler32 checksum of all input read ++ so far (that is, total_in bytes). ++ ++ deflate() may update data_type if it can make a good guess about ++ the input data type (Z_ASCII or Z_BINARY). In doubt, the data is considered ++ binary. This field is only for information purposes and does not affect ++ the compression algorithm in any manner. ++ ++ deflate() returns Z_OK if some progress has been made (more input ++ processed or more output produced), Z_STREAM_END if all input has been ++ consumed and all output has been produced (only when flush is set to ++ Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example ++ if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible ++ (for example avail_in or avail_out was zero). ++*/ ++ ++ ++ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); ++/* ++ All dynamically allocated data structures for this stream are freed. ++ This function discards any unprocessed input and does not flush any ++ pending output. ++ ++ deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the ++ stream state was inconsistent, Z_DATA_ERROR if the stream was freed ++ prematurely (some input or output was discarded). In the error case, ++ msg may be set but then points to a static string (which must not be ++ deallocated). ++*/ ++ ++ ++/* ++ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); ++ ++ Initializes the internal stream state for decompression. The fields ++ next_in, avail_in, zalloc, zfree and opaque must be initialized before by ++ the caller. If next_in is not Z_NULL and avail_in is large enough (the exact ++ value depends on the compression method), inflateInit determines the ++ compression method from the zlib header and allocates all data structures ++ accordingly; otherwise the allocation will be deferred to the first call of ++ inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to ++ use default allocation functions. 
++ ++ inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ memory, Z_VERSION_ERROR if the zlib library version is incompatible with the ++ version assumed by the caller. msg is set to null if there is no error ++ message. inflateInit does not perform any decompression apart from reading ++ the zlib header if present: this will be done by inflate(). (So next_in and ++ avail_in may be modified, but next_out and avail_out are unchanged.) ++*/ ++ ++ ++ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); ++/* ++ inflate decompresses as much data as possible, and stops when the input ++ buffer becomes empty or the output buffer becomes full. It may some ++ introduce some output latency (reading input without producing any output) ++ except when forced to flush. ++ ++ The detailed semantics are as follows. inflate performs one or both of the ++ following actions: ++ ++ - Decompress more input starting at next_in and update next_in and avail_in ++ accordingly. If not all input can be processed (because there is not ++ enough room in the output buffer), next_in is updated and processing ++ will resume at this point for the next call of inflate(). ++ ++ - Provide more output starting at next_out and update next_out and avail_out ++ accordingly. inflate() provides as much output as possible, until there ++ is no more input data or no more space in the output buffer (see below ++ about the flush parameter). ++ ++ Before the call of inflate(), the application should ensure that at least ++ one of the actions is possible, by providing more input and/or consuming ++ more output, and updating the next_* and avail_* values accordingly. ++ The application can consume the uncompressed output when it wants, for ++ example when the output buffer is full (avail_out == 0), or after each ++ call of inflate(). 
If inflate returns Z_OK and with zero avail_out, it ++ must be called again after making room in the output buffer because there ++ might be more output pending. ++ ++ If the parameter flush is set to Z_SYNC_FLUSH, inflate flushes as much ++ output as possible to the output buffer. The flushing behavior of inflate is ++ not specified for values of the flush parameter other than Z_SYNC_FLUSH ++ and Z_FINISH, but the current implementation actually flushes as much output ++ as possible anyway. ++ ++ inflate() should normally be called until it returns Z_STREAM_END or an ++ error. However if all decompression is to be performed in a single step ++ (a single call of inflate), the parameter flush should be set to ++ Z_FINISH. In this case all pending input is processed and all pending ++ output is flushed; avail_out must be large enough to hold all the ++ uncompressed data. (The size of the uncompressed data may have been saved ++ by the compressor for this purpose.) The next operation on this stream must ++ be inflateEnd to deallocate the decompression state. The use of Z_FINISH ++ is never required, but can be used to inform inflate that a faster routine ++ may be used for the single inflate() call. ++ ++ If a preset dictionary is needed at this point (see inflateSetDictionary ++ below), inflate sets strm-adler to the adler32 checksum of the ++ dictionary chosen by the compressor and returns Z_NEED_DICT; otherwise ++ it sets strm->adler to the adler32 checksum of all output produced ++ so far (that is, total_out bytes) and returns Z_OK, Z_STREAM_END or ++ an error code as described below. At the end of the stream, inflate() ++ checks that its computed adler32 checksum is equal to that saved by the ++ compressor and returns Z_STREAM_END only if the checksum is correct. 
++ ++ inflate() returns Z_OK if some progress has been made (more input processed ++ or more output produced), Z_STREAM_END if the end of the compressed data has ++ been reached and all uncompressed output has been produced, Z_NEED_DICT if a ++ preset dictionary is needed at this point, Z_DATA_ERROR if the input data was ++ corrupted (input stream not conforming to the zlib format or incorrect ++ adler32 checksum), Z_STREAM_ERROR if the stream structure was inconsistent ++ (for example if next_in or next_out was NULL), Z_MEM_ERROR if there was not ++ enough memory, Z_BUF_ERROR if no progress is possible or if there was not ++ enough room in the output buffer when Z_FINISH is used. In the Z_DATA_ERROR ++ case, the application may then call inflateSync to look for a good ++ compression block. ++*/ ++ ++ ++ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); ++/* ++ All dynamically allocated data structures for this stream are freed. ++ This function discards any unprocessed input and does not flush any ++ pending output. ++ ++ inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state ++ was inconsistent. In the error case, msg may be set but then points to a ++ static string (which must not be deallocated). ++*/ ++ ++ /* Advanced functions */ ++ ++/* ++ The following functions are needed only in some special applications. ++*/ ++ ++/* ++ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, ++ int level, ++ int method, ++ int windowBits, ++ int memLevel, ++ int strategy)); ++ ++ This is another version of deflateInit with more compression options. The ++ fields next_in, zalloc, zfree and opaque must be initialized before by ++ the caller. ++ ++ The method parameter is the compression method. It must be Z_DEFLATED in ++ this version of the library. ++ ++ The windowBits parameter is the base two logarithm of the window size ++ (the size of the history buffer). It should be in the range 8..15 for this ++ version of the library. 
Larger values of this parameter result in better ++ compression at the expense of memory usage. The default value is 15 if ++ deflateInit is used instead. ++ ++ The memLevel parameter specifies how much memory should be allocated ++ for the internal compression state. memLevel=1 uses minimum memory but ++ is slow and reduces compression ratio; memLevel=9 uses maximum memory ++ for optimal speed. The default value is 8. See zconf.h for total memory ++ usage as a function of windowBits and memLevel. ++ ++ The strategy parameter is used to tune the compression algorithm. Use the ++ value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a ++ filter (or predictor), or Z_HUFFMAN_ONLY to force Huffman encoding only (no ++ string match). Filtered data consists mostly of small values with a ++ somewhat random distribution. In this case, the compression algorithm is ++ tuned to compress them better. The effect of Z_FILTERED is to force more ++ Huffman coding and less string matching; it is somewhat intermediate ++ between Z_DEFAULT and Z_HUFFMAN_ONLY. The strategy parameter only affects ++ the compression ratio but not the correctness of the compressed output even ++ if it is not set appropriately. ++ ++ deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid ++ method). msg is set to null if there is no error message. deflateInit2 does ++ not perform any compression: this will be done by deflate(). ++*/ ++ ++ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, ++ const Bytef *dictionary, ++ uInt dictLength)); ++/* ++ Initializes the compression dictionary from the given byte sequence ++ without producing any compressed output. This function must be called ++ immediately after deflateInit, deflateInit2 or deflateReset, before any ++ call of deflate. The compressor and decompressor must use exactly the same ++ dictionary (see inflateSetDictionary). 
++ ++ The dictionary should consist of strings (byte sequences) that are likely ++ to be encountered later in the data to be compressed, with the most commonly ++ used strings preferably put towards the end of the dictionary. Using a ++ dictionary is most useful when the data to be compressed is short and can be ++ predicted with good accuracy; the data can then be compressed better than ++ with the default empty dictionary. ++ ++ Depending on the size of the compression data structures selected by ++ deflateInit or deflateInit2, a part of the dictionary may in effect be ++ discarded, for example if the dictionary is larger than the window size in ++ deflate or deflate2. Thus the strings most likely to be useful should be ++ put at the end of the dictionary, not at the front. ++ ++ Upon return of this function, strm->adler is set to the Adler32 value ++ of the dictionary; the decompressor may later use this value to determine ++ which dictionary has been used by the compressor. (The Adler32 value ++ applies to the whole dictionary even if only a subset of the dictionary is ++ actually used by the compressor.) ++ ++ deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a ++ parameter is invalid (such as NULL dictionary) or the stream state is ++ inconsistent (for example if deflate has already been called for this stream ++ or if the compression method is bsort). deflateSetDictionary does not ++ perform any compression: this will be done by deflate(). ++*/ ++ ++ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, ++ z_streamp source)); ++/* ++ Sets the destination stream as a complete copy of the source stream. ++ ++ This function can be useful when several compression strategies will be ++ tried, for example when there are several ways of pre-processing the input ++ data with a filter. The streams that will be discarded should then be freed ++ by calling deflateEnd. 
Note that deflateCopy duplicates the internal ++ compression state which can be quite large, so this strategy is slow and ++ can consume lots of memory. ++ ++ deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not ++ enough memory, Z_STREAM_ERROR if the source stream state was inconsistent ++ (such as zalloc being NULL). msg is left unchanged in both source and ++ destination. ++*/ ++ ++ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); ++/* ++ This function is equivalent to deflateEnd followed by deflateInit, ++ but does not free and reallocate all the internal compression state. ++ The stream will keep the same compression level and any other attributes ++ that may have been set by deflateInit2. ++ ++ deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent (such as zalloc or state being NULL). ++*/ ++ ++ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, ++ int level, ++ int strategy)); ++/* ++ Dynamically update the compression level and compression strategy. The ++ interpretation of level and strategy is as in deflateInit2. This can be ++ used to switch between compression and straight copy of the input data, or ++ to switch to a different kind of input data requiring a different ++ strategy. If the compression level is changed, the input available so far ++ is compressed with the old level (and may be flushed); the new level will ++ take effect only at the next call of deflate(). ++ ++ Before the call of deflateParams, the stream state must be set as for ++ a call of deflate(), since the currently available input may have to ++ be compressed and flushed. In particular, strm->avail_out must be non-zero. ++ ++ deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source ++ stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR ++ if strm->avail_out was zero. 
++*/ ++ ++/* ++ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, ++ int windowBits)); ++ ++ This is another version of inflateInit with an extra parameter. The ++ fields next_in, avail_in, zalloc, zfree and opaque must be initialized ++ before by the caller. ++ ++ The windowBits parameter is the base two logarithm of the maximum window ++ size (the size of the history buffer). It should be in the range 8..15 for ++ this version of the library. The default value is 15 if inflateInit is used ++ instead. If a compressed stream with a larger window size is given as ++ input, inflate() will return with the error code Z_DATA_ERROR instead of ++ trying to allocate a larger window. ++ ++ inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ memory, Z_STREAM_ERROR if a parameter is invalid (such as a negative ++ memLevel). msg is set to null if there is no error message. inflateInit2 ++ does not perform any decompression apart from reading the zlib header if ++ present: this will be done by inflate(). (So next_in and avail_in may be ++ modified, but next_out and avail_out are unchanged.) ++*/ ++ ++ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, ++ const Bytef *dictionary, ++ uInt dictLength)); ++/* ++ Initializes the decompression dictionary from the given uncompressed byte ++ sequence. This function must be called immediately after a call of inflate ++ if this call returned Z_NEED_DICT. The dictionary chosen by the compressor ++ can be determined from the Adler32 value returned by this call of ++ inflate. The compressor and decompressor must use exactly the same ++ dictionary (see deflateSetDictionary). ++ ++ inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a ++ parameter is invalid (such as NULL dictionary) or the stream state is ++ inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the ++ expected one (incorrect Adler32 value). 
inflateSetDictionary does not ++ perform any decompression: this will be done by subsequent calls of ++ inflate(). ++*/ ++ ++ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); ++/* ++ Skips invalid compressed data until a full flush point (see above the ++ description of deflate with Z_FULL_FLUSH) can be found, or until all ++ available input is skipped. No output is provided. ++ ++ inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR ++ if no more input was provided, Z_DATA_ERROR if no flush point has been found, ++ or Z_STREAM_ERROR if the stream structure was inconsistent. In the success ++ case, the application may save the current value of total_in which ++ indicates where valid compressed data was found. In the error case, the ++ application may repeatedly call inflateSync, providing more input each time, ++ until success or end of the input data. ++*/ ++ ++ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); ++/* ++ This function is equivalent to inflateEnd followed by inflateInit, ++ but does not free and reallocate all the internal decompression state. ++ The stream will keep attributes that may have been set by inflateInit2. ++ ++ inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent (such as zalloc or state being NULL). ++*/ ++ ++ ++ /* utility functions */ ++ ++/* ++ The following utility functions are implemented on top of the ++ basic stream-oriented functions. To simplify the interface, some ++ default options are assumed (compression level and memory usage, ++ standard memory allocation functions). The source code of these ++ utility functions can easily be modified if you need special options. ++*/ ++ ++ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, ++ const Bytef *source, uLong sourceLen)); ++/* ++ Compresses the source buffer into the destination buffer. sourceLen is ++ the byte length of the source buffer. 
Upon entry, destLen is the total ++ size of the destination buffer, which must be at least 0.1% larger than ++ sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the ++ compressed buffer. ++ This function can be used to compress a whole file at once if the ++ input file is mmap'ed. ++ compress returns Z_OK if success, Z_MEM_ERROR if there was not ++ enough memory, Z_BUF_ERROR if there was not enough room in the output ++ buffer. ++*/ ++ ++ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, ++ const Bytef *source, uLong sourceLen, ++ int level)); ++/* ++ Compresses the source buffer into the destination buffer. The level ++ parameter has the same meaning as in deflateInit. sourceLen is the byte ++ length of the source buffer. Upon entry, destLen is the total size of the ++ destination buffer, which must be at least 0.1% larger than sourceLen plus ++ 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. ++ ++ compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ memory, Z_BUF_ERROR if there was not enough room in the output buffer, ++ Z_STREAM_ERROR if the level parameter is invalid. ++*/ ++ ++ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, ++ const Bytef *source, uLong sourceLen)); ++/* ++ Decompresses the source buffer into the destination buffer. sourceLen is ++ the byte length of the source buffer. Upon entry, destLen is the total ++ size of the destination buffer, which must be large enough to hold the ++ entire uncompressed data. (The size of the uncompressed data must have ++ been saved previously by the compressor and transmitted to the decompressor ++ by some mechanism outside the scope of this compression library.) ++ Upon exit, destLen is the actual size of the uncompressed buffer. ++ This function can be used to decompress a whole file at once if the ++ input file is mmap'ed. 
++ ++ uncompress returns Z_OK if success, Z_MEM_ERROR if there was not ++ enough memory, Z_BUF_ERROR if there was not enough room in the output ++ buffer, or Z_DATA_ERROR if the input data was corrupted. ++*/ ++ ++ ++typedef voidp gzFile; ++ ++ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); ++/* ++ Opens a gzip (.gz) file for reading or writing. The mode parameter ++ is as in fopen ("rb" or "wb") but can also include a compression level ++ ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for ++ Huffman only compression as in "wb1h". (See the description ++ of deflateInit2 for more information about the strategy parameter.) ++ ++ gzopen can be used to read a file which is not in gzip format; in this ++ case gzread will directly read from the file without decompression. ++ ++ gzopen returns NULL if the file could not be opened or if there was ++ insufficient memory to allocate the (de)compression state; errno ++ can be checked to distinguish the two cases (if errno is zero, the ++ zlib error is Z_MEM_ERROR). */ ++ ++ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); ++/* ++ gzdopen() associates a gzFile with the file descriptor fd. File ++ descriptors are obtained from calls like open, dup, creat, pipe or ++ fileno (if the file has been previously opened with fopen). ++ The mode parameter is as in gzopen. ++ The next call of gzclose on the returned gzFile will also close the ++ file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file ++ descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode). ++ gzdopen returns NULL if there was insufficient memory to allocate ++ the (de)compression state. ++*/ ++ ++ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); ++/* ++ Dynamically update the compression level or strategy. See the description ++ of deflateInit2 for the meaning of these parameters. 
++ gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not ++ opened for writing. ++*/ ++ ++ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); ++/* ++ Reads the given number of uncompressed bytes from the compressed file. ++ If the input file was not in gzip format, gzread copies the given number ++ of bytes into the buffer. ++ gzread returns the number of uncompressed bytes actually read (0 for ++ end of file, -1 for error). */ ++ ++ZEXTERN int ZEXPORT gzwrite OF((gzFile file, ++ const voidp buf, unsigned len)); ++/* ++ Writes the given number of uncompressed bytes into the compressed file. ++ gzwrite returns the number of uncompressed bytes actually written ++ (0 in case of error). ++*/ ++ ++ZEXTERN int ZEXPORTVA gzprintf OF((gzFile file, const char *format, ...)); ++/* ++ Converts, formats, and writes the args to the compressed file under ++ control of the format string, as in fprintf. gzprintf returns the number of ++ uncompressed bytes actually written (0 in case of error). ++*/ ++ ++ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); ++/* ++ Writes the given null-terminated string to the compressed file, excluding ++ the terminating null character. ++ gzputs returns the number of characters written, or -1 in case of error. ++*/ ++ ++ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); ++/* ++ Reads bytes from the compressed file until len-1 characters are read, or ++ a newline character is read and transferred to buf, or an end-of-file ++ condition is encountered. The string is then terminated with a null ++ character. ++ gzgets returns buf, or Z_NULL in case of error. ++*/ ++ ++ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); ++/* ++ Writes c, converted to an unsigned char, into the compressed file. ++ gzputc returns the value that was written, or -1 in case of error. ++*/ ++ ++ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); ++/* ++ Reads one byte from the compressed file. 
gzgetc returns this byte ++ or -1 in case of end of file or error. ++*/ ++ ++ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); ++/* ++ Flushes all pending output into the compressed file. The parameter ++ flush is as in the deflate() function. The return value is the zlib ++ error number (see function gzerror below). gzflush returns Z_OK if ++ the flush parameter is Z_FINISH and all output could be flushed. ++ gzflush should be called only when strictly necessary because it can ++ degrade compression. ++*/ ++ ++ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, ++ z_off_t offset, int whence)); ++/* ++ Sets the starting position for the next gzread or gzwrite on the ++ given compressed file. The offset represents a number of bytes in the ++ uncompressed data stream. The whence parameter is defined as in lseek(2); ++ the value SEEK_END is not supported. ++ If the file is opened for reading, this function is emulated but can be ++ extremely slow. If the file is opened for writing, only forward seeks are ++ supported; gzseek then compresses a sequence of zeroes up to the new ++ starting position. ++ ++ gzseek returns the resulting offset location as measured in bytes from ++ the beginning of the uncompressed stream, or -1 in case of error, in ++ particular if the file is opened for writing and the new starting position ++ would be before the current position. ++*/ ++ ++ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); ++/* ++ Rewinds the given file. This function is supported only for reading. ++ ++ gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) ++*/ ++ ++ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); ++/* ++ Returns the starting position for the next gzread or gzwrite on the ++ given compressed file. This position represents a number of bytes in the ++ uncompressed data stream. 
++ ++ gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) ++*/ ++ ++ZEXTERN int ZEXPORT gzeof OF((gzFile file)); ++/* ++ Returns 1 when EOF has previously been detected reading the given ++ input stream, otherwise zero. ++*/ ++ ++ZEXTERN int ZEXPORT gzclose OF((gzFile file)); ++/* ++ Flushes all pending output if necessary, closes the compressed file ++ and deallocates all the (de)compression state. The return value is the zlib ++ error number (see function gzerror below). ++*/ ++ ++ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); ++/* ++ Returns the error message for the last error which occurred on the ++ given compressed file. errnum is set to zlib error number. If an ++ error occurred in the file system and not in the compression library, ++ errnum is set to Z_ERRNO and the application may consult errno ++ to get the exact error code. ++*/ ++ ++ /* checksum functions */ ++ ++/* ++ These functions are not related to compression but are exported ++ anyway because they might be useful in applications using the ++ compression library. ++*/ ++ ++ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); ++ ++/* ++ Update a running Adler-32 checksum with the bytes buf[0..len-1] and ++ return the updated checksum. If buf is NULL, this function returns ++ the required initial value for the checksum. ++ An Adler-32 checksum is almost as reliable as a CRC32 but can be computed ++ much faster. Usage example: ++ ++ uLong adler = adler32(0L, Z_NULL, 0); ++ ++ while (read_buffer(buffer, length) != EOF) { ++ adler = adler32(adler, buffer, length); ++ } ++ if (adler != original_adler) error(); ++*/ ++ ++ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); ++/* ++ Update a running crc with the bytes buf[0..len-1] and return the updated ++ crc. If buf is NULL, this function returns the required initial value ++ for the crc. 
Pre- and post-conditioning (one's complement) is performed ++ within this function so it shouldn't be done by the application. ++ Usage example: ++ ++ uLong crc = crc32(0L, Z_NULL, 0); ++ ++ while (read_buffer(buffer, length) != EOF) { ++ crc = crc32(crc, buffer, length); ++ } ++ if (crc != original_crc) error(); ++*/ ++ ++ ++ /* various hacks, don't look :) */ ++ ++/* deflateInit and inflateInit are macros to allow checking the zlib version ++ * and the compiler's view of z_stream: ++ */ ++ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level, ++ const char *version, int stream_size)); ++ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, ++ const char *version, int stream_size)); ++ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method, ++ int windowBits, int memLevel, ++ int strategy, const char *version, ++ int stream_size)); ++ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, ++ const char *version, int stream_size)); ++#define deflateInit(strm, level) \ ++ deflateInit_((strm), (level), ZLIB_VERSION, sizeof(z_stream)) ++#define inflateInit(strm) \ ++ inflateInit_((strm), ZLIB_VERSION, sizeof(z_stream)) ++#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ ++ deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ ++ (strategy), ZLIB_VERSION, sizeof(z_stream)) ++#define inflateInit2(strm, windowBits) \ ++ inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream)) ++ ++ ++#if !defined(_Z_UTIL_H) && !defined(NO_DUMMY_DECL) ++ struct internal_state {int dummy;}; /* hack for buggy compilers */ ++#endif ++ ++ZEXTERN const char * ZEXPORT zError OF((int err)); ++ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp z)); ++ZEXTERN const uLongf * ZEXPORT get_crc_table OF((void)); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _ZLIB_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/include/zlib/zutil.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,225 @@ ++/* zutil.h -- internal 
interface and configuration of the compression library ++ * Copyright (C) 1995-2002 Jean-loup Gailly. ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++/* WARNING: this file should *not* be used by applications. It is ++ part of the implementation of the compression library and is ++ subject to change. Applications should only use zlib.h. ++ */ ++ ++/* @(#) $Id: zutil.h,v 1.4 2002-04-24 07:36:48 mcr Exp $ */ ++ ++#ifndef _Z_UTIL_H ++#define _Z_UTIL_H ++ ++#include "zlib.h" ++ ++#include ++#define HAVE_MEMCPY ++ ++#if 0 // #ifdef STDC ++# include ++# include ++# include ++#endif ++#ifndef __KERNEL__ ++#ifdef NO_ERRNO_H ++ extern int errno; ++#else ++# include ++#endif ++#endif ++ ++#ifndef local ++# define local static ++#endif ++/* compile with -Dlocal if your debugger can't find static symbols */ ++ ++typedef unsigned char uch; ++typedef uch FAR uchf; ++typedef unsigned short ush; ++typedef ush FAR ushf; ++typedef unsigned long ulg; ++ ++extern const char *z_errmsg[10]; /* indexed by 2-zlib_error */ ++/* (size given to avoid silly warnings with Visual C++) */ ++ ++#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] ++ ++#define ERR_RETURN(strm,err) \ ++ return (strm->msg = ERR_MSG(err), (err)) ++/* To be used only when the state is known to be valid */ ++ ++ /* common constants */ ++ ++#ifndef DEF_WBITS ++# define DEF_WBITS MAX_WBITS ++#endif ++/* default windowBits for decompression. 
MAX_WBITS is for compression only */ ++ ++#if MAX_MEM_LEVEL >= 8 ++# define DEF_MEM_LEVEL 8 ++#else ++# define DEF_MEM_LEVEL MAX_MEM_LEVEL ++#endif ++/* default memLevel */ ++ ++#define STORED_BLOCK 0 ++#define STATIC_TREES 1 ++#define DYN_TREES 2 ++/* The three kinds of block type */ ++ ++#define MIN_MATCH 3 ++#define MAX_MATCH 258 ++/* The minimum and maximum match lengths */ ++ ++#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ ++ ++ /* target dependencies */ ++ ++#ifdef MSDOS ++# define OS_CODE 0x00 ++# if defined(__TURBOC__) || defined(__BORLANDC__) ++# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) ++ /* Allow compilation with ANSI keywords only enabled */ ++ void _Cdecl farfree( void *block ); ++ void *_Cdecl farmalloc( unsigned long nbytes ); ++# else ++# include ++# endif ++# else /* MSC or DJGPP */ ++# include ++# endif ++#endif ++ ++#ifdef OS2 ++# define OS_CODE 0x06 ++#endif ++ ++#ifdef WIN32 /* Window 95 & Windows NT */ ++# define OS_CODE 0x0b ++#endif ++ ++#if defined(VAXC) || defined(VMS) ++# define OS_CODE 0x02 ++# define F_OPEN(name, mode) \ ++ fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") ++#endif ++ ++#ifdef AMIGA ++# define OS_CODE 0x01 ++#endif ++ ++#if defined(ATARI) || defined(atarist) ++# define OS_CODE 0x05 ++#endif ++ ++#if defined(MACOS) || defined(TARGET_OS_MAC) ++# define OS_CODE 0x07 ++# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os ++# include /* for fdopen */ ++# else ++# ifndef fdopen ++# define fdopen(fd,mode) NULL /* No fdopen() */ ++# endif ++# endif ++#endif ++ ++#ifdef __50SERIES /* Prime/PRIMOS */ ++# define OS_CODE 0x0F ++#endif ++ ++#ifdef TOPS20 ++# define OS_CODE 0x0a ++#endif ++ ++#if defined(_BEOS_) || defined(RISCOS) ++# define fdopen(fd,mode) NULL /* No fdopen() */ ++#endif ++ ++#if (defined(_MSC_VER) && (_MSC_VER > 600)) ++# define fdopen(fd,type) _fdopen(fd,type) ++#endif ++ ++ ++ /* Common defaults */ ++ ++#ifndef OS_CODE ++# 
define OS_CODE 0x03 /* assume Unix */ ++#endif ++ ++#ifndef F_OPEN ++# define F_OPEN(name, mode) fopen((name), (mode)) ++#endif ++ ++ /* functions */ ++ ++#ifdef HAVE_STRERROR ++ extern char *strerror OF((int)); ++# define zstrerror(errnum) strerror(errnum) ++#else ++# define zstrerror(errnum) "" ++#endif ++ ++#if defined(pyr) ++# define NO_MEMCPY ++#endif ++#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) ++ /* Use our own functions for small and medium model with MSC <= 5.0. ++ * You may have to use the same strategy for Borland C (untested). ++ * The __SC__ check is for Symantec. ++ */ ++# define NO_MEMCPY ++#endif ++#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) ++# define HAVE_MEMCPY ++#endif ++#ifdef HAVE_MEMCPY ++# ifdef SMALL_MEDIUM /* MSDOS small or medium model */ ++# define zmemcpy _fmemcpy ++# define zmemcmp _fmemcmp ++# define zmemzero(dest, len) _fmemset(dest, 0, len) ++# else ++# define zmemcpy memcpy ++# define zmemcmp memcmp ++# define zmemzero(dest, len) memset(dest, 0, len) ++# endif ++#else ++ extern void zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); ++ extern int zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); ++ extern void zmemzero OF((Bytef* dest, uInt len)); ++#endif ++ ++/* Diagnostic functions */ ++#ifdef DEBUG ++# include ++ extern int z_verbose; ++ extern void z_error OF((char *m)); ++# define Assert(cond,msg) {if(!(cond)) z_error(msg);} ++# define Trace(x) {if (z_verbose>=0) fprintf x ;} ++# define Tracev(x) {if (z_verbose>0) fprintf x ;} ++# define Tracevv(x) {if (z_verbose>1) fprintf x ;} ++# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;} ++# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;} ++#else ++# define Assert(cond,msg) ++# define Trace(x) ++# define Tracev(x) ++# define Tracevv(x) ++# define Tracec(c,x) ++# define Tracecv(c,x) ++#endif ++ ++ ++typedef uLong (ZEXPORT *check_func) OF((uLong check, const Bytef *buf, ++ uInt len)); ++voidpf zcalloc 
OF((voidpf opaque, unsigned items, unsigned size)); ++void zcfree OF((voidpf opaque, voidpf ptr)); ++ ++#define ZALLOC(strm, items, size) \ ++ (*((strm)->zalloc))((strm)->opaque, (items), (size)) ++#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) ++#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} ++ ++#endif /* _Z_UTIL_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/lib/libfreeswan/Makefile.objs Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,21 @@ ++obj-y += satot.o ++obj-y += addrtot.o ++obj-y += ultot.o ++obj-y += addrtypeof.o ++obj-y += anyaddr.o ++obj-y += initaddr.o ++obj-y += ultoa.o ++obj-y += addrtoa.o ++obj-y += subnettoa.o ++obj-y += subnetof.o ++obj-y += goodmask.o ++obj-y += datatot.o ++obj-y += rangetoa.o ++obj-y += prng.o ++obj-y += pfkey_v2_parse.o ++obj-y += pfkey_v2_build.o ++obj-y += pfkey_v2_debug.o ++obj-y += pfkey_v2_ext_bits.o ++ ++#version.c: ${LIBFREESWANDIR}/version.in.c ${OPENSWANSRCDIR}/Makefile.ver ++# sed '/"/s/xxx/$(IPSECVERSION)/' ${LIBFREESWANDIR}/version.in.c >$@ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/lib/zlib/Makefile Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,118 @@ ++# (kernel) Makefile for IPCOMP zlib deflate code ++# Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs. ++# Copyright (C) 2000 Svenning Soerensen ++# ++# This program is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by the ++# Free Software Foundation; either version 2 of the License, or (at your ++# option) any later version. See . ++# ++# This program is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# for more details. 
++# ++# RCSID $Id: Makefile,v 1.9 2002-04-24 07:55:32 mcr Exp $ ++# ++ ++ ++ ++include ../Makefile.inc ++ ++ ++ ++ifndef TOPDIR ++TOPDIR := /usr/src/linux ++endif ++ ++ ++L_TARGET := zlib.a ++ ++obj-y := ++ ++include Makefile.objs ++ ++EXTRA_CFLAGS += $(KLIPSCOMPILE) ++ ++EXTRA_CFLAGS += -Wall ++#EXTRA_CFLAGS += -Wconversion ++#EXTRA_CFLAGS += -Wmissing-prototypes ++EXTRA_CFLAGS += -Wpointer-arith ++#EXTRA_CFLAGS += -Wcast-qual ++#EXTRA_CFLAGS += -Wmissing-declarations ++EXTRA_CFLAGS += -Wstrict-prototypes ++#EXTRA_CFLAGS += -pedantic ++#EXTRA_CFLAGS += -W ++#EXTRA_CFLAGS += -Wwrite-strings ++EXTRA_CFLAGS += -Wbad-function-cast ++EXTRA_CFLAGS += -DIPCOMP_PREFIX ++ ++.S.o: ++ $(CC) -D__ASSEMBLY__ -DNO_UNDERLINE -traditional -c $< -o $*.o ++ ++asm-obj-$(CONFIG_M586) += match586.o ++asm-obj-$(CONFIG_M586TSC) += match586.o ++asm-obj-$(CONFIG_M586MMX) += match586.o ++asm-obj-$(CONFIG_M686) += match686.o ++asm-obj-$(CONFIG_MPENTIUMIII) += match686.o ++asm-obj-$(CONFIG_MPENTIUM4) += match686.o ++asm-obj-$(CONFIG_MK6) += match586.o ++asm-obj-$(CONFIG_MK7) += match686.o ++asm-obj-$(CONFIG_MCRUSOE) += match586.o ++asm-obj-$(CONFIG_MWINCHIPC6) += match586.o ++asm-obj-$(CONFIG_MWINCHIP2) += match686.o ++asm-obj-$(CONFIG_MWINCHIP3D) += match686.o ++ ++obj-y += $(asm-obj-y) ++ifneq ($(strip $(asm-obj-y)),) ++ EXTRA_CFLAGS += -DASMV ++endif ++ ++active-objs := $(sort $(obj-y) $(obj-m)) ++L_OBJS := $(obj-y) ++M_OBJS := $(obj-m) ++MIX_OBJS := $(filter $(export-objs), $(active-objs)) ++ ++include $(TOPDIR)/Rules.make ++ ++$(obj-y) : $(TOPDIR)/include/linux/config.h $(TOPDIR)/include/linux/autoconf.h ++ ++ ++clean: ++ -rm -f *.o *.a ++ ++checkprograms: ++programs: $(L_TARGET) ++ ++# ++# $Log: Makefile,v $ ++# Revision 1.9 2002-04-24 07:55:32 mcr ++# #include patches and Makefiles for post-reorg compilation. 
++# ++# Revision 1.8 2002/04/24 07:36:44 mcr ++# Moved from ./zlib/Makefile,v ++# ++# Revision 1.7 2002/03/27 23:34:35 mcr ++# added programs: target ++# ++# Revision 1.6 2001/12/05 20:19:08 henry ++# use new compile-control variable ++# ++# Revision 1.5 2001/11/27 16:38:08 mcr ++# added new "checkprograms" target to deal with programs that ++# are required for "make check", but that may not be ready to ++# build for every user due to external dependancies. ++# ++# Revision 1.4 2001/10/24 14:46:24 henry ++# Makefile.inc ++# ++# Revision 1.3 2001/04/21 23:05:24 rgb ++# Update asm directives for 2.4 style makefiles. ++# ++# Revision 1.2 2001/01/29 22:22:00 rgb ++# Convert to 2.4 new style with back compat. ++# ++# Revision 1.1.1.1 2000/09/29 18:51:33 rgb ++# zlib_beginnings ++# ++# +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/lib/zlib/Makefile.objs Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,27 @@ ++obj-$(CONFIG_IPSEC_IPCOMP) += adler32.o ++obj-$(CONFIG_IPSEC_IPCOMP) += deflate.o ++obj-$(CONFIG_IPSEC_IPCOMP) += infblock.o ++obj-$(CONFIG_IPSEC_IPCOMP) += infcodes.o ++obj-$(CONFIG_IPSEC_IPCOMP) += inffast.o ++obj-$(CONFIG_IPSEC_IPCOMP) += inflate.o ++obj-$(CONFIG_IPSEC_IPCOMP) += inftrees.o ++obj-$(CONFIG_IPSEC_IPCOMP) += infutil.o ++obj-$(CONFIG_IPSEC_IPCOMP) += trees.o ++obj-$(CONFIG_IPSEC_IPCOMP) += zutil.o ++ ++asm-obj-$(CONFIG_M586) += ${LIBZLIBSRCDIR}/match586.o ++asm-obj-$(CONFIG_M586TSC) += ${LIBZLIBSRCDIR}/match586.o ++asm-obj-$(CONFIG_M586MMX) += ${LIBZLIBSRCDIR}/match586.o ++asm-obj-$(CONFIG_M686) += ${LIBZLIBSRCDIR}/match686.o ++asm-obj-$(CONFIG_MPENTIUMIII) += ${LIBZLIBSRCDIR}/match686.o ++asm-obj-$(CONFIG_MPENTIUM4) += ${LIBZLIBSRCDIR}/match686.o ++asm-obj-$(CONFIG_MK6) += ${LIBZLIBSRCDIR}/match586.o ++asm-obj-$(CONFIG_MK7) += ${LIBZLIBSRCDIR}/match686.o ++asm-obj-$(CONFIG_MCRUSOE) += ${LIBZLIBSRCDIR}/match586.o ++asm-obj-$(CONFIG_MWINCHIPC6) += ${LIBZLIBSRCDIR}/match586.o ++asm-obj-$(CONFIG_MWINCHIP2) += ${LIBZLIBSRCDIR}/match686.o 
++asm-obj-$(CONFIG_MWINCHIP3D) += ${LIBZLIBSRCDIR}/match686.o ++ ++EXTRA_CFLAGS += -DIPCOMP_PREFIX ++ ++ +--- swan26/net/Kconfig.preipsec 2005-09-01 18:15:19.000000000 -0400 ++++ swan26/net/Kconfig 2005-09-03 16:51:17.000000000 -0400 +@@ -215,2 +215,6 @@ + ++if INET ++source "net/ipsec/Kconfig" ++endif # if INET ++ + endif # if NET +--- /distros/kernel/linux-2.6.3-rc4/net/Makefile Mon Feb 16 21:22:12 2004 ++++ ref26/net/Makefile Thu Feb 19 21:02:25 2004 +@@ -42,3 +42,6 @@ + ifeq ($(CONFIG_NET),y) + obj-$(CONFIG_SYSCTL) += sysctl_net.o + endif ++ ++obj-$(CONFIG_KLIPS) += ipsec/ ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/Kconfig Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,172 @@ ++# ++# IPSEC configuration ++# Copyright (C) 2004 Michael Richardson ++# ++# This program is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by the ++# Free Software Foundation; either version 2 of the License, or (at your ++# option) any later version. See . ++# ++# This program is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# for more details. ++# ++# RCSID $Id: Kconfig,v 1.6.2.3 2007-12-06 15:51:01 paul Exp $ ++ ++config KLIPS ++ tristate "Openswan IPsec (KLIPS26)" ++ default n ++ help ++ KLIPS is the Openswan (www.openswan.org) Kernel Level IP Security ++ system. It is extensively tested, and has interoperated with ++ many other systems. ++ It provides "ipsecX" devices on which one can do firewalling. ++ The userland, is compatible with both KLIPS and 26sec. 
++ ++menu "KLIPS options" ++ depends on KLIPS ++ ++config KLIPS_ESP ++ bool 'Encapsulating Security Payload - ESP ("VPN")' ++ default y ++ help ++ This option provides support for the IPSEC Encapsulation Security ++ Payload (IP protocol 50) which provides packet layer content ++ hiding, and content authentication. ++ It is recommended to enable this. RFC2406 ++ ++config KLIPS_AH ++ bool 'Authentication Header - AH' ++ default n ++ help ++ This option provides support for the IPSEC Authentication Header ++ (IP protocol 51) which provides packet layer sender and content ++ authentication. It does not provide for confidentiality. ++ It is not recommended to enable this. RFC2402 ++ ++config KLIPS_AUTH_HMAC_MD5 ++ bool 'HMAC-MD5 authentication algorithm' ++ default y ++ help ++ The HMAC-MD5 algorithm is used by ESP (and AH) to guarantee packet ++ integrity. There is little reason not to include it. ++ ++config KLIPS_AUTH_HMAC_SHA1 ++ bool 'HMAC-SHA1 authentication algorithm' ++ default y ++ help ++ The HMAC-SHA1 algorithm is used by ESP (and AH) to guarantee packet ++ integrity. SHA1 is a little slower than MD5, but is said to be ++ a bit more secure. There is little reason not to include it. ++ ++config KLIPS_ALG ++ bool 'KLIPS_ALG software encryption' ++ default y ++ help ++ You should only disabled this if using the external OCF patch ++ for hardware offload. ++ ++config KLIPS_ENC_CRYPTOAPI ++ bool 'CryptoAPI algorithm interface' ++ default n ++ help ++ Enable the algorithm interface to make all CryptoAPI 1.0 algorithms ++ available to KLIPS. ++ ++config KLIPS_ENC_1DES ++ bool 'Include 1DES with CryptoAPI' ++ default n ++ depends on KLIPS_ENC_CRYPTOAPI ++ help ++ The CryptoAPI interface does not include support for every algorithm ++ yet, and one that it doesn't support by default is the VERY WEAK ++ 1DES. Select this if you are terminally stupid. 
++ ++config KLIPS_ENC_3DES ++ bool '3DES encryption algorithm' ++ default y ++ help ++ The 3DES algorithm is used by ESP to provide for packet privacy. ++ 3DES is 3-repeats of the DES algorithm. 3DES is widely supported, ++ and analyzed and is considered very secure. 1DES is not supported. ++ ++config KLIPS_ENC_AES ++ bool 'AES encryption algorithm' ++ default y ++ help ++ The AES algorithm is used by ESP to provide for packet privacy. ++ AES the NIST replacement for DES. AES is being widely analyzed, ++ and is very fast. ++ ++config KLIPS_ENC_NULL ++ bool 'NULL NON-encryption algorithm' ++ default n ++ help ++ NON encryption algo , maybe useful for ESP auth only scenarios ++ (eg: with NAT-T), see RFC 2410. ++ ++config KLIPS_IPCOMP ++ bool 'IP compression' ++ default y ++ help ++ The IPcomp protocol is used prior to ESP to make the packet ++ smaller. Once encrypted, compression will fail, so any link ++ layer efforts (e.g. PPP) will not work. ++ ++config KLIPS_DEBUG ++ bool 'IPsec debugging' ++ default y ++ help ++ KLIPS includes a lot of debugging code. Unless there is a real ++ tangible benefit to removing this code, it should be left in place. ++ Debugging connections without access to kernel level debugging is ++ essentially impossible. Leave this on. ++ ++endmenu ++ ++# ++# ++# $Log: Kconfig,v $ ++# Revision 1.6.2.3 2007-12-06 15:51:01 paul ++# Enable KLIPS_ALG in default build, when not using Makefile.inc. ++# Patch by Laszlo Attila Toth ++# ++# Revision 1.6.2.2 2006/10/11 18:14:33 paul ++# Add JuanJo Ciarlante's ESP_NULL patches for KLIPS, but leave it disabled ++# per default. ++# ++# Revision 1.6.2.1 2006/04/20 16:33:06 mcr ++# remove all of CONFIG_KLIPS_ALG --- one can no longer build without it. ++# Fix in-kernel module compilation. Sub-makefiles do not work. ++# ++# Revision 1.6 2005/05/18 20:55:27 mcr ++# default cryptoapi to n. ++# ++# Revision 1.5 2005/05/11 01:23:25 mcr ++# added 1DES option to cryptoapi. 
++# ++# Revision 1.4 2005/04/29 05:29:54 mcr ++# add option to include cryptoapi algorithms. ++# ++# Revision 1.3 2004/08/17 03:27:23 mcr ++# klips 2.6 edits. ++# ++# Revision 1.2 2004/08/14 03:27:39 mcr ++# 2.6 kernel build/configuration files. ++# ++# Revision 1.1 2004/08/14 02:47:55 mcr ++# kernel build/config patches ++# ++# Revision 1.3 2004/02/24 17:17:04 mcr ++# s/CONFIG_IPSEC/CONFIG_KLIPS/ as 26sec uses "CONFIG_IPSEC" to ++# turn it on/off as well. ++# ++# Revision 1.2 2004/02/22 06:50:42 mcr ++# kernel 2.6 port - merged with 2.4 code. ++# ++# Revision 1.1.2.1 2004/02/20 02:07:53 mcr ++# module configuration for KLIPS 2.6 ++# ++# ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/Makefile Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,195 @@ ++# Makefile for KLIPS kernel code as a module for 2.6 kernels ++# ++# Makefile for KLIPS kernel code as a module ++# Copyright (C) 1998, 1999, 2000,2001 Richard Guy Briggs. ++# Copyright (C) 2002-2004 Michael Richardson ++# ++# This program is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by the ++# Free Software Foundation; either version 2 of the License, or (at your ++# option) any later version. See . ++# ++# This program is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# for more details. ++# ++# RCSID $Id: Makefile.fs2_6,v 1.8.2.2 2006-10-11 18:14:33 paul Exp $ ++# ++# Note! Dependencies are done automagically by 'make dep', which also ++# removes any old dependencies. DON'T put your own dependencies here ++# unless it's something special (ie not a .c file). ++# ++ ++OPENSWANSRCDIR?=. ++KLIPS_TOP?=. 
++ ++-include ${OPENSWANSRCDIR}/Makefile.ver ++ ++base-klips-objs := ++ ++base-klips-objs+= ipsec_init.o ipsec_sa.o ipsec_radij.o radij.o ++base-klips-objs+= ipsec_life.o ipsec_proc.o ++base-klips-objs+= ipsec_tunnel.o ipsec_xmit.o ipsec_rcv.o ipsec_ipip.o ++base-klips-objs+= ipsec_snprintf.o ++base-klips-objs+= sysctl_net_ipsec.o ++base-klips-objs+= pfkey_v2.o pfkey_v2_parser.o pfkey_v2_ext_process.o ++base-klips-objs+= version.o ++ ++base-klips-objs+= satot.o ++base-klips-objs+= addrtot.o ++base-klips-objs+= ultot.o ++base-klips-objs+= addrtypeof.o ++base-klips-objs+= anyaddr.o ++base-klips-objs+= initaddr.o ++base-klips-objs+= ultoa.o ++base-klips-objs+= addrtoa.o ++base-klips-objs+= subnettoa.o ++base-klips-objs+= subnetof.o ++base-klips-objs+= goodmask.o ++base-klips-objs+= datatot.o ++base-klips-objs+= rangetoa.o ++base-klips-objs+= prng.o ++base-klips-objs+= pfkey_v2_parse.o ++base-klips-objs+= pfkey_v2_build.o ++base-klips-objs+= pfkey_v2_debug.o ++base-klips-objs+= pfkey_v2_ext_bits.o ++base-klips-objs+= version.o ++ ++obj-${CONFIG_KLIPS} += ipsec.o ++ ++ipsec-objs += ${base-klips-objs} ++ ++ipsec-$(CONFIG_KLIPS_ESP) += ipsec_esp.o ++ipsec-$(CONFIG_KLIPS_IPCOMP) += ipsec_ipcomp.o ++ipsec-$(CONFIG_KLIPS_AUTH_HMAC_MD5) += ipsec_md5c.o ++ipsec-$(CONFIG_KLIPS_AUTH_HMAC_SHA1) += ipsec_sha1.o ++ ++# AH, if you really think you need it. ++ipsec-$(CONFIG_KLIPS_AH) += ipsec_ah.o ++ ++ipsec-y += ipsec_alg.o ++ ++# include code from DES subdir ++crypto-$(CONFIG_KLIPS_ENC_3DES) += des/ipsec_alg_3des.o ++crypto-$(CONFIG_KLIPS_ENC_3DES) += des/cbc_enc.o ++crypto-$(CONFIG_KLIPS_ENC_3DES) += des/ecb_enc.o ++crypto-$(CONFIG_KLIPS_ENC_3DES) += des/set_key.o ++ ++ifeq ($(strip ${SUBARCH}),) ++SUBARCH:=${ARCH} ++endif ++ ++# the assembly version expects frame pointers, which are ++# optional in many kernel builds. If you want speed, you should ++# probably use cryptoapi code instead. 
++USEASSEMBLY=${SUBARCH}${CONFIG_FRAME_POINTER} ++ifeq (${USEASSEMBLY},i386y) ++crypto-$(CONFIG_KLIPS_ENC_3DES) += des/dx86unix.o ++else ++crypto-$(CONFIG_KLIPS_ENC_3DES) += des/des_enc.o ++endif ++ ++# include code from AES subdir ++crypto-$(CONFIG_KLIPS_ENC_AES) += aes/ipsec_alg_aes.o ++crypto-$(CONFIG_KLIPS_ENC_AES) += aes/aes_xcbc_mac.o ++crypto-$(CONFIG_KLIPS_ENC_AES) += aes/aes_cbc.o ++ ++ifeq ($(strip ${SUBARCH}),) ++SUBARCH:=${ARCH} ++endif ++ ++USEASSEMBLY=${SUBARCH}${CONFIG_FRAME_POINTER} ++ifeq (${USEASSEMBLY},i386y) ++crypto-$(CONFIG_KLIPS_ENC_AES) += aes/aes-i586.o ++else ++crypto-$(CONFIG_KLIPS_ENC_AES) += aes/aes.o ++endif ++ ++crypto-$(CONFIG_KLIPS_ENC_NULL) += null/ipsec_alg_null.o ++ ++ipsec-y += ${crypto-y} ++ ++ipsec-$(CONFIG_KLIPS_ENC_CRYPTOAPI) += ipsec_alg_cryptoapi.o ++ ++# IPcomp stuff ++base-ipcomp-objs := ipcomp.o ++base-ipcomp-objs += adler32.o ++base-ipcomp-objs += deflate.o ++base-ipcomp-objs += infblock.o ++base-ipcomp-objs += infcodes.o ++base-ipcomp-objs += inffast.o ++base-ipcomp-objs += inflate.o ++base-ipcomp-objs += inftrees.o ++base-ipcomp-objs += infutil.o ++base-ipcomp-objs += trees.o ++base-ipcomp-objs += zutil.o ++asm-ipcomp-obj-$(CONFIG_M586) += match586.o ++asm-ipcomp-obj-$(CONFIG_M586TSC) += match586.o ++asm-ipcomp-obj-$(CONFIG_M586MMX) += match586.o ++asm-ipcomp-obj-$(CONFIG_M686) += match686.o ++asm-ipcomp-obj-$(CONFIG_MPENTIUMIII) += match686.o ++asm-ipcomp-obj-$(CONFIG_MPENTIUM4) += match686.o ++asm-ipcomp-obj-$(CONFIG_MK6) += match586.o ++asm-ipcomp-obj-$(CONFIG_MK7) += match686.o ++asm-ipcomp-obj-$(CONFIG_MCRUSOE) += match586.o ++asm-ipcomp-obj-$(CONFIG_MWINCHIPC6) += match586.o ++asm-ipcomp-obj-$(CONFIG_MWINCHIP2) += match686.o ++asm-ipcomp-obj-$(CONFIG_MWINCHIP3D) += match686.o ++base-ipcomp-objs += ${asm-ipcomp-obj-y} ++ ++ipsec-$(CONFIG_KLIPS_IPCOMP) += ${base-ipcomp-objs} ++ ++EXTRA_CFLAGS += -DIPCOMP_PREFIX ++ ++# ++# $Log: Makefile.fs2_6,v $ ++# Revision 1.8.2.2 2006-10-11 18:14:33 paul ++# Add JuanJo 
Ciarlante's ESP_NULL patches for KLIPS, but leave it disabled ++# per default. ++# ++# Revision 1.8.2.1 2006/04/20 16:33:06 mcr ++# remove all of CONFIG_KLIPS_ALG --- one can no longer build without it. ++# Fix in-kernel module compilation. Sub-makefiles do not work. ++# ++# Revision 1.8 2005/05/11 03:15:42 mcr ++# adjusted makefiles to sanely build modules properly. ++# ++# Revision 1.7 2005/04/13 22:52:12 mcr ++# moved KLIPS specific snprintf() wrapper to seperate file. ++# ++# Revision 1.6 2004/08/22 05:02:03 mcr ++# organized symbols such that it is easier to build modules. ++# ++# Revision 1.5 2004/08/18 01:43:56 mcr ++# adjusted makefile enumation so that it can be used by module ++# wrapper. ++# ++# Revision 1.4 2004/08/17 03:27:23 mcr ++# klips 2.6 edits. ++# ++# Revision 1.3 2004/08/04 16:50:13 mcr ++# removed duplicate definition of dx86unix.o ++# ++# Revision 1.2 2004/08/03 18:21:09 mcr ++# only set KLIPS_TOP and OPENSWANSRCDIR if not already set. ++# ++# Revision 1.1 2004/07/26 15:02:22 mcr ++# makefile for KLIPS module for 2.6. ++# ++# Revision 1.3 2004/02/24 17:17:04 mcr ++# s/CONFIG_IPSEC/CONFIG_KLIPS/ as 26sec uses "CONFIG_IPSEC" to ++# turn it on/off as well. ++# ++# Revision 1.2 2004/02/22 06:50:42 mcr ++# kernel 2.6 port - merged with 2.4 code. ++# ++# Revision 1.1.2.1 2004/02/20 02:07:53 mcr ++# module configuration for KLIPS 2.6 ++# ++# ++# Local Variables: ++# compile-command: "(cd ../../.. && source umlsetup.sh && make -C ${POOLSPACE} module/ipsec.o)" ++# End Variables: ++# ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/README-zlib Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,147 @@ ++zlib 1.1.4 is a general purpose data compression library. All the code ++is thread safe. The data format used by the zlib library ++is described by RFCs (Request for Comments) 1950 to 1952 in the files ++http://www.ietf.org/rfc/rfc1950.txt (zlib format), rfc1951.txt (deflate ++format) and rfc1952.txt (gzip format). 
These documents are also available in ++other formats from ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html ++ ++All functions of the compression library are documented in the file zlib.h ++(volunteer to write man pages welcome, contact jloup@gzip.org). A usage ++example of the library is given in the file example.c which also tests that ++the library is working correctly. Another example is given in the file ++minigzip.c. The compression library itself is composed of all source files ++except example.c and minigzip.c. ++ ++To compile all files and run the test program, follow the instructions ++given at the top of Makefile. In short "make test; make install" ++should work for most machines. For Unix: "./configure; make test; make install" ++For MSDOS, use one of the special makefiles such as Makefile.msc. ++For VMS, use Make_vms.com or descrip.mms. ++ ++Questions about zlib should be sent to , or to ++Gilles Vollant for the Windows DLL version. ++The zlib home page is http://www.zlib.org or http://www.gzip.org/zlib/ ++Before reporting a problem, please check this site to verify that ++you have the latest version of zlib; otherwise get the latest version and ++check whether the problem still exists or not. ++ ++PLEASE read the zlib FAQ http://www.gzip.org/zlib/zlib_faq.html ++before asking for help. ++ ++Mark Nelson wrote an article about zlib for the Jan. 1997 ++issue of Dr. Dobb's Journal; a copy of the article is available in ++http://dogma.net/markn/articles/zlibtool/zlibtool.htm ++ ++The changes made in version 1.1.4 are documented in the file ChangeLog. ++The only changes made since 1.1.3 are bug corrections: ++ ++- ZFREE was repeated on same allocation on some error conditions. ++ This creates a security problem described in ++ http://www.zlib.org/advisory-2002-03-11.txt ++- Returned incorrect error (Z_MEM_ERROR) on some invalid data ++- Avoid accesses before window for invalid distances with inflate window ++ less than 32K. 
++- force windowBits > 8 to avoid a bug in the encoder for a window size ++ of 256 bytes. (A complete fix will be available in 1.1.5). ++ ++The beta version 1.1.5beta includes many more changes. A new official ++version 1.1.5 will be released as soon as extensive testing has been ++completed on it. ++ ++ ++Unsupported third party contributions are provided in directory "contrib". ++ ++A Java implementation of zlib is available in the Java Development Kit ++http://www.javasoft.com/products/JDK/1.1/docs/api/Package-java.util.zip.html ++See the zlib home page http://www.zlib.org for details. ++ ++A Perl interface to zlib written by Paul Marquess ++is in the CPAN (Comprehensive Perl Archive Network) sites ++http://www.cpan.org/modules/by-module/Compress/ ++ ++A Python interface to zlib written by A.M. Kuchling ++is available in Python 1.5 and later versions, see ++http://www.python.org/doc/lib/module-zlib.html ++ ++A zlib binding for TCL written by Andreas Kupries ++is availlable at http://www.westend.com/~kupries/doc/trf/man/man.html ++ ++An experimental package to read and write files in .zip format, ++written on top of zlib by Gilles Vollant , is ++available at http://www.winimage.com/zLibDll/unzip.html ++and also in the contrib/minizip directory of zlib. ++ ++ ++Notes for some targets: ++ ++- To build a Windows DLL version, include in a DLL project zlib.def, zlib.rc ++ and all .c files except example.c and minigzip.c; compile with -DZLIB_DLL ++ The zlib DLL support was initially done by Alessandro Iacopetti and is ++ now maintained by Gilles Vollant . Check the zlib DLL ++ home page at http://www.winimage.com/zLibDll ++ ++ From Visual Basic, you can call the DLL functions which do not take ++ a structure as argument: compress, uncompress and all gz* functions. ++ See contrib/visual-basic.txt for more information, or get ++ http://www.tcfb.com/dowseware/cmp-z-it.zip ++ ++- For 64-bit Irix, deflate.c must be compiled without any optimization. 
++ With -O, one libpng test fails. The test works in 32 bit mode (with ++ the -n32 compiler flag). The compiler bug has been reported to SGI. ++ ++- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1 ++ it works when compiled with cc. ++ ++- on Digital Unix 4.0D (formely OSF/1) on AlphaServer, the cc option -std1 ++ is necessary to get gzprintf working correctly. This is done by configure. ++ ++- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works ++ with other compilers. Use "make test" to check your compiler. ++ ++- gzdopen is not supported on RISCOS, BEOS and by some Mac compilers. ++ ++- For Turbo C the small model is supported only with reduced performance to ++ avoid any far allocation; it was tested with -DMAX_WBITS=11 -DMAX_MEM_LEVEL=3 ++ ++- For PalmOs, see http://www.cs.uit.no/~perm/PASTA/pilot/software.html ++ Per Harald Myrvang ++ ++ ++Acknowledgments: ++ ++ The deflate format used by zlib was defined by Phil Katz. The deflate ++ and zlib specifications were written by L. Peter Deutsch. Thanks to all the ++ people who reported problems and suggested various improvements in zlib; ++ they are too numerous to cite here. ++ ++Copyright notice: ++ ++ (C) 1995-2002 Jean-loup Gailly and Mark Adler ++ ++ This software is provided 'as-is', without any express or implied ++ warranty. In no event will the authors be held liable for any damages ++ arising from the use of this software. ++ ++ Permission is granted to anyone to use this software for any purpose, ++ including commercial applications, and to alter it and redistribute it ++ freely, subject to the following restrictions: ++ ++ 1. The origin of this software must not be misrepresented; you must not ++ claim that you wrote the original software. If you use this software ++ in a product, an acknowledgment in the product documentation would be ++ appreciated but is not required. ++ 2. 
Altered source versions must be plainly marked as such, and must not be ++ misrepresented as being the original software. ++ 3. This notice may not be removed or altered from any source distribution. ++ ++ Jean-loup Gailly Mark Adler ++ jloup@gzip.org madler@alumni.caltech.edu ++ ++If you use the zlib library in a product, we would appreciate *not* ++receiving lengthy legal documents to sign. The sources are provided ++for free but without warranty of any kind. The library has been ++entirely written by Jean-loup Gailly and Mark Adler; it does not ++include third-party code. ++ ++If you redistribute modified sources, we would appreciate that you include ++in the file ChangeLog history information documenting your changes. +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/README-zlib.freeswan Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,13 @@ ++The only changes made to these files for use in FreeS/WAN are: ++ ++ - In zconf.h, macros are defined to prefix global symbols with "ipcomp_" ++ (or "_ipcomp"), when compiled with -DIPCOMP_PREFIX. ++ - The copyright strings are defined local (static) ++ ++ The above changes are made to avoid name collisions with ppp_deflate ++ and ext2compr. ++ ++ - Files not needed for FreeS/WAN have been removed ++ ++ See the "README" file for information about where to obtain the complete ++ zlib package. +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/addrtoa.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,67 @@ ++/* ++ * addresses to ASCII ++ * Copyright (C) 1998, 1999 Henry Spencer. ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . 
++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: addrtoa.c,v 1.10 2004-07-10 07:43:47 mcr Exp $ ++ */ ++#include "openswan.h" ++ ++#define NBYTES 4 /* bytes in an address */ ++#define PERBYTE 4 /* three digits plus a dot or NUL */ ++#define BUFLEN (NBYTES*PERBYTE) ++ ++#if BUFLEN != ADDRTOA_BUF ++#error "ADDRTOA_BUF in openswan.h inconsistent with addrtoa() code" ++#endif ++ ++/* ++ - addrtoa - convert binary address to ASCII dotted decimal ++ */ ++size_t /* space needed for full conversion */ ++addrtoa(addr, format, dst, dstlen) ++struct in_addr addr; ++int format; /* character */ ++char *dst; /* need not be valid if dstlen is 0 */ ++size_t dstlen; ++{ ++ unsigned long a = ntohl(addr.s_addr); ++ int i; ++ size_t n; ++ unsigned long byte; ++ char buf[BUFLEN]; ++ char *p; ++ ++ switch (format) { ++ case 0: ++ break; ++ default: ++ return 0; ++ break; ++ } ++ ++ p = buf; ++ for (i = NBYTES-1; i >= 0; i--) { ++ byte = (a >> (i*8)) & 0xff; ++ p += ultoa(byte, 10, p, PERBYTE); ++ if (i != 0) ++ *(p-1) = '.'; ++ } ++ n = p - buf; ++ ++ if (dstlen > 0) { ++ if (n > dstlen) ++ buf[dstlen - 1] = '\0'; ++ strcpy(dst, buf); ++ } ++ return n; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/addrtot.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,431 @@ ++/* ++ * addresses to text ++ * Copyright (C) 2000 Henry Spencer. ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . 
++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: addrtot.c,v 1.22.2.2 2007-10-30 21:32:26 paul Exp $ ++ */ ++ ++#if defined(__KERNEL__) && defined(__HAVE_ARCH_STRSTR) ++#include ++#endif ++ ++#include "openswan.h" ++ ++#define IP4BYTES 4 /* bytes in an IPv4 address */ ++#define PERBYTE 4 /* three digits plus a dot or NUL */ ++#define IP6BYTES 16 /* bytes in an IPv6 address */ ++ ++/* forwards */ ++static size_t normal4(const unsigned char *s, size_t len, char *b, char **dp); ++static size_t normal6(const unsigned char *s, size_t len, char *b, char **dp, int squish); ++static size_t reverse4(const unsigned char *s, size_t len, char *b, char **dp); ++static size_t reverse6(const unsigned char *s, size_t len, char *b, char **dp); ++ ++#if defined(__KERNEL__) && !defined(__HAVE_ARCH_STRSTR) ++#define strstr ipsec_strstr ++/* ++ * Find the first occurrence of find in s. 
++ * (from NetBSD 1.6's /src/lib/libc/string/strstr.c) ++ */ ++static char *strstr(const char *s, const char *find); ++ ++static char * ++strstr(s, find) ++ const char *s, *find; ++{ ++ char c, sc; ++ size_t len; ++ ++ if ((c = *find++) != 0) { ++ len = strlen(find); ++ do { ++ do { ++ if ((sc = *s++) == 0) ++ return (NULL); ++ } while (sc != c); ++ } while (strncmp(s, find, len) != 0); ++ s--; ++ } ++ /* LINTED interface specification */ ++ return ((char *)s); ++} ++#endif ++ ++/* ++ - addrtot - convert binary address to text (dotted decimal or IPv6 string) ++ */ ++size_t /* space needed for full conversion */ ++addrtot(src, format, dst, dstlen) ++const ip_address *src; ++int format; /* character */ ++char *dst; /* need not be valid if dstlen is 0 */ ++size_t dstlen; ++{ ++ const unsigned char *b; ++ size_t n; ++ char buf[1+ADDRTOT_BUF+1]; /* :address: */ ++ char *p; ++ int t = addrtypeof(src); ++# define TF(t, f) (((t)<<8) | (f)) ++ ++ n = addrbytesptr(src, &b); ++ if (n == 0) { ++ bad: ++ dst[0]='\0'; ++ strncat(dst, "", dstlen); ++ return sizeof(""); ++ } ++ ++ switch (TF(t, format)) { ++ case TF(AF_INET, 0): ++ n = normal4(b, n, buf, &p); ++ break; ++ case TF(AF_INET6, 0): ++ n = normal6(b, n, buf, &p, 1); ++ break; ++ case TF(AF_INET, 'Q'): ++ n = normal4(b, n, buf, &p); ++ break; ++ case TF(AF_INET6, 'Q'): ++ n = normal6(b, n, buf, &p, 0); ++ break; ++ case TF(AF_INET, 'r'): ++ n = reverse4(b, n, buf, &p); ++ break; ++ case TF(AF_INET6, 'r'): ++ n = reverse6(b, n, buf, &p); ++ break; ++ default: /* including (AF_INET, 'R') */ ++ goto bad; ++ break; ++ } ++ ++ if (dstlen > 0) { ++ if (dstlen < n) ++ p[dstlen - 1] = '\0'; ++ strcpy(dst, p); ++ } ++ return n; ++} ++ ++/* ++ - normal4 - normal IPv4 address-text conversion ++ */ ++static size_t /* size of text, including NUL */ ++normal4(srcp, srclen, buf, dstp) ++const unsigned char *srcp; ++size_t srclen; ++char *buf; /* guaranteed large enough */ ++char **dstp; /* where to put result pointer */ ++{ ++ int i; 
++ char *p; ++ ++ if (srclen != IP4BYTES) /* "can't happen" */ ++ return 0; ++ p = buf; ++ for (i = 0; i < IP4BYTES; i++) { ++ p += ultot(srcp[i], 10, p, PERBYTE); ++ if (i != IP4BYTES - 1) ++ *(p-1) = '.'; /* overwrites the NUL */ ++ } ++ *dstp = buf; ++ return p - buf; ++} ++ ++/* ++ - normal6 - normal IPv6 address-text conversion ++ */ ++static size_t /* size of text, including NUL */ ++normal6(srcp, srclen, buf, dstp, squish) ++const unsigned char *srcp; ++size_t srclen; ++char *buf; /* guaranteed large enough, plus 2 */ ++char **dstp; /* where to put result pointer */ ++int squish; /* whether to squish out 0:0 */ ++{ ++ int i; ++ unsigned long piece; ++ char *p; ++ char *q; ++ ++ if (srclen != IP6BYTES) /* "can't happen" */ ++ return 0; ++ p = buf; ++ *p++ = ':'; ++ for (i = 0; i < IP6BYTES/2; i++) { ++ piece = (srcp[2*i] << 8) + srcp[2*i + 1]; ++ p += ultot(piece, 16, p, 5); /* 5 = abcd + NUL */ ++ *(p-1) = ':'; /* overwrites the NUL */ ++ } ++ *p = '\0'; ++ q = strstr(buf, ":0:0:"); ++ if (squish && q != NULL) { /* zero squishing is possible */ ++ p = q + 1; ++ while (*p == '0' && *(p+1) == ':') ++ p += 2; ++ q++; ++ *q++ = ':'; /* overwrite first 0 */ ++ while (*p != '\0') ++ *q++ = *p++; ++ *q = '\0'; ++ if (!(*(q-1) == ':' && *(q-2) == ':')) ++ *--q = '\0'; /* strip final : unless :: */ ++ p = buf; ++ if (!(*p == ':' && *(p+1) == ':')) ++ p++; /* skip initial : unless :: */ ++ } else { ++ q = p; ++ *--q = '\0'; /* strip final : */ ++ p = buf + 1; /* skip initial : */ ++ } ++ *dstp = p; ++ return q - p + 1; ++} ++ ++/* ++ - reverse4 - IPv4 reverse-lookup conversion ++ */ ++static size_t /* size of text, including NUL */ ++reverse4(srcp, srclen, buf, dstp) ++const unsigned char *srcp; ++size_t srclen; ++char *buf; /* guaranteed large enough */ ++char **dstp; /* where to put result pointer */ ++{ ++ int i; ++ char *p; ++ ++ if (srclen != IP4BYTES) /* "can't happen" */ ++ return 0; ++ p = buf; ++ for (i = IP4BYTES-1; i >= 0; i--) { ++ p += ultot(srcp[i], 10, 
p, PERBYTE); ++ *(p-1) = '.'; /* overwrites the NUL */ ++ } ++ strcpy(p, "IN-ADDR.ARPA."); ++ *dstp = buf; ++ return strlen(buf) + 1; ++} ++ ++/* ++ - reverse6 - IPv6 reverse-lookup conversion (RFC 1886) ++ * A trifle inefficient, really shouldn't use ultot... ++ */ ++static size_t /* size of text, including NUL */ ++reverse6(srcp, srclen, buf, dstp) ++const unsigned char *srcp; ++size_t srclen; ++char *buf; /* guaranteed large enough */ ++char **dstp; /* where to put result pointer */ ++{ ++ int i; ++ unsigned long piece; ++ char *p; ++ ++ if (srclen != IP6BYTES) /* "can't happen" */ ++ return 0; ++ p = buf; ++ for (i = IP6BYTES-1; i >= 0; i--) { ++ piece = srcp[i]; ++ p += ultot(piece&0xf, 16, p, 2); ++ *(p-1) = '.'; ++ p += ultot(piece>>4, 16, p, 2); ++ *(p-1) = '.'; ++ } ++ strcpy(p, "IP6.ARPA."); ++ *dstp = buf; ++ return strlen(buf) + 1; ++} ++ ++/* ++ - reverse6 - modern IPv6 reverse-lookup conversion (RFC 2874) ++ * this version removed as it was obsoleted in the end. ++ */ ++ ++#ifdef ADDRTOT_MAIN ++ ++#include ++#include ++#include ++#include ++ ++void regress(void); ++ ++int ++main(int argc, char *argv[]) ++{ ++ if (argc < 2) { ++ fprintf(stderr, "Usage: %s {addr|net/mask|begin...end|-r}\n", ++ argv[0]); ++ exit(2); ++ } ++ ++ if (strcmp(argv[1], "-r") == 0) { ++ regress(); ++ fprintf(stderr, "regress() returned?!?\n"); ++ exit(1); ++ } ++ exit(0); ++} ++ ++struct rtab { ++ char *input; ++ char format; ++ char *output; /* NULL means error expected */ ++} rtab[] = { ++ {"1.2.3.0", 0, "1.2.3.0"}, ++ {"1:2::3:4", 0, "1:2::3:4"}, ++ {"1:2::3:4", 'Q', "1:2:0:0:0:0:3:4"}, ++ {"1:2:0:0:3:4:0:0", 0, "1:2::3:4:0:0"}, ++ {"1.2.3.4", 'r' , "4.3.2.1.IN-ADDR.ARPA."}, ++ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e f */ ++ {"1:2::3:4", 'r', "4.0.0.0.3.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.2.0.0.0.1.0.0.0.IP6.ARPA."}, ++ {NULL, 0, NULL} ++}; ++ ++void ++regress() ++{ ++ struct rtab *r; ++ int status = 0; ++ ip_address a; ++ char in[100]; ++ char 
buf[100]; ++ const char *oops; ++ size_t n; ++ ++ for (r = rtab; r->input != NULL; r++) { ++ strcpy(in, r->input); ++ ++ /* convert it *to* internal format */ ++ oops = ttoaddr(in, strlen(in), 0, &a); ++ ++ /* now convert it back */ ++ ++ n = addrtot(&a, r->format, buf, sizeof(buf)); ++ ++ if (n == 0 && r->output == NULL) ++ {} /* okay, error expected */ ++ ++ else if (n == 0) { ++ printf("`%s' atoasr failed\n", r->input); ++ status = 1; ++ ++ } else if (r->output == NULL) { ++ printf("`%s' atoasr succeeded unexpectedly '%c'\n", ++ r->input, r->format); ++ status = 1; ++ } else { ++ if (strcasecmp(r->output, buf) != 0) { ++ printf("`%s' '%c' gave `%s', expected `%s'\n", ++ r->input, r->format, buf, r->output); ++ status = 1; ++ } ++ } ++ } ++ exit(status); ++} ++ ++#endif /* ADDRTOT_MAIN */ ++ ++/* ++ * $Log: addrtot.c,v $ ++ * Revision 1.22.2.2 2007-10-30 21:32:26 paul ++ * Added strstr prototype [dhr] ++ * ++ * Revision 1.22.2.1 2005/11/17 22:30:49 paul ++ * pull up strstr fix from head. ++ * ++ * Revision 1.22 2005/05/20 16:47:40 mcr ++ * make strstr static if we need it. ++ * ++ * Revision 1.21 2005/03/21 00:35:12 mcr ++ * test for strstr properly ++ * ++ * Revision 1.20 2004/11/09 22:52:20 mcr ++ * until we figure out which kernels have strsep and which ++ * do not (UML does not under certain circumstances), then ++ * let's just provide our own. ++ * ++ * Revision 1.19 2004/10/08 16:30:33 mcr ++ * pull-up of initial crypto-offload work. ++ * ++ * Revision 1.18 2004/09/18 19:33:08 mcr ++ * use an appropriate kernel happy ifdef for strstr. ++ * ++ * Revision 1.17 2004/09/15 21:49:02 mcr ++ * use local copy of strstr() if this is going in the kernel. ++ * Not clear why this worked before, or why this shows up ++ * for modules only. ++ * ++ * Revision 1.16 2004/07/10 07:43:47 mcr ++ * Moved from linux/lib/libfreeswan/addrtot.c,v ++ * ++ * Revision 1.15 2004/04/11 17:39:25 mcr ++ * removed internal.h requirements. 
++ * ++ * Revision 1.14 2004/03/08 01:59:08 ken ++ * freeswan.h -> openswan.h ++ * ++ * Revision 1.13 2004/01/05 23:21:05 mcr ++ * if the address type is invalid, then return length of ++ * string! ++ * ++ * Revision 1.12 2003/12/30 06:42:48 mcr ++ * added $Log: addrtot.c,v $ ++ * added Revision 1.22.2.2 2007-10-30 21:32:26 paul ++ * added Added strstr prototype [dhr] ++ * added ++ * added Revision 1.22.2.1 2005/11/17 22:30:49 paul ++ * added pull up strstr fix from head. ++ * added ++ * added Revision 1.22 2005/05/20 16:47:40 mcr ++ * added make strstr static if we need it. ++ * added ++ * added Revision 1.21 2005/03/21 00:35:12 mcr ++ * added test for strstr properly ++ * added ++ * added Revision 1.20 2004/11/09 22:52:20 mcr ++ * added until we figure out which kernels have strsep and which ++ * added do not (UML does not under certain circumstances), then ++ * added let's just provide our own. ++ * added ++ * added Revision 1.19 2004/10/08 16:30:33 mcr ++ * added pull-up of initial crypto-offload work. ++ * added ++ * added Revision 1.18 2004/09/18 19:33:08 mcr ++ * added use an appropriate kernel happy ifdef for strstr. ++ * added ++ * added Revision 1.17 2004/09/15 21:49:02 mcr ++ * added use local copy of strstr() if this is going in the kernel. ++ * added Not clear why this worked before, or why this shows up ++ * added for modules only. ++ * added ++ * added Revision 1.16 2004/07/10 07:43:47 mcr ++ * added Moved from linux/lib/libfreeswan/addrtot.c,v ++ * added ++ * added Revision 1.15 2004/04/11 17:39:25 mcr ++ * added removed internal.h requirements. ++ * added ++ * added Revision 1.14 2004/03/08 01:59:08 ken ++ * added freeswan.h -> openswan.h ++ * added ++ * added Revision 1.13 2004/01/05 23:21:05 mcr ++ * added if the address type is invalid, then return length of ++ * added string! 
++ * added ++ * ++ * ++ */ ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/addrtypeof.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,93 @@ ++/* ++ * extract parts of an ip_address ++ * Copyright (C) 2000 Henry Spencer. ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: addrtypeof.c,v 1.10 2004-07-10 07:43:47 mcr Exp $ ++ */ ++#include "openswan.h" ++ ++/* ++ - addrtypeof - get the type of an ip_address ++ */ ++int ++addrtypeof(src) ++const ip_address *src; ++{ ++ return src->u.v4.sin_family; ++} ++ ++/* ++ - addrbytesptr - get pointer to the address bytes of an ip_address ++ */ ++size_t /* 0 for error */ ++addrbytesptr(src, dstp) ++const ip_address *src; ++const unsigned char **dstp; /* NULL means just a size query */ ++{ ++ const unsigned char *p; ++ size_t n; ++ ++ switch (src->u.v4.sin_family) { ++ case AF_INET: ++ p = (const unsigned char *)&src->u.v4.sin_addr.s_addr; ++ n = 4; ++ break; ++ case AF_INET6: ++ p = (const unsigned char *)&src->u.v6.sin6_addr; ++ n = 16; ++ break; ++ default: ++ return 0; ++ break; ++ } ++ ++ if (dstp != NULL) ++ *dstp = p; ++ return n; ++} ++ ++/* ++ - addrlenof - get length of the address bytes of an ip_address ++ */ ++size_t /* 0 for error */ ++addrlenof(src) ++const ip_address *src; ++{ ++ return addrbytesptr(src, NULL); ++} ++ ++/* ++ - addrbytesof - get the address bytes of an ip_address ++ */ ++size_t /* 0 for error */ ++addrbytesof(src, dst, dstlen) ++const ip_address *src; ++unsigned char *dst; ++size_t dstlen; 
++{ ++ const unsigned char *p; ++ size_t n; ++ size_t ncopy; ++ ++ n = addrbytesptr(src, &p); ++ if (n == 0) ++ return 0; ++ ++ if (dstlen > 0) { ++ ncopy = n; ++ if (ncopy > dstlen) ++ ncopy = dstlen; ++ memcpy(dst, p, ncopy); ++ } ++ return n; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/adler32.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,49 @@ ++/* adler32.c -- compute the Adler-32 checksum of a data stream ++ * Copyright (C) 1995-2002 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++/* @(#) $Id: adler32.c,v 1.6 2004-07-10 19:11:18 mcr Exp $ */ ++ ++#include ++#include ++ ++#define BASE 65521L /* largest prime smaller than 65536 */ ++#define NMAX 5552 ++/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ ++ ++#define DO1(buf,i) {s1 += buf[i]; s2 += s1;} ++#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); ++#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); ++#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); ++#define DO16(buf) DO8(buf,0); DO8(buf,8); ++ ++/* ========================================================================= */ ++uLong ZEXPORT adler32(adler, buf, len) ++ uLong adler; ++ const Bytef *buf; ++ uInt len; ++{ ++ unsigned long s1 = adler & 0xffff; ++ unsigned long s2 = (adler >> 16) & 0xffff; ++ int k; ++ ++ if (buf == Z_NULL) return 1L; ++ ++ while (len > 0) { ++ k = len < NMAX ? 
len : NMAX; ++ len -= k; ++ while (k >= 16) { ++ DO16(buf); ++ buf += 16; ++ k -= 16; ++ } ++ if (k != 0) do { ++ s1 += *buf++; ++ s2 += s1; ++ } while (--k); ++ s1 %= BASE; ++ s2 %= BASE; ++ } ++ return (s2 << 16) | s1; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/aes/Makefile Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,59 @@ ++# Makefile for KLIPS 3DES kernel code as a module for 2.6 kernels ++# ++# Makefile for KLIPS kernel code as a module ++# Copyright (C) 2002-2004 Michael Richardson ++# ++# This program is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by the ++# Free Software Foundation; either version 2 of the License, or (at your ++# option) any later version. See . ++# ++# This program is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# for more details. ++# ++# RCSID $Id: Makefile.fs2_6,v 1.1.10.1 2005-08-12 16:10:05 ken Exp $ ++# ++# Note! Dependencies are done automagically by 'make dep', which also ++# removes any old dependencies. DON'T put your own dependencies here ++# unless it's something special (ie not a .c file). ++# ++ ++obj-$(CONFIG_KLIPS_ENC_AES) += ipsec_alg_aes.o ++obj-$(CONFIG_KLIPS_ENC_AES) += aes_xcbc_mac.o ++obj-$(CONFIG_KLIPS_ENC_AES) += aes_cbc.o ++ ++ifeq ($(strip ${SUBARCH}),) ++SUBARCH:=${ARCH} ++endif ++ ++# the assembly version expects frame pointers, which are ++# optional in many kernel builds. If you want speed, you should ++# probably use cryptoapi code instead. 
++USEASSEMBLY=${SUBARCH}${CONFIG_FRAME_POINTER} ++ifeq (${USEASSEMBLY},i386y) ++obj-$(CONFIG_KLIPS_ENC_AES) += aes-i586.o ++else ++obj-$(CONFIG_KLIPS_ENC_AES) += aes.o ++endif ++ ++ ++# ++# $Log: Makefile.fs2_6,v $ ++# Revision 1.1.10.1 2005-08-12 16:10:05 ken ++# do not use assembly code with there are no frame pointers ++# ++# Revision 1.2 2005/08/12 14:13:58 mcr ++# do not use assembly code with there are no frame pointers, ++# as it does not have the right linkages. ++# ++# Revision 1.1 2004/08/17 03:31:34 mcr ++# klips 2.6 edits. ++# ++# ++# Local Variables: ++# compile-command: "(cd ../../.. && source umlsetup.sh && make -C ${POOLSPACE} module/ipsec.o)" ++# End Variables: ++# ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/aes/aes-i586.S Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,892 @@ ++// ++// Copyright (c) 2001, Dr Brian Gladman , Worcester, UK. ++// All rights reserved. ++// ++// TERMS ++// ++// Redistribution and use in source and binary forms, with or without ++// modification, are permitted subject to the following conditions: ++// ++// 1. Redistributions of source code must retain the above copyright ++// notice, this list of conditions and the following disclaimer. ++// ++// 2. Redistributions in binary form must reproduce the above copyright ++// notice, this list of conditions and the following disclaimer in the ++// documentation and/or other materials provided with the distribution. ++// ++// 3. The copyright holder's name must not be used to endorse or promote ++// any products derived from this software without his specific prior ++// written permission. ++// ++// This software is provided 'as is' with no express or implied warranties ++// of correctness or fitness for purpose. 
++ ++// Modified by Jari Ruusu, December 24 2001 ++// - Converted syntax to GNU CPP/assembler syntax ++// - C programming interface converted back to "old" API ++// - Minor portability cleanups and speed optimizations ++ ++// An AES (Rijndael) implementation for the Pentium. This version only ++// implements the standard AES block length (128 bits, 16 bytes). This code ++// does not preserve the eax, ecx or edx registers or the artihmetic status ++// flags. However, the ebx, esi, edi, and ebp registers are preserved across ++// calls. ++ ++// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f) ++// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[]) ++// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[]) ++ ++#if defined(USE_UNDERLINE) ++# define aes_set_key _aes_set_key ++# define aes_encrypt _aes_encrypt ++# define aes_decrypt _aes_decrypt ++#endif ++#if !defined(ALIGN32BYTES) ++# define ALIGN32BYTES 32 ++#endif ++ ++ .file "aes-i586.S" ++ .globl aes_set_key ++ .globl aes_encrypt ++ .globl aes_decrypt ++ ++#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) ++ ++// offsets to parameters with one register pushed onto stack ++ ++#define ctx 8 // AES context structure ++#define in_blk 12 // input byte array address parameter ++#define out_blk 16 // output byte array address parameter ++ ++// offsets in context structure ++ ++#define nkey 0 // key length, size 4 ++#define nrnd 4 // number of rounds, size 4 ++#define ekey 8 // encryption key schedule base address, size 256 ++#define dkey 264 // decryption key schedule base address, size 256 ++ ++// This macro performs a forward encryption cycle. It is entered with ++// the first previous round column values in %eax, %ebx, %esi and %edi and ++// exits with the final values in the same registers. 
++ ++#define fwd_rnd(p1,p2) \ ++ mov %ebx,(%esp) ;\ ++ movzbl %al,%edx ;\ ++ mov %eax,%ecx ;\ ++ mov p2(%ebp),%eax ;\ ++ mov %edi,4(%esp) ;\ ++ mov p2+12(%ebp),%edi ;\ ++ xor p1(,%edx,4),%eax ;\ ++ movzbl %ch,%edx ;\ ++ shr $16,%ecx ;\ ++ mov p2+4(%ebp),%ebx ;\ ++ xor p1+tlen(,%edx,4),%edi ;\ ++ movzbl %cl,%edx ;\ ++ movzbl %ch,%ecx ;\ ++ xor p1+3*tlen(,%ecx,4),%ebx ;\ ++ mov %esi,%ecx ;\ ++ mov p1+2*tlen(,%edx,4),%esi ;\ ++ movzbl %cl,%edx ;\ ++ xor p1(,%edx,4),%esi ;\ ++ movzbl %ch,%edx ;\ ++ shr $16,%ecx ;\ ++ xor p1+tlen(,%edx,4),%ebx ;\ ++ movzbl %cl,%edx ;\ ++ movzbl %ch,%ecx ;\ ++ xor p1+2*tlen(,%edx,4),%eax ;\ ++ mov (%esp),%edx ;\ ++ xor p1+3*tlen(,%ecx,4),%edi ;\ ++ movzbl %dl,%ecx ;\ ++ xor p2+8(%ebp),%esi ;\ ++ xor p1(,%ecx,4),%ebx ;\ ++ movzbl %dh,%ecx ;\ ++ shr $16,%edx ;\ ++ xor p1+tlen(,%ecx,4),%eax ;\ ++ movzbl %dl,%ecx ;\ ++ movzbl %dh,%edx ;\ ++ xor p1+2*tlen(,%ecx,4),%edi ;\ ++ mov 4(%esp),%ecx ;\ ++ xor p1+3*tlen(,%edx,4),%esi ;\ ++ movzbl %cl,%edx ;\ ++ xor p1(,%edx,4),%edi ;\ ++ movzbl %ch,%edx ;\ ++ shr $16,%ecx ;\ ++ xor p1+tlen(,%edx,4),%esi ;\ ++ movzbl %cl,%edx ;\ ++ movzbl %ch,%ecx ;\ ++ xor p1+2*tlen(,%edx,4),%ebx ;\ ++ xor p1+3*tlen(,%ecx,4),%eax ++ ++// This macro performs an inverse encryption cycle. It is entered with ++// the first previous round column values in %eax, %ebx, %esi and %edi and ++// exits with the final values in the same registers. 
++ ++#define inv_rnd(p1,p2) \ ++ movzbl %al,%edx ;\ ++ mov %ebx,(%esp) ;\ ++ mov %eax,%ecx ;\ ++ mov p2(%ebp),%eax ;\ ++ mov %edi,4(%esp) ;\ ++ mov p2+4(%ebp),%ebx ;\ ++ xor p1(,%edx,4),%eax ;\ ++ movzbl %ch,%edx ;\ ++ shr $16,%ecx ;\ ++ mov p2+12(%ebp),%edi ;\ ++ xor p1+tlen(,%edx,4),%ebx ;\ ++ movzbl %cl,%edx ;\ ++ movzbl %ch,%ecx ;\ ++ xor p1+3*tlen(,%ecx,4),%edi ;\ ++ mov %esi,%ecx ;\ ++ mov p1+2*tlen(,%edx,4),%esi ;\ ++ movzbl %cl,%edx ;\ ++ xor p1(,%edx,4),%esi ;\ ++ movzbl %ch,%edx ;\ ++ shr $16,%ecx ;\ ++ xor p1+tlen(,%edx,4),%edi ;\ ++ movzbl %cl,%edx ;\ ++ movzbl %ch,%ecx ;\ ++ xor p1+2*tlen(,%edx,4),%eax ;\ ++ mov (%esp),%edx ;\ ++ xor p1+3*tlen(,%ecx,4),%ebx ;\ ++ movzbl %dl,%ecx ;\ ++ xor p2+8(%ebp),%esi ;\ ++ xor p1(,%ecx,4),%ebx ;\ ++ movzbl %dh,%ecx ;\ ++ shr $16,%edx ;\ ++ xor p1+tlen(,%ecx,4),%esi ;\ ++ movzbl %dl,%ecx ;\ ++ movzbl %dh,%edx ;\ ++ xor p1+2*tlen(,%ecx,4),%edi ;\ ++ mov 4(%esp),%ecx ;\ ++ xor p1+3*tlen(,%edx,4),%eax ;\ ++ movzbl %cl,%edx ;\ ++ xor p1(,%edx,4),%edi ;\ ++ movzbl %ch,%edx ;\ ++ shr $16,%ecx ;\ ++ xor p1+tlen(,%edx,4),%eax ;\ ++ movzbl %cl,%edx ;\ ++ movzbl %ch,%ecx ;\ ++ xor p1+2*tlen(,%edx,4),%ebx ;\ ++ xor p1+3*tlen(,%ecx,4),%esi ++ ++// AES (Rijndael) Encryption Subroutine ++ ++ .text ++ .align ALIGN32BYTES ++aes_encrypt: ++ push %ebp ++ mov ctx(%esp),%ebp // pointer to context ++ mov in_blk(%esp),%ecx ++ push %ebx ++ push %esi ++ push %edi ++ mov nrnd(%ebp),%edx // number of rounds ++ lea ekey+16(%ebp),%ebp // key pointer ++ ++// input four columns and xor in first round key ++ ++ mov (%ecx),%eax ++ mov 4(%ecx),%ebx ++ mov 8(%ecx),%esi ++ mov 12(%ecx),%edi ++ xor -16(%ebp),%eax ++ xor -12(%ebp),%ebx ++ xor -8(%ebp),%esi ++ xor -4(%ebp),%edi ++ ++ sub $8,%esp // space for register saves on stack ++ ++ sub $10,%edx ++ je aes_15 ++ add $32,%ebp ++ sub $2,%edx ++ je aes_13 ++ add $32,%ebp ++ ++ fwd_rnd(aes_ft_tab,-64) // 14 rounds for 256-bit key ++ fwd_rnd(aes_ft_tab,-48) ++aes_13: fwd_rnd(aes_ft_tab,-32) // 12 rounds 
for 192-bit key ++ fwd_rnd(aes_ft_tab,-16) ++aes_15: fwd_rnd(aes_ft_tab,0) // 10 rounds for 128-bit key ++ fwd_rnd(aes_ft_tab,16) ++ fwd_rnd(aes_ft_tab,32) ++ fwd_rnd(aes_ft_tab,48) ++ fwd_rnd(aes_ft_tab,64) ++ fwd_rnd(aes_ft_tab,80) ++ fwd_rnd(aes_ft_tab,96) ++ fwd_rnd(aes_ft_tab,112) ++ fwd_rnd(aes_ft_tab,128) ++ fwd_rnd(aes_fl_tab,144) // last round uses a different table ++ ++// move final values to the output array. ++ ++ mov out_blk+20(%esp),%ebp ++ add $8,%esp ++ mov %eax,(%ebp) ++ mov %ebx,4(%ebp) ++ mov %esi,8(%ebp) ++ mov %edi,12(%ebp) ++ pop %edi ++ pop %esi ++ pop %ebx ++ pop %ebp ++ ret ++ ++ ++// AES (Rijndael) Decryption Subroutine ++ ++ .align ALIGN32BYTES ++aes_decrypt: ++ push %ebp ++ mov ctx(%esp),%ebp // pointer to context ++ mov in_blk(%esp),%ecx ++ push %ebx ++ push %esi ++ push %edi ++ mov nrnd(%ebp),%edx // number of rounds ++ lea dkey+16(%ebp),%ebp // key pointer ++ ++// input four columns and xor in first round key ++ ++ mov (%ecx),%eax ++ mov 4(%ecx),%ebx ++ mov 8(%ecx),%esi ++ mov 12(%ecx),%edi ++ xor -16(%ebp),%eax ++ xor -12(%ebp),%ebx ++ xor -8(%ebp),%esi ++ xor -4(%ebp),%edi ++ ++ sub $8,%esp // space for register saves on stack ++ ++ sub $10,%edx ++ je aes_25 ++ add $32,%ebp ++ sub $2,%edx ++ je aes_23 ++ add $32,%ebp ++ ++ inv_rnd(aes_it_tab,-64) // 14 rounds for 256-bit key ++ inv_rnd(aes_it_tab,-48) ++aes_23: inv_rnd(aes_it_tab,-32) // 12 rounds for 192-bit key ++ inv_rnd(aes_it_tab,-16) ++aes_25: inv_rnd(aes_it_tab,0) // 10 rounds for 128-bit key ++ inv_rnd(aes_it_tab,16) ++ inv_rnd(aes_it_tab,32) ++ inv_rnd(aes_it_tab,48) ++ inv_rnd(aes_it_tab,64) ++ inv_rnd(aes_it_tab,80) ++ inv_rnd(aes_it_tab,96) ++ inv_rnd(aes_it_tab,112) ++ inv_rnd(aes_it_tab,128) ++ inv_rnd(aes_il_tab,144) // last round uses a different table ++ ++// move final values to the output array. 
++ ++ mov out_blk+20(%esp),%ebp ++ add $8,%esp ++ mov %eax,(%ebp) ++ mov %ebx,4(%ebp) ++ mov %esi,8(%ebp) ++ mov %edi,12(%ebp) ++ pop %edi ++ pop %esi ++ pop %ebx ++ pop %ebp ++ ret ++ ++// AES (Rijndael) Key Schedule Subroutine ++ ++// input/output parameters ++ ++#define aes_cx 12 // AES context ++#define in_key 16 // key input array address ++#define key_ln 20 // key length, bytes (16,24,32) or bits (128,192,256) ++#define ed_flg 24 // 0=create both encr/decr keys, 1=create encr key only ++ ++// offsets for locals ++ ++#define cnt -4 ++#define kpf -8 ++#define slen 8 ++ ++// This macro performs a column mixing operation on an input 32-bit ++// word to give a 32-bit result. It uses each of the 4 bytes in the ++// the input column to index 4 different tables of 256 32-bit words ++// that are xored together to form the output value. ++ ++#define mix_col(p1) \ ++ movzbl %bl,%ecx ;\ ++ mov p1(,%ecx,4),%eax ;\ ++ movzbl %bh,%ecx ;\ ++ ror $16,%ebx ;\ ++ xor p1+tlen(,%ecx,4),%eax ;\ ++ movzbl %bl,%ecx ;\ ++ xor p1+2*tlen(,%ecx,4),%eax ;\ ++ movzbl %bh,%ecx ;\ ++ xor p1+3*tlen(,%ecx,4),%eax ++ ++// Key Schedule Macros ++ ++#define ksc4(p1) \ ++ rol $24,%ebx ;\ ++ mix_col(aes_fl_tab) ;\ ++ ror $8,%ebx ;\ ++ xor 4*p1+aes_rcon_tab,%eax ;\ ++ xor %eax,%esi ;\ ++ xor %esi,%ebp ;\ ++ mov %esi,16*p1(%edi) ;\ ++ mov %ebp,16*p1+4(%edi) ;\ ++ xor %ebp,%edx ;\ ++ xor %edx,%ebx ;\ ++ mov %edx,16*p1+8(%edi) ;\ ++ mov %ebx,16*p1+12(%edi) ++ ++#define ksc6(p1) \ ++ rol $24,%ebx ;\ ++ mix_col(aes_fl_tab) ;\ ++ ror $8,%ebx ;\ ++ xor 4*p1+aes_rcon_tab,%eax ;\ ++ xor 24*p1-24(%edi),%eax ;\ ++ mov %eax,24*p1(%edi) ;\ ++ xor 24*p1-20(%edi),%eax ;\ ++ mov %eax,24*p1+4(%edi) ;\ ++ xor %eax,%esi ;\ ++ xor %esi,%ebp ;\ ++ mov %esi,24*p1+8(%edi) ;\ ++ mov %ebp,24*p1+12(%edi) ;\ ++ xor %ebp,%edx ;\ ++ xor %edx,%ebx ;\ ++ mov %edx,24*p1+16(%edi) ;\ ++ mov %ebx,24*p1+20(%edi) ++ ++#define ksc8(p1) \ ++ rol $24,%ebx ;\ ++ mix_col(aes_fl_tab) ;\ ++ ror $8,%ebx ;\ ++ xor 4*p1+aes_rcon_tab,%eax ;\ ++ 
xor 32*p1-32(%edi),%eax ;\ ++ mov %eax,32*p1(%edi) ;\ ++ xor 32*p1-28(%edi),%eax ;\ ++ mov %eax,32*p1+4(%edi) ;\ ++ xor 32*p1-24(%edi),%eax ;\ ++ mov %eax,32*p1+8(%edi) ;\ ++ xor 32*p1-20(%edi),%eax ;\ ++ mov %eax,32*p1+12(%edi) ;\ ++ push %ebx ;\ ++ mov %eax,%ebx ;\ ++ mix_col(aes_fl_tab) ;\ ++ pop %ebx ;\ ++ xor %eax,%esi ;\ ++ xor %esi,%ebp ;\ ++ mov %esi,32*p1+16(%edi) ;\ ++ mov %ebp,32*p1+20(%edi) ;\ ++ xor %ebp,%edx ;\ ++ xor %edx,%ebx ;\ ++ mov %edx,32*p1+24(%edi) ;\ ++ mov %ebx,32*p1+28(%edi) ++ ++ .align ALIGN32BYTES ++aes_set_key: ++ pushfl ++ push %ebp ++ mov %esp,%ebp ++ sub $slen,%esp ++ push %ebx ++ push %esi ++ push %edi ++ ++ mov aes_cx(%ebp),%edx // edx -> AES context ++ ++ mov key_ln(%ebp),%ecx // key length ++ cmpl $128,%ecx ++ jb aes_30 ++ shr $3,%ecx ++aes_30: cmpl $32,%ecx ++ je aes_32 ++ cmpl $24,%ecx ++ je aes_32 ++ mov $16,%ecx ++aes_32: shr $2,%ecx ++ mov %ecx,nkey(%edx) ++ ++ lea 6(%ecx),%eax // 10/12/14 for 4/6/8 32-bit key length ++ mov %eax,nrnd(%edx) ++ ++ mov in_key(%ebp),%esi // key input array ++ lea ekey(%edx),%edi // key position in AES context ++ cld ++ push %ebp ++ mov %ecx,%eax // save key length in eax ++ rep ; movsl // words in the key schedule ++ mov -4(%esi),%ebx // put some values in registers ++ mov -8(%esi),%edx // to allow faster code ++ mov -12(%esi),%ebp ++ mov -16(%esi),%esi ++ ++ cmpl $4,%eax // jump on key size ++ je aes_36 ++ cmpl $6,%eax ++ je aes_35 ++ ++ ksc8(0) ++ ksc8(1) ++ ksc8(2) ++ ksc8(3) ++ ksc8(4) ++ ksc8(5) ++ ksc8(6) ++ jmp aes_37 ++aes_35: ksc6(0) ++ ksc6(1) ++ ksc6(2) ++ ksc6(3) ++ ksc6(4) ++ ksc6(5) ++ ksc6(6) ++ ksc6(7) ++ jmp aes_37 ++aes_36: ksc4(0) ++ ksc4(1) ++ ksc4(2) ++ ksc4(3) ++ ksc4(4) ++ ksc4(5) ++ ksc4(6) ++ ksc4(7) ++ ksc4(8) ++ ksc4(9) ++aes_37: pop %ebp ++ mov aes_cx(%ebp),%edx // edx -> AES context ++ cmpl $0,ed_flg(%ebp) ++ jne aes_39 ++ ++// compile decryption key schedule from encryption schedule - reverse ++// order and do mix_column operation on round keys except first and 
last ++ ++ mov nrnd(%edx),%eax // kt = cx->d_key + nc * cx->Nrnd ++ shl $2,%eax ++ lea dkey(%edx,%eax,4),%edi ++ lea ekey(%edx),%esi // kf = cx->e_key ++ ++ movsl // copy first round key (unmodified) ++ movsl ++ movsl ++ movsl ++ sub $32,%edi ++ movl $1,cnt(%ebp) ++aes_38: // do mix column on each column of ++ lodsl // each round key ++ mov %eax,%ebx ++ mix_col(aes_im_tab) ++ stosl ++ lodsl ++ mov %eax,%ebx ++ mix_col(aes_im_tab) ++ stosl ++ lodsl ++ mov %eax,%ebx ++ mix_col(aes_im_tab) ++ stosl ++ lodsl ++ mov %eax,%ebx ++ mix_col(aes_im_tab) ++ stosl ++ sub $32,%edi ++ ++ incl cnt(%ebp) ++ mov cnt(%ebp),%eax ++ cmp nrnd(%edx),%eax ++ jb aes_38 ++ ++ movsl // copy last round key (unmodified) ++ movsl ++ movsl ++ movsl ++aes_39: pop %edi ++ pop %esi ++ pop %ebx ++ mov %ebp,%esp ++ pop %ebp ++ popfl ++ ret ++ ++ ++// finite field multiplies by {02}, {04} and {08} ++ ++#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) ++#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) ++#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) ++ ++// finite field multiplies required in table generation ++ ++#define f3(x) (f2(x) ^ x) ++#define f9(x) (f8(x) ^ x) ++#define fb(x) (f8(x) ^ f2(x) ^ x) ++#define fd(x) (f8(x) ^ f4(x) ^ x) ++#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) ++ ++// These defines generate the forward table entries ++ ++#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x)) ++#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x)) ++#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x) ++#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x) ++ ++// These defines generate the inverse table entries ++ ++#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x)) ++#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x)) ++#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x)) ++#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x)) ++ ++// These defines generate entries for 
the last round tables ++ ++#define w0(x) (x) ++#define w1(x) (x << 8) ++#define w2(x) (x << 16) ++#define w3(x) (x << 24) ++ ++// macro to generate inverse mix column tables (needed for the key schedule) ++ ++#define im_data0(p1) \ ++ .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\ ++ .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\ ++ .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\ ++ .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f) ++#define im_data1(p1) \ ++ .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\ ++ .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\ ++ .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\ ++ .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f) ++#define im_data2(p1) \ ++ .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\ ++ .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\ ++ .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\ ++ .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f) ++#define im_data3(p1) \ ++ .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\ ++ .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\ ++ .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\ ++ .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f) ++#define im_data4(p1) \ ++ .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\ ++ .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\ ++ .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\ ++ .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f) 
++#define im_data5(p1) \ ++ .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\ ++ .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\ ++ .long p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\ ++ .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf) ++#define im_data6(p1) \ ++ .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\ ++ .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\ ++ .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\ ++ .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf) ++#define im_data7(p1) \ ++ .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\ ++ .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\ ++ .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\ ++ .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff) ++ ++// S-box data - 256 entries ++ ++#define sb_data0(p1) \ ++ .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\ ++ .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\ ++ .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\ ++ .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0) ++#define sb_data1(p1) \ ++ .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\ ++ .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\ ++ .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\ ++ .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75) ++#define sb_data2(p1) \ ++ .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\ ++ .long 
p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\ ++ .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\ ++ .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf) ++#define sb_data3(p1) \ ++ .long p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\ ++ .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\ ++ .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\ ++ .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2) ++#define sb_data4(p1) \ ++ .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\ ++ .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\ ++ .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\ ++ .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb) ++#define sb_data5(p1) \ ++ .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\ ++ .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\ ++ .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\ ++ .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08) ++#define sb_data6(p1) \ ++ .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\ ++ .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\ ++ .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\ ++ .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e) ++#define sb_data7(p1) \ ++ .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\ ++ .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\ ++ .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\ ++ .long 
p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16) ++ ++// Inverse S-box data - 256 entries ++ ++#define ib_data0(p1) \ ++ .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\ ++ .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\ ++ .long p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\ ++ .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb) ++#define ib_data1(p1) \ ++ .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\ ++ .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\ ++ .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\ ++ .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25) ++#define ib_data2(p1) \ ++ .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\ ++ .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\ ++ .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\ ++ .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84) ++#define ib_data3(p1) \ ++ .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\ ++ .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\ ++ .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\ ++ .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b) ++#define ib_data4(p1) \ ++ .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\ ++ .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\ ++ .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\ ++ .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e) ++#define ib_data5(p1) \ ++ .long 
p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\ ++ .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\ ++ .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\ ++ .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4) ++#define ib_data6(p1) \ ++ .long p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\ ++ .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\ ++ .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\ ++ .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef) ++#define ib_data7(p1) \ ++ .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\ ++ .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\ ++ .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\ ++ .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d) ++ ++// The rcon_table (needed for the key schedule) ++// ++// Here is original Dr Brian Gladman's source code: ++// _rcon_tab: ++// %assign x 1 ++// %rep 29 ++// dd x ++// %assign x f2(x) ++// %endrep ++// ++// Here is precomputed output (it's more portable this way): ++ ++ .align ALIGN32BYTES ++aes_rcon_tab: ++ .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80 ++ .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f ++ .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4 ++ .long 0xb3,0x7d,0xfa,0xef,0xc5 ++ ++// The forward xor tables ++ ++ .align ALIGN32BYTES ++aes_ft_tab: ++ sb_data0(u0) ++ sb_data1(u0) ++ sb_data2(u0) ++ sb_data3(u0) ++ sb_data4(u0) ++ sb_data5(u0) ++ sb_data6(u0) ++ sb_data7(u0) ++ ++ sb_data0(u1) ++ sb_data1(u1) ++ sb_data2(u1) ++ sb_data3(u1) ++ sb_data4(u1) ++ sb_data5(u1) ++ sb_data6(u1) ++ sb_data7(u1) ++ ++ sb_data0(u2) ++ sb_data1(u2) ++ sb_data2(u2) ++ sb_data3(u2) ++ sb_data4(u2) ++ sb_data5(u2) ++ sb_data6(u2) ++ sb_data7(u2) ++ ++ 
sb_data0(u3) ++ sb_data1(u3) ++ sb_data2(u3) ++ sb_data3(u3) ++ sb_data4(u3) ++ sb_data5(u3) ++ sb_data6(u3) ++ sb_data7(u3) ++ ++ .align ALIGN32BYTES ++aes_fl_tab: ++ sb_data0(w0) ++ sb_data1(w0) ++ sb_data2(w0) ++ sb_data3(w0) ++ sb_data4(w0) ++ sb_data5(w0) ++ sb_data6(w0) ++ sb_data7(w0) ++ ++ sb_data0(w1) ++ sb_data1(w1) ++ sb_data2(w1) ++ sb_data3(w1) ++ sb_data4(w1) ++ sb_data5(w1) ++ sb_data6(w1) ++ sb_data7(w1) ++ ++ sb_data0(w2) ++ sb_data1(w2) ++ sb_data2(w2) ++ sb_data3(w2) ++ sb_data4(w2) ++ sb_data5(w2) ++ sb_data6(w2) ++ sb_data7(w2) ++ ++ sb_data0(w3) ++ sb_data1(w3) ++ sb_data2(w3) ++ sb_data3(w3) ++ sb_data4(w3) ++ sb_data5(w3) ++ sb_data6(w3) ++ sb_data7(w3) ++ ++// The inverse xor tables ++ ++ .align ALIGN32BYTES ++aes_it_tab: ++ ib_data0(v0) ++ ib_data1(v0) ++ ib_data2(v0) ++ ib_data3(v0) ++ ib_data4(v0) ++ ib_data5(v0) ++ ib_data6(v0) ++ ib_data7(v0) ++ ++ ib_data0(v1) ++ ib_data1(v1) ++ ib_data2(v1) ++ ib_data3(v1) ++ ib_data4(v1) ++ ib_data5(v1) ++ ib_data6(v1) ++ ib_data7(v1) ++ ++ ib_data0(v2) ++ ib_data1(v2) ++ ib_data2(v2) ++ ib_data3(v2) ++ ib_data4(v2) ++ ib_data5(v2) ++ ib_data6(v2) ++ ib_data7(v2) ++ ++ ib_data0(v3) ++ ib_data1(v3) ++ ib_data2(v3) ++ ib_data3(v3) ++ ib_data4(v3) ++ ib_data5(v3) ++ ib_data6(v3) ++ ib_data7(v3) ++ ++ .align ALIGN32BYTES ++aes_il_tab: ++ ib_data0(w0) ++ ib_data1(w0) ++ ib_data2(w0) ++ ib_data3(w0) ++ ib_data4(w0) ++ ib_data5(w0) ++ ib_data6(w0) ++ ib_data7(w0) ++ ++ ib_data0(w1) ++ ib_data1(w1) ++ ib_data2(w1) ++ ib_data3(w1) ++ ib_data4(w1) ++ ib_data5(w1) ++ ib_data6(w1) ++ ib_data7(w1) ++ ++ ib_data0(w2) ++ ib_data1(w2) ++ ib_data2(w2) ++ ib_data3(w2) ++ ib_data4(w2) ++ ib_data5(w2) ++ ib_data6(w2) ++ ib_data7(w2) ++ ++ ib_data0(w3) ++ ib_data1(w3) ++ ib_data2(w3) ++ ib_data3(w3) ++ ib_data4(w3) ++ ib_data5(w3) ++ ib_data6(w3) ++ ib_data7(w3) ++ ++// The inverse mix column tables ++ ++ .align ALIGN32BYTES ++aes_im_tab: ++ im_data0(v0) ++ im_data1(v0) ++ im_data2(v0) ++ im_data3(v0) ++ im_data4(v0) ++ 
im_data5(v0) ++ im_data6(v0) ++ im_data7(v0) ++ ++ im_data0(v1) ++ im_data1(v1) ++ im_data2(v1) ++ im_data3(v1) ++ im_data4(v1) ++ im_data5(v1) ++ im_data6(v1) ++ im_data7(v1) ++ ++ im_data0(v2) ++ im_data1(v2) ++ im_data2(v2) ++ im_data3(v2) ++ im_data4(v2) ++ im_data5(v2) ++ im_data6(v2) ++ im_data7(v2) ++ ++ im_data0(v3) ++ im_data1(v3) ++ im_data2(v3) ++ im_data3(v3) ++ im_data4(v3) ++ im_data5(v3) ++ im_data6(v3) ++ im_data7(v3) +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/aes/aes.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,1415 @@ ++// I retain copyright in this code but I encourage its free use provided ++// that I don't carry any responsibility for the results. I am especially ++// happy to see it used in free and open source software. If you do use ++// it I would appreciate an acknowledgement of its origin in the code or ++// the product that results and I would also appreciate knowing a little ++// about the use to which it is being put. I am grateful to Frank Yellin ++// for some ideas that are used in this implementation. ++// ++// Dr B. R. Gladman 6th April 2001. ++// ++// This is an implementation of the AES encryption algorithm (Rijndael) ++// designed by Joan Daemen and Vincent Rijmen. This version is designed ++// to provide both fixed and dynamic block and key lengths and can also ++// run with either big or little endian internal byte order (see aes.h). ++// It inputs block and key lengths in bytes with the legal values being ++// 16, 24 and 32. ++ ++/* ++ * Modified by Jari Ruusu, May 1 2001 ++ * - Fixed some compile warnings, code was ok but gcc warned anyway. ++ * - Changed basic types: byte -> unsigned char, word -> u_int32_t ++ * - Major name space cleanup: Names visible to outside now begin ++ * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c ++ * - Removed C++ and DLL support as part of name space cleanup. ++ * - Eliminated unnecessary recomputation of tables. 
(actual bug fix) ++ * - Merged precomputed constant tables to aes.c file. ++ * - Removed data alignment restrictions for portability reasons. ++ * - Made block and key lengths accept bit count (128/192/256) ++ * as well byte count (16/24/32). ++ * - Removed all error checks. This change also eliminated the need ++ * to preinitialize the context struct to zero. ++ * - Removed some totally unused constants. ++ */ ++ ++#include "crypto/aes.h" ++ ++// CONFIGURATION OPTIONS (see also aes.h) ++// ++// 1. Define UNROLL for full loop unrolling in encryption and decryption. ++// 2. Define PARTIAL_UNROLL to unroll two loops in encryption and decryption. ++// 3. Define FIXED_TABLES for compiled rather than dynamic tables. ++// 4. Define FF_TABLES to use tables for field multiplies and inverses. ++// Do not enable this without understanding stack space requirements. ++// 5. Define ARRAYS to use arrays to hold the local state block. If this ++// is not defined, individually declared 32-bit words are used. ++// 6. Define FAST_VARIABLE if a high speed variable block implementation ++// is needed (essentially three separate fixed block size code sequences) ++// 7. Define either ONE_TABLE or FOUR_TABLES for a fast table driven ++// version using 1 table (2 kbytes of table space) or 4 tables (8 ++// kbytes of table space) for higher speed. ++// 8. Define either ONE_LR_TABLE or FOUR_LR_TABLES for a further speed ++// increase by using tables for the last rounds but with more table ++// space (2 or 8 kbytes extra). ++// 9. If neither ONE_TABLE nor FOUR_TABLES is defined, a compact but ++// slower version is provided. ++// 10. If fast decryption key scheduling is needed define ONE_IM_TABLE ++// or FOUR_IM_TABLES for higher speed (2 or 8 kbytes extra). 
++ ++#define UNROLL ++//#define PARTIAL_UNROLL ++ ++#define FIXED_TABLES ++//#define FF_TABLES ++//#define ARRAYS ++#define FAST_VARIABLE ++ ++//#define ONE_TABLE ++#define FOUR_TABLES ++ ++//#define ONE_LR_TABLE ++#define FOUR_LR_TABLES ++ ++//#define ONE_IM_TABLE ++#define FOUR_IM_TABLES ++ ++#if defined(UNROLL) && defined (PARTIAL_UNROLL) ++#error both UNROLL and PARTIAL_UNROLL are defined ++#endif ++ ++#if defined(ONE_TABLE) && defined (FOUR_TABLES) ++#error both ONE_TABLE and FOUR_TABLES are defined ++#endif ++ ++#if defined(ONE_LR_TABLE) && defined (FOUR_LR_TABLES) ++#error both ONE_LR_TABLE and FOUR_LR_TABLES are defined ++#endif ++ ++#if defined(ONE_IM_TABLE) && defined (FOUR_IM_TABLES) ++#error both ONE_IM_TABLE and FOUR_IM_TABLES are defined ++#endif ++ ++#if defined(AES_BLOCK_SIZE) && AES_BLOCK_SIZE != 16 && AES_BLOCK_SIZE != 24 && AES_BLOCK_SIZE != 32 ++#error an illegal block size has been specified ++#endif ++ ++// upr(x,n): rotates bytes within words by n positions, moving bytes ++// to higher index positions with wrap around into low positions ++// ups(x,n): moves bytes by n positions to higher index positions in ++// words but without wrap around ++// bval(x,n): extracts a byte from a word ++ ++#define upr(x,n) (((x) << 8 * (n)) | ((x) >> (32 - 8 * (n)))) ++#define ups(x,n) ((x) << 8 * (n)) ++#define bval(x,n) ((unsigned char)((x) >> 8 * (n))) ++#define bytes2word(b0, b1, b2, b3) \ ++ ((u_int32_t)(b3) << 24 | (u_int32_t)(b2) << 16 | (u_int32_t)(b1) << 8 | (b0)) ++ ++ ++/* little endian processor without data alignment restrictions: AES_LE_OK */ ++/* original code: i386 */ ++#if defined(i386) || defined(_I386) || defined(__i386__) || defined(__i386) ++#define AES_LE_OK 1 ++/* added (tested): alpha --jjo */ ++#elif defined(__alpha__)|| defined (__alpha) ++#define AES_LE_OK 1 ++/* added (tested): ia64 --jjo */ ++#elif defined(__ia64__)|| defined (__ia64) ++#define AES_LE_OK 1 ++#endif ++ ++#ifdef AES_LE_OK ++/* little endian processor without data 
alignment restrictions */ ++#define word_in(x) *(u_int32_t*)(x) ++#define const_word_in(x) *(const u_int32_t*)(x) ++#define word_out(x,v) *(u_int32_t*)(x) = (v) ++#define const_word_out(x,v) *(const u_int32_t*)(x) = (v) ++#else ++/* slower but generic big endian or with data alignment restrictions */ ++/* some additional "const" touches to stop "gcc -Wcast-qual" complains --jjo */ ++#define word_in(x) ((u_int32_t)(((unsigned char *)(x))[0])|((u_int32_t)(((unsigned char *)(x))[1])<<8)|((u_int32_t)(((unsigned char *)(x))[2])<<16)|((u_int32_t)(((unsigned char *)(x))[3])<<24)) ++#define const_word_in(x) ((const u_int32_t)(((const unsigned char *)(x))[0])|((const u_int32_t)(((const unsigned char *)(x))[1])<<8)|((const u_int32_t)(((const unsigned char *)(x))[2])<<16)|((const u_int32_t)(((const unsigned char *)(x))[3])<<24)) ++#define word_out(x,v) ((unsigned char *)(x))[0]=(v),((unsigned char *)(x))[1]=((v)>>8),((unsigned char *)(x))[2]=((v)>>16),((unsigned char *)(x))[3]=((v)>>24) ++#define const_word_out(x,v) ((const unsigned char *)(x))[0]=(v),((const unsigned char *)(x))[1]=((v)>>8),((const unsigned char *)(x))[2]=((v)>>16),((const unsigned char *)(x))[3]=((v)>>24) ++#endif ++ ++// Disable at least some poor combinations of options ++ ++#if !defined(ONE_TABLE) && !defined(FOUR_TABLES) ++#define FIXED_TABLES ++#undef UNROLL ++#undef ONE_LR_TABLE ++#undef FOUR_LR_TABLES ++#undef ONE_IM_TABLE ++#undef FOUR_IM_TABLES ++#elif !defined(FOUR_TABLES) ++#ifdef FOUR_LR_TABLES ++#undef FOUR_LR_TABLES ++#define ONE_LR_TABLE ++#endif ++#ifdef FOUR_IM_TABLES ++#undef FOUR_IM_TABLES ++#define ONE_IM_TABLE ++#endif ++#elif !defined(AES_BLOCK_SIZE) ++#if defined(UNROLL) ++#define PARTIAL_UNROLL ++#undef UNROLL ++#endif ++#endif ++ ++// the finite field modular polynomial and elements ++ ++#define ff_poly 0x011b ++#define ff_hi 0x80 ++ ++// multiply four bytes in GF(2^8) by 'x' {02} in parallel ++ ++#define m1 0x80808080 ++#define m2 0x7f7f7f7f ++#define m3 0x0000001b ++#define 
FFmulX(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * m3)) ++ ++// The following defines provide alternative definitions of FFmulX that might ++// give improved performance if a fast 32-bit multiply is not available. Note ++// that a temporary variable u needs to be defined where FFmulX is used. ++ ++// #define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6)) ++// #define m4 0x1b1b1b1b ++// #define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4) ++ ++// perform column mix operation on four bytes in parallel ++ ++#define fwd_mcol(x) (f2 = FFmulX(x), f2 ^ upr(x ^ f2,3) ^ upr(x,2) ^ upr(x,1)) ++ ++#if defined(FIXED_TABLES) ++ ++// the S-Box table ++ ++static const unsigned char s_box[256] = ++{ ++ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, ++ 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, ++ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, ++ 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, ++ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, ++ 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, ++ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, ++ 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, ++ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, ++ 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, ++ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, ++ 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, ++ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, ++ 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, ++ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, ++ 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, ++ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, ++ 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, ++ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, ++ 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, ++ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, ++ 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, ++ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, ++ 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, ++ 0xba, 0x78, 0x25, 0x2e, 
0x1c, 0xa6, 0xb4, 0xc6, ++ 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, ++ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, ++ 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, ++ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, ++ 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, ++ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, ++ 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 ++}; ++ ++// the inverse S-Box table ++ ++static const unsigned char inv_s_box[256] = ++{ ++ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, ++ 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, ++ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, ++ 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, ++ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, ++ 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, ++ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, ++ 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, ++ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, ++ 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, ++ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, ++ 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, ++ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, ++ 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, ++ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, ++ 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, ++ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, ++ 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, ++ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, ++ 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, ++ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, ++ 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, ++ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, ++ 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, ++ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, ++ 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, ++ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, ++ 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, ++ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, ++ 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, 
++ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, ++ 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d ++}; ++ ++#define w0(p) 0x000000##p ++ ++// Number of elements required in this table for different ++// block and key lengths is: ++// ++// Nk = 4 6 8 ++// ---------- ++// Nb = 4 | 10 8 7 ++// 6 | 19 12 11 ++// 8 | 29 19 14 ++// ++// this table can be a table of bytes if the key schedule ++// code is adjusted accordingly ++ ++static const u_int32_t rcon_tab[29] = ++{ ++ w0(01), w0(02), w0(04), w0(08), ++ w0(10), w0(20), w0(40), w0(80), ++ w0(1b), w0(36), w0(6c), w0(d8), ++ w0(ab), w0(4d), w0(9a), w0(2f), ++ w0(5e), w0(bc), w0(63), w0(c6), ++ w0(97), w0(35), w0(6a), w0(d4), ++ w0(b3), w0(7d), w0(fa), w0(ef), ++ w0(c5) ++}; ++ ++#undef w0 ++ ++#define r0(p,q,r,s) 0x##p##q##r##s ++#define r1(p,q,r,s) 0x##q##r##s##p ++#define r2(p,q,r,s) 0x##r##s##p##q ++#define r3(p,q,r,s) 0x##s##p##q##r ++#define w0(p) 0x000000##p ++#define w1(p) 0x0000##p##00 ++#define w2(p) 0x00##p##0000 ++#define w3(p) 0x##p##000000 ++ ++#if defined(FIXED_TABLES) && (defined(ONE_TABLE) || defined(FOUR_TABLES)) ++ ++// data for forward tables (other than last round) ++ ++#define f_table \ ++ r(a5,63,63,c6), r(84,7c,7c,f8), r(99,77,77,ee), r(8d,7b,7b,f6),\ ++ r(0d,f2,f2,ff), r(bd,6b,6b,d6), r(b1,6f,6f,de), r(54,c5,c5,91),\ ++ r(50,30,30,60), r(03,01,01,02), r(a9,67,67,ce), r(7d,2b,2b,56),\ ++ r(19,fe,fe,e7), r(62,d7,d7,b5), r(e6,ab,ab,4d), r(9a,76,76,ec),\ ++ r(45,ca,ca,8f), r(9d,82,82,1f), r(40,c9,c9,89), r(87,7d,7d,fa),\ ++ r(15,fa,fa,ef), r(eb,59,59,b2), r(c9,47,47,8e), r(0b,f0,f0,fb),\ ++ r(ec,ad,ad,41), r(67,d4,d4,b3), r(fd,a2,a2,5f), r(ea,af,af,45),\ ++ r(bf,9c,9c,23), r(f7,a4,a4,53), r(96,72,72,e4), r(5b,c0,c0,9b),\ ++ r(c2,b7,b7,75), r(1c,fd,fd,e1), r(ae,93,93,3d), r(6a,26,26,4c),\ ++ r(5a,36,36,6c), r(41,3f,3f,7e), r(02,f7,f7,f5), r(4f,cc,cc,83),\ ++ r(5c,34,34,68), r(f4,a5,a5,51), r(34,e5,e5,d1), r(08,f1,f1,f9),\ ++ r(93,71,71,e2), r(73,d8,d8,ab), r(53,31,31,62), r(3f,15,15,2a),\ ++ 
r(0c,04,04,08), r(52,c7,c7,95), r(65,23,23,46), r(5e,c3,c3,9d),\ ++ r(28,18,18,30), r(a1,96,96,37), r(0f,05,05,0a), r(b5,9a,9a,2f),\ ++ r(09,07,07,0e), r(36,12,12,24), r(9b,80,80,1b), r(3d,e2,e2,df),\ ++ r(26,eb,eb,cd), r(69,27,27,4e), r(cd,b2,b2,7f), r(9f,75,75,ea),\ ++ r(1b,09,09,12), r(9e,83,83,1d), r(74,2c,2c,58), r(2e,1a,1a,34),\ ++ r(2d,1b,1b,36), r(b2,6e,6e,dc), r(ee,5a,5a,b4), r(fb,a0,a0,5b),\ ++ r(f6,52,52,a4), r(4d,3b,3b,76), r(61,d6,d6,b7), r(ce,b3,b3,7d),\ ++ r(7b,29,29,52), r(3e,e3,e3,dd), r(71,2f,2f,5e), r(97,84,84,13),\ ++ r(f5,53,53,a6), r(68,d1,d1,b9), r(00,00,00,00), r(2c,ed,ed,c1),\ ++ r(60,20,20,40), r(1f,fc,fc,e3), r(c8,b1,b1,79), r(ed,5b,5b,b6),\ ++ r(be,6a,6a,d4), r(46,cb,cb,8d), r(d9,be,be,67), r(4b,39,39,72),\ ++ r(de,4a,4a,94), r(d4,4c,4c,98), r(e8,58,58,b0), r(4a,cf,cf,85),\ ++ r(6b,d0,d0,bb), r(2a,ef,ef,c5), r(e5,aa,aa,4f), r(16,fb,fb,ed),\ ++ r(c5,43,43,86), r(d7,4d,4d,9a), r(55,33,33,66), r(94,85,85,11),\ ++ r(cf,45,45,8a), r(10,f9,f9,e9), r(06,02,02,04), r(81,7f,7f,fe),\ ++ r(f0,50,50,a0), r(44,3c,3c,78), r(ba,9f,9f,25), r(e3,a8,a8,4b),\ ++ r(f3,51,51,a2), r(fe,a3,a3,5d), r(c0,40,40,80), r(8a,8f,8f,05),\ ++ r(ad,92,92,3f), r(bc,9d,9d,21), r(48,38,38,70), r(04,f5,f5,f1),\ ++ r(df,bc,bc,63), r(c1,b6,b6,77), r(75,da,da,af), r(63,21,21,42),\ ++ r(30,10,10,20), r(1a,ff,ff,e5), r(0e,f3,f3,fd), r(6d,d2,d2,bf),\ ++ r(4c,cd,cd,81), r(14,0c,0c,18), r(35,13,13,26), r(2f,ec,ec,c3),\ ++ r(e1,5f,5f,be), r(a2,97,97,35), r(cc,44,44,88), r(39,17,17,2e),\ ++ r(57,c4,c4,93), r(f2,a7,a7,55), r(82,7e,7e,fc), r(47,3d,3d,7a),\ ++ r(ac,64,64,c8), r(e7,5d,5d,ba), r(2b,19,19,32), r(95,73,73,e6),\ ++ r(a0,60,60,c0), r(98,81,81,19), r(d1,4f,4f,9e), r(7f,dc,dc,a3),\ ++ r(66,22,22,44), r(7e,2a,2a,54), r(ab,90,90,3b), r(83,88,88,0b),\ ++ r(ca,46,46,8c), r(29,ee,ee,c7), r(d3,b8,b8,6b), r(3c,14,14,28),\ ++ r(79,de,de,a7), r(e2,5e,5e,bc), r(1d,0b,0b,16), r(76,db,db,ad),\ ++ r(3b,e0,e0,db), r(56,32,32,64), r(4e,3a,3a,74), r(1e,0a,0a,14),\ ++ r(db,49,49,92), 
r(0a,06,06,0c), r(6c,24,24,48), r(e4,5c,5c,b8),\ ++ r(5d,c2,c2,9f), r(6e,d3,d3,bd), r(ef,ac,ac,43), r(a6,62,62,c4),\ ++ r(a8,91,91,39), r(a4,95,95,31), r(37,e4,e4,d3), r(8b,79,79,f2),\ ++ r(32,e7,e7,d5), r(43,c8,c8,8b), r(59,37,37,6e), r(b7,6d,6d,da),\ ++ r(8c,8d,8d,01), r(64,d5,d5,b1), r(d2,4e,4e,9c), r(e0,a9,a9,49),\ ++ r(b4,6c,6c,d8), r(fa,56,56,ac), r(07,f4,f4,f3), r(25,ea,ea,cf),\ ++ r(af,65,65,ca), r(8e,7a,7a,f4), r(e9,ae,ae,47), r(18,08,08,10),\ ++ r(d5,ba,ba,6f), r(88,78,78,f0), r(6f,25,25,4a), r(72,2e,2e,5c),\ ++ r(24,1c,1c,38), r(f1,a6,a6,57), r(c7,b4,b4,73), r(51,c6,c6,97),\ ++ r(23,e8,e8,cb), r(7c,dd,dd,a1), r(9c,74,74,e8), r(21,1f,1f,3e),\ ++ r(dd,4b,4b,96), r(dc,bd,bd,61), r(86,8b,8b,0d), r(85,8a,8a,0f),\ ++ r(90,70,70,e0), r(42,3e,3e,7c), r(c4,b5,b5,71), r(aa,66,66,cc),\ ++ r(d8,48,48,90), r(05,03,03,06), r(01,f6,f6,f7), r(12,0e,0e,1c),\ ++ r(a3,61,61,c2), r(5f,35,35,6a), r(f9,57,57,ae), r(d0,b9,b9,69),\ ++ r(91,86,86,17), r(58,c1,c1,99), r(27,1d,1d,3a), r(b9,9e,9e,27),\ ++ r(38,e1,e1,d9), r(13,f8,f8,eb), r(b3,98,98,2b), r(33,11,11,22),\ ++ r(bb,69,69,d2), r(70,d9,d9,a9), r(89,8e,8e,07), r(a7,94,94,33),\ ++ r(b6,9b,9b,2d), r(22,1e,1e,3c), r(92,87,87,15), r(20,e9,e9,c9),\ ++ r(49,ce,ce,87), r(ff,55,55,aa), r(78,28,28,50), r(7a,df,df,a5),\ ++ r(8f,8c,8c,03), r(f8,a1,a1,59), r(80,89,89,09), r(17,0d,0d,1a),\ ++ r(da,bf,bf,65), r(31,e6,e6,d7), r(c6,42,42,84), r(b8,68,68,d0),\ ++ r(c3,41,41,82), r(b0,99,99,29), r(77,2d,2d,5a), r(11,0f,0f,1e),\ ++ r(cb,b0,b0,7b), r(fc,54,54,a8), r(d6,bb,bb,6d), r(3a,16,16,2c) ++ ++// data for inverse tables (other than last round) ++ ++#define i_table \ ++ r(50,a7,f4,51), r(53,65,41,7e), r(c3,a4,17,1a), r(96,5e,27,3a),\ ++ r(cb,6b,ab,3b), r(f1,45,9d,1f), r(ab,58,fa,ac), r(93,03,e3,4b),\ ++ r(55,fa,30,20), r(f6,6d,76,ad), r(91,76,cc,88), r(25,4c,02,f5),\ ++ r(fc,d7,e5,4f), r(d7,cb,2a,c5), r(80,44,35,26), r(8f,a3,62,b5),\ ++ r(49,5a,b1,de), r(67,1b,ba,25), r(98,0e,ea,45), r(e1,c0,fe,5d),\ ++ r(02,75,2f,c3), r(12,f0,4c,81), 
r(a3,97,46,8d), r(c6,f9,d3,6b),\ ++ r(e7,5f,8f,03), r(95,9c,92,15), r(eb,7a,6d,bf), r(da,59,52,95),\ ++ r(2d,83,be,d4), r(d3,21,74,58), r(29,69,e0,49), r(44,c8,c9,8e),\ ++ r(6a,89,c2,75), r(78,79,8e,f4), r(6b,3e,58,99), r(dd,71,b9,27),\ ++ r(b6,4f,e1,be), r(17,ad,88,f0), r(66,ac,20,c9), r(b4,3a,ce,7d),\ ++ r(18,4a,df,63), r(82,31,1a,e5), r(60,33,51,97), r(45,7f,53,62),\ ++ r(e0,77,64,b1), r(84,ae,6b,bb), r(1c,a0,81,fe), r(94,2b,08,f9),\ ++ r(58,68,48,70), r(19,fd,45,8f), r(87,6c,de,94), r(b7,f8,7b,52),\ ++ r(23,d3,73,ab), r(e2,02,4b,72), r(57,8f,1f,e3), r(2a,ab,55,66),\ ++ r(07,28,eb,b2), r(03,c2,b5,2f), r(9a,7b,c5,86), r(a5,08,37,d3),\ ++ r(f2,87,28,30), r(b2,a5,bf,23), r(ba,6a,03,02), r(5c,82,16,ed),\ ++ r(2b,1c,cf,8a), r(92,b4,79,a7), r(f0,f2,07,f3), r(a1,e2,69,4e),\ ++ r(cd,f4,da,65), r(d5,be,05,06), r(1f,62,34,d1), r(8a,fe,a6,c4),\ ++ r(9d,53,2e,34), r(a0,55,f3,a2), r(32,e1,8a,05), r(75,eb,f6,a4),\ ++ r(39,ec,83,0b), r(aa,ef,60,40), r(06,9f,71,5e), r(51,10,6e,bd),\ ++ r(f9,8a,21,3e), r(3d,06,dd,96), r(ae,05,3e,dd), r(46,bd,e6,4d),\ ++ r(b5,8d,54,91), r(05,5d,c4,71), r(6f,d4,06,04), r(ff,15,50,60),\ ++ r(24,fb,98,19), r(97,e9,bd,d6), r(cc,43,40,89), r(77,9e,d9,67),\ ++ r(bd,42,e8,b0), r(88,8b,89,07), r(38,5b,19,e7), r(db,ee,c8,79),\ ++ r(47,0a,7c,a1), r(e9,0f,42,7c), r(c9,1e,84,f8), r(00,00,00,00),\ ++ r(83,86,80,09), r(48,ed,2b,32), r(ac,70,11,1e), r(4e,72,5a,6c),\ ++ r(fb,ff,0e,fd), r(56,38,85,0f), r(1e,d5,ae,3d), r(27,39,2d,36),\ ++ r(64,d9,0f,0a), r(21,a6,5c,68), r(d1,54,5b,9b), r(3a,2e,36,24),\ ++ r(b1,67,0a,0c), r(0f,e7,57,93), r(d2,96,ee,b4), r(9e,91,9b,1b),\ ++ r(4f,c5,c0,80), r(a2,20,dc,61), r(69,4b,77,5a), r(16,1a,12,1c),\ ++ r(0a,ba,93,e2), r(e5,2a,a0,c0), r(43,e0,22,3c), r(1d,17,1b,12),\ ++ r(0b,0d,09,0e), r(ad,c7,8b,f2), r(b9,a8,b6,2d), r(c8,a9,1e,14),\ ++ r(85,19,f1,57), r(4c,07,75,af), r(bb,dd,99,ee), r(fd,60,7f,a3),\ ++ r(9f,26,01,f7), r(bc,f5,72,5c), r(c5,3b,66,44), r(34,7e,fb,5b),\ ++ r(76,29,43,8b), r(dc,c6,23,cb), r(68,fc,ed,b6), 
r(63,f1,e4,b8),\ ++ r(ca,dc,31,d7), r(10,85,63,42), r(40,22,97,13), r(20,11,c6,84),\ ++ r(7d,24,4a,85), r(f8,3d,bb,d2), r(11,32,f9,ae), r(6d,a1,29,c7),\ ++ r(4b,2f,9e,1d), r(f3,30,b2,dc), r(ec,52,86,0d), r(d0,e3,c1,77),\ ++ r(6c,16,b3,2b), r(99,b9,70,a9), r(fa,48,94,11), r(22,64,e9,47),\ ++ r(c4,8c,fc,a8), r(1a,3f,f0,a0), r(d8,2c,7d,56), r(ef,90,33,22),\ ++ r(c7,4e,49,87), r(c1,d1,38,d9), r(fe,a2,ca,8c), r(36,0b,d4,98),\ ++ r(cf,81,f5,a6), r(28,de,7a,a5), r(26,8e,b7,da), r(a4,bf,ad,3f),\ ++ r(e4,9d,3a,2c), r(0d,92,78,50), r(9b,cc,5f,6a), r(62,46,7e,54),\ ++ r(c2,13,8d,f6), r(e8,b8,d8,90), r(5e,f7,39,2e), r(f5,af,c3,82),\ ++ r(be,80,5d,9f), r(7c,93,d0,69), r(a9,2d,d5,6f), r(b3,12,25,cf),\ ++ r(3b,99,ac,c8), r(a7,7d,18,10), r(6e,63,9c,e8), r(7b,bb,3b,db),\ ++ r(09,78,26,cd), r(f4,18,59,6e), r(01,b7,9a,ec), r(a8,9a,4f,83),\ ++ r(65,6e,95,e6), r(7e,e6,ff,aa), r(08,cf,bc,21), r(e6,e8,15,ef),\ ++ r(d9,9b,e7,ba), r(ce,36,6f,4a), r(d4,09,9f,ea), r(d6,7c,b0,29),\ ++ r(af,b2,a4,31), r(31,23,3f,2a), r(30,94,a5,c6), r(c0,66,a2,35),\ ++ r(37,bc,4e,74), r(a6,ca,82,fc), r(b0,d0,90,e0), r(15,d8,a7,33),\ ++ r(4a,98,04,f1), r(f7,da,ec,41), r(0e,50,cd,7f), r(2f,f6,91,17),\ ++ r(8d,d6,4d,76), r(4d,b0,ef,43), r(54,4d,aa,cc), r(df,04,96,e4),\ ++ r(e3,b5,d1,9e), r(1b,88,6a,4c), r(b8,1f,2c,c1), r(7f,51,65,46),\ ++ r(04,ea,5e,9d), r(5d,35,8c,01), r(73,74,87,fa), r(2e,41,0b,fb),\ ++ r(5a,1d,67,b3), r(52,d2,db,92), r(33,56,10,e9), r(13,47,d6,6d),\ ++ r(8c,61,d7,9a), r(7a,0c,a1,37), r(8e,14,f8,59), r(89,3c,13,eb),\ ++ r(ee,27,a9,ce), r(35,c9,61,b7), r(ed,e5,1c,e1), r(3c,b1,47,7a),\ ++ r(59,df,d2,9c), r(3f,73,f2,55), r(79,ce,14,18), r(bf,37,c7,73),\ ++ r(ea,cd,f7,53), r(5b,aa,fd,5f), r(14,6f,3d,df), r(86,db,44,78),\ ++ r(81,f3,af,ca), r(3e,c4,68,b9), r(2c,34,24,38), r(5f,40,a3,c2),\ ++ r(72,c3,1d,16), r(0c,25,e2,bc), r(8b,49,3c,28), r(41,95,0d,ff),\ ++ r(71,01,a8,39), r(de,b3,0c,08), r(9c,e4,b4,d8), r(90,c1,56,64),\ ++ r(61,84,cb,7b), r(70,b6,32,d5), r(74,5c,6c,48), r(42,57,b8,d0) ++ ++// 
generate the required tables in the desired endian format ++ ++#undef r ++#define r r0 ++ ++#if defined(ONE_TABLE) ++static const u_int32_t ft_tab[256] = ++ { f_table }; ++#elif defined(FOUR_TABLES) ++static const u_int32_t ft_tab[4][256] = ++{ { f_table }, ++#undef r ++#define r r1 ++ { f_table }, ++#undef r ++#define r r2 ++ { f_table }, ++#undef r ++#define r r3 ++ { f_table } ++}; ++#endif ++ ++#undef r ++#define r r0 ++#if defined(ONE_TABLE) ++static const u_int32_t it_tab[256] = ++ { i_table }; ++#elif defined(FOUR_TABLES) ++static const u_int32_t it_tab[4][256] = ++{ { i_table }, ++#undef r ++#define r r1 ++ { i_table }, ++#undef r ++#define r r2 ++ { i_table }, ++#undef r ++#define r r3 ++ { i_table } ++}; ++#endif ++ ++#endif ++ ++#if defined(FIXED_TABLES) && (defined(ONE_LR_TABLE) || defined(FOUR_LR_TABLES)) ++ ++// data for inverse tables (last round) ++ ++#define li_table \ ++ w(52), w(09), w(6a), w(d5), w(30), w(36), w(a5), w(38),\ ++ w(bf), w(40), w(a3), w(9e), w(81), w(f3), w(d7), w(fb),\ ++ w(7c), w(e3), w(39), w(82), w(9b), w(2f), w(ff), w(87),\ ++ w(34), w(8e), w(43), w(44), w(c4), w(de), w(e9), w(cb),\ ++ w(54), w(7b), w(94), w(32), w(a6), w(c2), w(23), w(3d),\ ++ w(ee), w(4c), w(95), w(0b), w(42), w(fa), w(c3), w(4e),\ ++ w(08), w(2e), w(a1), w(66), w(28), w(d9), w(24), w(b2),\ ++ w(76), w(5b), w(a2), w(49), w(6d), w(8b), w(d1), w(25),\ ++ w(72), w(f8), w(f6), w(64), w(86), w(68), w(98), w(16),\ ++ w(d4), w(a4), w(5c), w(cc), w(5d), w(65), w(b6), w(92),\ ++ w(6c), w(70), w(48), w(50), w(fd), w(ed), w(b9), w(da),\ ++ w(5e), w(15), w(46), w(57), w(a7), w(8d), w(9d), w(84),\ ++ w(90), w(d8), w(ab), w(00), w(8c), w(bc), w(d3), w(0a),\ ++ w(f7), w(e4), w(58), w(05), w(b8), w(b3), w(45), w(06),\ ++ w(d0), w(2c), w(1e), w(8f), w(ca), w(3f), w(0f), w(02),\ ++ w(c1), w(af), w(bd), w(03), w(01), w(13), w(8a), w(6b),\ ++ w(3a), w(91), w(11), w(41), w(4f), w(67), w(dc), w(ea),\ ++ w(97), w(f2), w(cf), w(ce), w(f0), w(b4), w(e6), w(73),\ ++ w(96), w(ac), 
w(74), w(22), w(e7), w(ad), w(35), w(85),\ ++ w(e2), w(f9), w(37), w(e8), w(1c), w(75), w(df), w(6e),\ ++ w(47), w(f1), w(1a), w(71), w(1d), w(29), w(c5), w(89),\ ++ w(6f), w(b7), w(62), w(0e), w(aa), w(18), w(be), w(1b),\ ++ w(fc), w(56), w(3e), w(4b), w(c6), w(d2), w(79), w(20),\ ++ w(9a), w(db), w(c0), w(fe), w(78), w(cd), w(5a), w(f4),\ ++ w(1f), w(dd), w(a8), w(33), w(88), w(07), w(c7), w(31),\ ++ w(b1), w(12), w(10), w(59), w(27), w(80), w(ec), w(5f),\ ++ w(60), w(51), w(7f), w(a9), w(19), w(b5), w(4a), w(0d),\ ++ w(2d), w(e5), w(7a), w(9f), w(93), w(c9), w(9c), w(ef),\ ++ w(a0), w(e0), w(3b), w(4d), w(ae), w(2a), w(f5), w(b0),\ ++ w(c8), w(eb), w(bb), w(3c), w(83), w(53), w(99), w(61),\ ++ w(17), w(2b), w(04), w(7e), w(ba), w(77), w(d6), w(26),\ ++ w(e1), w(69), w(14), w(63), w(55), w(21), w(0c), w(7d), ++ ++// generate the required tables in the desired endian format ++ ++#undef r ++#define r(p,q,r,s) w0(q) ++#if defined(ONE_LR_TABLE) ++static const u_int32_t fl_tab[256] = ++ { f_table }; ++#elif defined(FOUR_LR_TABLES) ++static const u_int32_t fl_tab[4][256] = ++{ { f_table }, ++#undef r ++#define r(p,q,r,s) w1(q) ++ { f_table }, ++#undef r ++#define r(p,q,r,s) w2(q) ++ { f_table }, ++#undef r ++#define r(p,q,r,s) w3(q) ++ { f_table } ++}; ++#endif ++ ++#undef w ++#define w w0 ++#if defined(ONE_LR_TABLE) ++static const u_int32_t il_tab[256] = ++ { li_table }; ++#elif defined(FOUR_LR_TABLES) ++static const u_int32_t il_tab[4][256] = ++{ { li_table }, ++#undef w ++#define w w1 ++ { li_table }, ++#undef w ++#define w w2 ++ { li_table }, ++#undef w ++#define w w3 ++ { li_table } ++}; ++#endif ++ ++#endif ++ ++#if defined(FIXED_TABLES) && (defined(ONE_IM_TABLE) || defined(FOUR_IM_TABLES)) ++ ++#define m_table \ ++ r(00,00,00,00), r(0b,0d,09,0e), r(16,1a,12,1c), r(1d,17,1b,12),\ ++ r(2c,34,24,38), r(27,39,2d,36), r(3a,2e,36,24), r(31,23,3f,2a),\ ++ r(58,68,48,70), r(53,65,41,7e), r(4e,72,5a,6c), r(45,7f,53,62),\ ++ r(74,5c,6c,48), r(7f,51,65,46), r(62,46,7e,54), 
r(69,4b,77,5a),\ ++ r(b0,d0,90,e0), r(bb,dd,99,ee), r(a6,ca,82,fc), r(ad,c7,8b,f2),\ ++ r(9c,e4,b4,d8), r(97,e9,bd,d6), r(8a,fe,a6,c4), r(81,f3,af,ca),\ ++ r(e8,b8,d8,90), r(e3,b5,d1,9e), r(fe,a2,ca,8c), r(f5,af,c3,82),\ ++ r(c4,8c,fc,a8), r(cf,81,f5,a6), r(d2,96,ee,b4), r(d9,9b,e7,ba),\ ++ r(7b,bb,3b,db), r(70,b6,32,d5), r(6d,a1,29,c7), r(66,ac,20,c9),\ ++ r(57,8f,1f,e3), r(5c,82,16,ed), r(41,95,0d,ff), r(4a,98,04,f1),\ ++ r(23,d3,73,ab), r(28,de,7a,a5), r(35,c9,61,b7), r(3e,c4,68,b9),\ ++ r(0f,e7,57,93), r(04,ea,5e,9d), r(19,fd,45,8f), r(12,f0,4c,81),\ ++ r(cb,6b,ab,3b), r(c0,66,a2,35), r(dd,71,b9,27), r(d6,7c,b0,29),\ ++ r(e7,5f,8f,03), r(ec,52,86,0d), r(f1,45,9d,1f), r(fa,48,94,11),\ ++ r(93,03,e3,4b), r(98,0e,ea,45), r(85,19,f1,57), r(8e,14,f8,59),\ ++ r(bf,37,c7,73), r(b4,3a,ce,7d), r(a9,2d,d5,6f), r(a2,20,dc,61),\ ++ r(f6,6d,76,ad), r(fd,60,7f,a3), r(e0,77,64,b1), r(eb,7a,6d,bf),\ ++ r(da,59,52,95), r(d1,54,5b,9b), r(cc,43,40,89), r(c7,4e,49,87),\ ++ r(ae,05,3e,dd), r(a5,08,37,d3), r(b8,1f,2c,c1), r(b3,12,25,cf),\ ++ r(82,31,1a,e5), r(89,3c,13,eb), r(94,2b,08,f9), r(9f,26,01,f7),\ ++ r(46,bd,e6,4d), r(4d,b0,ef,43), r(50,a7,f4,51), r(5b,aa,fd,5f),\ ++ r(6a,89,c2,75), r(61,84,cb,7b), r(7c,93,d0,69), r(77,9e,d9,67),\ ++ r(1e,d5,ae,3d), r(15,d8,a7,33), r(08,cf,bc,21), r(03,c2,b5,2f),\ ++ r(32,e1,8a,05), r(39,ec,83,0b), r(24,fb,98,19), r(2f,f6,91,17),\ ++ r(8d,d6,4d,76), r(86,db,44,78), r(9b,cc,5f,6a), r(90,c1,56,64),\ ++ r(a1,e2,69,4e), r(aa,ef,60,40), r(b7,f8,7b,52), r(bc,f5,72,5c),\ ++ r(d5,be,05,06), r(de,b3,0c,08), r(c3,a4,17,1a), r(c8,a9,1e,14),\ ++ r(f9,8a,21,3e), r(f2,87,28,30), r(ef,90,33,22), r(e4,9d,3a,2c),\ ++ r(3d,06,dd,96), r(36,0b,d4,98), r(2b,1c,cf,8a), r(20,11,c6,84),\ ++ r(11,32,f9,ae), r(1a,3f,f0,a0), r(07,28,eb,b2), r(0c,25,e2,bc),\ ++ r(65,6e,95,e6), r(6e,63,9c,e8), r(73,74,87,fa), r(78,79,8e,f4),\ ++ r(49,5a,b1,de), r(42,57,b8,d0), r(5f,40,a3,c2), r(54,4d,aa,cc),\ ++ r(f7,da,ec,41), r(fc,d7,e5,4f), r(e1,c0,fe,5d), r(ea,cd,f7,53),\ ++ 
r(db,ee,c8,79), r(d0,e3,c1,77), r(cd,f4,da,65), r(c6,f9,d3,6b),\ ++ r(af,b2,a4,31), r(a4,bf,ad,3f), r(b9,a8,b6,2d), r(b2,a5,bf,23),\ ++ r(83,86,80,09), r(88,8b,89,07), r(95,9c,92,15), r(9e,91,9b,1b),\ ++ r(47,0a,7c,a1), r(4c,07,75,af), r(51,10,6e,bd), r(5a,1d,67,b3),\ ++ r(6b,3e,58,99), r(60,33,51,97), r(7d,24,4a,85), r(76,29,43,8b),\ ++ r(1f,62,34,d1), r(14,6f,3d,df), r(09,78,26,cd), r(02,75,2f,c3),\ ++ r(33,56,10,e9), r(38,5b,19,e7), r(25,4c,02,f5), r(2e,41,0b,fb),\ ++ r(8c,61,d7,9a), r(87,6c,de,94), r(9a,7b,c5,86), r(91,76,cc,88),\ ++ r(a0,55,f3,a2), r(ab,58,fa,ac), r(b6,4f,e1,be), r(bd,42,e8,b0),\ ++ r(d4,09,9f,ea), r(df,04,96,e4), r(c2,13,8d,f6), r(c9,1e,84,f8),\ ++ r(f8,3d,bb,d2), r(f3,30,b2,dc), r(ee,27,a9,ce), r(e5,2a,a0,c0),\ ++ r(3c,b1,47,7a), r(37,bc,4e,74), r(2a,ab,55,66), r(21,a6,5c,68),\ ++ r(10,85,63,42), r(1b,88,6a,4c), r(06,9f,71,5e), r(0d,92,78,50),\ ++ r(64,d9,0f,0a), r(6f,d4,06,04), r(72,c3,1d,16), r(79,ce,14,18),\ ++ r(48,ed,2b,32), r(43,e0,22,3c), r(5e,f7,39,2e), r(55,fa,30,20),\ ++ r(01,b7,9a,ec), r(0a,ba,93,e2), r(17,ad,88,f0), r(1c,a0,81,fe),\ ++ r(2d,83,be,d4), r(26,8e,b7,da), r(3b,99,ac,c8), r(30,94,a5,c6),\ ++ r(59,df,d2,9c), r(52,d2,db,92), r(4f,c5,c0,80), r(44,c8,c9,8e),\ ++ r(75,eb,f6,a4), r(7e,e6,ff,aa), r(63,f1,e4,b8), r(68,fc,ed,b6),\ ++ r(b1,67,0a,0c), r(ba,6a,03,02), r(a7,7d,18,10), r(ac,70,11,1e),\ ++ r(9d,53,2e,34), r(96,5e,27,3a), r(8b,49,3c,28), r(80,44,35,26),\ ++ r(e9,0f,42,7c), r(e2,02,4b,72), r(ff,15,50,60), r(f4,18,59,6e),\ ++ r(c5,3b,66,44), r(ce,36,6f,4a), r(d3,21,74,58), r(d8,2c,7d,56),\ ++ r(7a,0c,a1,37), r(71,01,a8,39), r(6c,16,b3,2b), r(67,1b,ba,25),\ ++ r(56,38,85,0f), r(5d,35,8c,01), r(40,22,97,13), r(4b,2f,9e,1d),\ ++ r(22,64,e9,47), r(29,69,e0,49), r(34,7e,fb,5b), r(3f,73,f2,55),\ ++ r(0e,50,cd,7f), r(05,5d,c4,71), r(18,4a,df,63), r(13,47,d6,6d),\ ++ r(ca,dc,31,d7), r(c1,d1,38,d9), r(dc,c6,23,cb), r(d7,cb,2a,c5),\ ++ r(e6,e8,15,ef), r(ed,e5,1c,e1), r(f0,f2,07,f3), r(fb,ff,0e,fd),\ ++ r(92,b4,79,a7), 
r(99,b9,70,a9), r(84,ae,6b,bb), r(8f,a3,62,b5),\ ++ r(be,80,5d,9f), r(b5,8d,54,91), r(a8,9a,4f,83), r(a3,97,46,8d) ++ ++#undef r ++#define r r0 ++ ++#if defined(ONE_IM_TABLE) ++static const u_int32_t im_tab[256] = ++ { m_table }; ++#elif defined(FOUR_IM_TABLES) ++static const u_int32_t im_tab[4][256] = ++{ { m_table }, ++#undef r ++#define r r1 ++ { m_table }, ++#undef r ++#define r r2 ++ { m_table }, ++#undef r ++#define r r3 ++ { m_table } ++}; ++#endif ++ ++#endif ++ ++#else ++ ++static int tab_gen = 0; ++ ++static unsigned char s_box[256]; // the S box ++static unsigned char inv_s_box[256]; // the inverse S box ++static u_int32_t rcon_tab[AES_RC_LENGTH]; // table of round constants ++ ++#if defined(ONE_TABLE) ++static u_int32_t ft_tab[256]; ++static u_int32_t it_tab[256]; ++#elif defined(FOUR_TABLES) ++static u_int32_t ft_tab[4][256]; ++static u_int32_t it_tab[4][256]; ++#endif ++ ++#if defined(ONE_LR_TABLE) ++static u_int32_t fl_tab[256]; ++static u_int32_t il_tab[256]; ++#elif defined(FOUR_LR_TABLES) ++static u_int32_t fl_tab[4][256]; ++static u_int32_t il_tab[4][256]; ++#endif ++ ++#if defined(ONE_IM_TABLE) ++static u_int32_t im_tab[256]; ++#elif defined(FOUR_IM_TABLES) ++static u_int32_t im_tab[4][256]; ++#endif ++ ++// Generate the tables for the dynamic table option ++ ++#if !defined(FF_TABLES) ++ ++// It will generally be sensible to use tables to compute finite ++// field multiplies and inverses but where memory is scarse this ++// code might sometimes be better. ++ ++// return 2 ^ (n - 1) where n is the bit number of the highest bit ++// set in x with x in the range 1 < x < 0x00000200. 
This form is ++// used so that locals within FFinv can be bytes rather than words ++ ++static unsigned char hibit(const u_int32_t x) ++{ unsigned char r = (unsigned char)((x >> 1) | (x >> 2)); ++ ++ r |= (r >> 2); ++ r |= (r >> 4); ++ return (r + 1) >> 1; ++} ++ ++// return the inverse of the finite field element x ++ ++static unsigned char FFinv(const unsigned char x) ++{ unsigned char p1 = x, p2 = 0x1b, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0; ++ ++ if(x < 2) return x; ++ ++ for(;;) ++ { ++ if(!n1) return v1; ++ ++ while(n2 >= n1) ++ { ++ n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2); ++ } ++ ++ if(!n2) return v2; ++ ++ while(n1 >= n2) ++ { ++ n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1); ++ } ++ } ++} ++ ++// define the finite field multiplies required for Rijndael ++ ++#define FFmul02(x) ((((x) & 0x7f) << 1) ^ ((x) & 0x80 ? 0x1b : 0)) ++#define FFmul03(x) ((x) ^ FFmul02(x)) ++#define FFmul09(x) ((x) ^ FFmul02(FFmul02(FFmul02(x)))) ++#define FFmul0b(x) ((x) ^ FFmul02((x) ^ FFmul02(FFmul02(x)))) ++#define FFmul0d(x) ((x) ^ FFmul02(FFmul02((x) ^ FFmul02(x)))) ++#define FFmul0e(x) FFmul02((x) ^ FFmul02((x) ^ FFmul02(x))) ++ ++#else ++ ++#define FFinv(x) ((x) ? pow[255 - log[x]]: 0) ++ ++#define FFmul02(x) (x ? pow[log[x] + 0x19] : 0) ++#define FFmul03(x) (x ? pow[log[x] + 0x01] : 0) ++#define FFmul09(x) (x ? pow[log[x] + 0xc7] : 0) ++#define FFmul0b(x) (x ? pow[log[x] + 0x68] : 0) ++#define FFmul0d(x) (x ? pow[log[x] + 0xee] : 0) ++#define FFmul0e(x) (x ? 
pow[log[x] + 0xdf] : 0) ++ ++#endif ++ ++// The forward and inverse affine transformations used in the S-box ++ ++#define fwd_affine(x) \ ++ (w = (u_int32_t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(unsigned char)(w^(w>>8))) ++ ++#define inv_affine(x) \ ++ (w = (u_int32_t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(unsigned char)(w^(w>>8))) ++ ++static void gen_tabs(void) ++{ u_int32_t i, w; ++ ++#if defined(FF_TABLES) ++ ++ unsigned char pow[512], log[256]; ++ ++ // log and power tables for GF(2^8) finite field with ++ // 0x011b as modular polynomial - the simplest primitive ++ // root is 0x03, used here to generate the tables ++ ++ i = 0; w = 1; ++ do ++ { ++ pow[i] = (unsigned char)w; ++ pow[i + 255] = (unsigned char)w; ++ log[w] = (unsigned char)i++; ++ w ^= (w << 1) ^ (w & ff_hi ? ff_poly : 0); ++ } ++ while (w != 1); ++ ++#endif ++ ++ for(i = 0, w = 1; i < AES_RC_LENGTH; ++i) ++ { ++ rcon_tab[i] = bytes2word(w, 0, 0, 0); ++ w = (w << 1) ^ (w & ff_hi ? ff_poly : 0); ++ } ++ ++ for(i = 0; i < 256; ++i) ++ { unsigned char b; ++ ++ s_box[i] = b = fwd_affine(FFinv((unsigned char)i)); ++ ++ w = bytes2word(b, 0, 0, 0); ++#if defined(ONE_LR_TABLE) ++ fl_tab[i] = w; ++#elif defined(FOUR_LR_TABLES) ++ fl_tab[0][i] = w; ++ fl_tab[1][i] = upr(w,1); ++ fl_tab[2][i] = upr(w,2); ++ fl_tab[3][i] = upr(w,3); ++#endif ++ w = bytes2word(FFmul02(b), b, b, FFmul03(b)); ++#if defined(ONE_TABLE) ++ ft_tab[i] = w; ++#elif defined(FOUR_TABLES) ++ ft_tab[0][i] = w; ++ ft_tab[1][i] = upr(w,1); ++ ft_tab[2][i] = upr(w,2); ++ ft_tab[3][i] = upr(w,3); ++#endif ++ inv_s_box[i] = b = FFinv(inv_affine((unsigned char)i)); ++ ++ w = bytes2word(b, 0, 0, 0); ++#if defined(ONE_LR_TABLE) ++ il_tab[i] = w; ++#elif defined(FOUR_LR_TABLES) ++ il_tab[0][i] = w; ++ il_tab[1][i] = upr(w,1); ++ il_tab[2][i] = upr(w,2); ++ il_tab[3][i] = upr(w,3); ++#endif ++ w = bytes2word(FFmul0e(b), FFmul09(b), FFmul0d(b), FFmul0b(b)); ++#if defined(ONE_TABLE) ++ it_tab[i] = w; ++#elif defined(FOUR_TABLES) ++ it_tab[0][i] = w; 
++ it_tab[1][i] = upr(w,1); ++ it_tab[2][i] = upr(w,2); ++ it_tab[3][i] = upr(w,3); ++#endif ++#if defined(ONE_IM_TABLE) ++ im_tab[b] = w; ++#elif defined(FOUR_IM_TABLES) ++ im_tab[0][b] = w; ++ im_tab[1][b] = upr(w,1); ++ im_tab[2][b] = upr(w,2); ++ im_tab[3][b] = upr(w,3); ++#endif ++ ++ } ++} ++ ++#endif ++ ++#define no_table(x,box,vf,rf,c) bytes2word( \ ++ box[bval(vf(x,0,c),rf(0,c))], \ ++ box[bval(vf(x,1,c),rf(1,c))], \ ++ box[bval(vf(x,2,c),rf(2,c))], \ ++ box[bval(vf(x,3,c),rf(3,c))]) ++ ++#define one_table(x,op,tab,vf,rf,c) \ ++ ( tab[bval(vf(x,0,c),rf(0,c))] \ ++ ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \ ++ ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \ ++ ^ op(tab[bval(vf(x,3,c),rf(3,c))],3)) ++ ++#define four_tables(x,tab,vf,rf,c) \ ++ ( tab[0][bval(vf(x,0,c),rf(0,c))] \ ++ ^ tab[1][bval(vf(x,1,c),rf(1,c))] \ ++ ^ tab[2][bval(vf(x,2,c),rf(2,c))] \ ++ ^ tab[3][bval(vf(x,3,c),rf(3,c))]) ++ ++#define vf1(x,r,c) (x) ++#define rf1(r,c) (r) ++#define rf2(r,c) ((r-c)&3) ++ ++#if defined(FOUR_LR_TABLES) ++#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c) ++#elif defined(ONE_LR_TABLE) ++#define ls_box(x,c) one_table(x,upr,fl_tab,vf1,rf2,c) ++#else ++#define ls_box(x,c) no_table(x,s_box,vf1,rf2,c) ++#endif ++ ++#if defined(FOUR_IM_TABLES) ++#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0) ++#elif defined(ONE_IM_TABLE) ++#define inv_mcol(x) one_table(x,upr,im_tab,vf1,rf1,0) ++#else ++#define inv_mcol(x) \ ++ (f9 = (x),f2 = FFmulX(f9), f4 = FFmulX(f2), f8 = FFmulX(f4), f9 ^= f8, \ ++ f2 ^= f4 ^ f8 ^ upr(f2 ^ f9,3) ^ upr(f4 ^ f9,2) ^ upr(f9,1)) ++#endif ++ ++// Subroutine to set the block size (if variable) in bytes, legal ++// values being 16, 24 and 32. 
++ ++#if defined(AES_BLOCK_SIZE) ++#define nc (AES_BLOCK_SIZE / 4) ++#else ++#define nc (cx->aes_Ncol) ++ ++void aes_set_blk(aes_context *cx, int n_bytes) ++{ ++#if !defined(FIXED_TABLES) ++ if(!tab_gen) { gen_tabs(); tab_gen = 1; } ++#endif ++ ++ switch(n_bytes) { ++ case 32: /* bytes */ ++ case 256: /* bits */ ++ nc = 8; ++ break; ++ case 24: /* bytes */ ++ case 192: /* bits */ ++ nc = 6; ++ break; ++ case 16: /* bytes */ ++ case 128: /* bits */ ++ default: ++ nc = 4; ++ break; ++ } ++} ++ ++#endif ++ ++// Initialise the key schedule from the user supplied key. The key ++// length is now specified in bytes - 16, 24 or 32 as appropriate. ++// This corresponds to bit lengths of 128, 192 and 256 bits, and ++// to Nk values of 4, 6 and 8 respectively. ++ ++#define mx(t,f) (*t++ = inv_mcol(*f),f++) ++#define cp(t,f) *t++ = *f++ ++ ++#if AES_BLOCK_SIZE == 16 ++#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s) ++#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s) ++#elif AES_BLOCK_SIZE == 24 ++#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \ ++ cp(d,s); cp(d,s) ++#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \ ++ mx(d,s); mx(d,s) ++#elif AES_BLOCK_SIZE == 32 ++#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \ ++ cp(d,s); cp(d,s); cp(d,s); cp(d,s) ++#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \ ++ mx(d,s); mx(d,s); mx(d,s); mx(d,s) ++#else ++ ++#define cpy(d,s) \ ++switch(nc) \ ++{ case 8: cp(d,s); cp(d,s); \ ++ case 6: cp(d,s); cp(d,s); \ ++ case 4: cp(d,s); cp(d,s); \ ++ cp(d,s); cp(d,s); \ ++} ++ ++#define mix(d,s) \ ++switch(nc) \ ++{ case 8: mx(d,s); mx(d,s); \ ++ case 6: mx(d,s); mx(d,s); \ ++ case 4: mx(d,s); mx(d,s); \ ++ mx(d,s); mx(d,s); \ ++} ++ ++#endif ++ ++void aes_set_key(aes_context *cx, const unsigned char in_key[], int n_bytes, const int f) ++{ u_int32_t *kf, *kt, rci; ++ ++#if !defined(FIXED_TABLES) ++ if(!tab_gen) { gen_tabs(); tab_gen = 1; } ++#endif ++ ++ switch(n_bytes) { ++ case 32: /* bytes */ ++ case 256: /* bits */ 
++ cx->aes_Nkey = 8; ++ break; ++ case 24: /* bytes */ ++ case 192: /* bits */ ++ cx->aes_Nkey = 6; ++ break; ++ case 16: /* bytes */ ++ case 128: /* bits */ ++ default: ++ cx->aes_Nkey = 4; ++ break; ++ } ++ ++ cx->aes_Nrnd = (cx->aes_Nkey > nc ? cx->aes_Nkey : nc) + 6; ++ ++ cx->aes_e_key[0] = const_word_in(in_key ); ++ cx->aes_e_key[1] = const_word_in(in_key + 4); ++ cx->aes_e_key[2] = const_word_in(in_key + 8); ++ cx->aes_e_key[3] = const_word_in(in_key + 12); ++ ++ kf = cx->aes_e_key; ++ kt = kf + nc * (cx->aes_Nrnd + 1) - cx->aes_Nkey; ++ rci = 0; ++ ++ switch(cx->aes_Nkey) ++ { ++ case 4: do ++ { kf[4] = kf[0] ^ ls_box(kf[3],3) ^ rcon_tab[rci++]; ++ kf[5] = kf[1] ^ kf[4]; ++ kf[6] = kf[2] ^ kf[5]; ++ kf[7] = kf[3] ^ kf[6]; ++ kf += 4; ++ } ++ while(kf < kt); ++ break; ++ ++ case 6: cx->aes_e_key[4] = const_word_in(in_key + 16); ++ cx->aes_e_key[5] = const_word_in(in_key + 20); ++ do ++ { kf[ 6] = kf[0] ^ ls_box(kf[5],3) ^ rcon_tab[rci++]; ++ kf[ 7] = kf[1] ^ kf[ 6]; ++ kf[ 8] = kf[2] ^ kf[ 7]; ++ kf[ 9] = kf[3] ^ kf[ 8]; ++ kf[10] = kf[4] ^ kf[ 9]; ++ kf[11] = kf[5] ^ kf[10]; ++ kf += 6; ++ } ++ while(kf < kt); ++ break; ++ ++ case 8: cx->aes_e_key[4] = const_word_in(in_key + 16); ++ cx->aes_e_key[5] = const_word_in(in_key + 20); ++ cx->aes_e_key[6] = const_word_in(in_key + 24); ++ cx->aes_e_key[7] = const_word_in(in_key + 28); ++ do ++ { kf[ 8] = kf[0] ^ ls_box(kf[7],3) ^ rcon_tab[rci++]; ++ kf[ 9] = kf[1] ^ kf[ 8]; ++ kf[10] = kf[2] ^ kf[ 9]; ++ kf[11] = kf[3] ^ kf[10]; ++ kf[12] = kf[4] ^ ls_box(kf[11],0); ++ kf[13] = kf[5] ^ kf[12]; ++ kf[14] = kf[6] ^ kf[13]; ++ kf[15] = kf[7] ^ kf[14]; ++ kf += 8; ++ } ++ while (kf < kt); ++ break; ++ } ++ ++ if(!f) ++ { u_int32_t i; ++ ++ kt = cx->aes_d_key + nc * cx->aes_Nrnd; ++ kf = cx->aes_e_key; ++ ++ cpy(kt, kf); kt -= 2 * nc; ++ ++ for(i = 1; i < cx->aes_Nrnd; ++i) ++ { ++#if defined(ONE_TABLE) || defined(FOUR_TABLES) ++#if !defined(ONE_IM_TABLE) && !defined(FOUR_IM_TABLES) ++ u_int32_t f2, f4, f8, f9; ++#endif 
++ mix(kt, kf); ++#else ++ cpy(kt, kf); ++#endif ++ kt -= 2 * nc; ++ } ++ ++ cpy(kt, kf); ++ } ++} ++ ++// y = output word, x = input word, r = row, c = column ++// for r = 0, 1, 2 and 3 = column accessed for row r ++ ++#if defined(ARRAYS) ++#define s(x,c) x[c] ++#else ++#define s(x,c) x##c ++#endif ++ ++// I am grateful to Frank Yellin for the following constructions ++// which, given the column (c) of the output state variable that ++// is being computed, return the input state variables which are ++// needed for each row (r) of the state ++ ++// For the fixed block size options, compilers reduce these two ++// expressions to fixed variable references. For variable block ++// size code conditional clauses will sometimes be returned ++ ++#define unused 77 // Sunset Strip ++ ++#define fwd_var(x,r,c) \ ++ ( r==0 ? \ ++ ( c==0 ? s(x,0) \ ++ : c==1 ? s(x,1) \ ++ : c==2 ? s(x,2) \ ++ : c==3 ? s(x,3) \ ++ : c==4 ? s(x,4) \ ++ : c==5 ? s(x,5) \ ++ : c==6 ? s(x,6) \ ++ : s(x,7)) \ ++ : r==1 ? \ ++ ( c==0 ? s(x,1) \ ++ : c==1 ? s(x,2) \ ++ : c==2 ? s(x,3) \ ++ : c==3 ? nc==4 ? s(x,0) : s(x,4) \ ++ : c==4 ? s(x,5) \ ++ : c==5 ? nc==8 ? s(x,6) : s(x,0) \ ++ : c==6 ? s(x,7) \ ++ : s(x,0)) \ ++ : r==2 ? \ ++ ( c==0 ? nc==8 ? s(x,3) : s(x,2) \ ++ : c==1 ? nc==8 ? s(x,4) : s(x,3) \ ++ : c==2 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \ ++ : c==3 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \ ++ : c==4 ? nc==8 ? s(x,7) : s(x,0) \ ++ : c==5 ? nc==8 ? s(x,0) : s(x,1) \ ++ : c==6 ? s(x,1) \ ++ : s(x,2)) \ ++ : \ ++ ( c==0 ? nc==8 ? s(x,4) : s(x,3) \ ++ : c==1 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \ ++ : c==2 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \ ++ : c==3 ? nc==4 ? s(x,2) : nc==8 ? s(x,7) : s(x,0) \ ++ : c==4 ? nc==8 ? s(x,0) : s(x,1) \ ++ : c==5 ? nc==8 ? s(x,1) : s(x,2) \ ++ : c==6 ? s(x,2) \ ++ : s(x,3))) ++ ++#define inv_var(x,r,c) \ ++ ( r==0 ? \ ++ ( c==0 ? s(x,0) \ ++ : c==1 ? s(x,1) \ ++ : c==2 ? s(x,2) \ ++ : c==3 ? s(x,3) \ ++ : c==4 ? s(x,4) \ ++ : c==5 ? 
s(x,5) \ ++ : c==6 ? s(x,6) \ ++ : s(x,7)) \ ++ : r==1 ? \ ++ ( c==0 ? nc==4 ? s(x,3) : nc==8 ? s(x,7) : s(x,5) \ ++ : c==1 ? s(x,0) \ ++ : c==2 ? s(x,1) \ ++ : c==3 ? s(x,2) \ ++ : c==4 ? s(x,3) \ ++ : c==5 ? s(x,4) \ ++ : c==6 ? s(x,5) \ ++ : s(x,6)) \ ++ : r==2 ? \ ++ ( c==0 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \ ++ : c==1 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \ ++ : c==2 ? nc==8 ? s(x,7) : s(x,0) \ ++ : c==3 ? nc==8 ? s(x,0) : s(x,1) \ ++ : c==4 ? nc==8 ? s(x,1) : s(x,2) \ ++ : c==5 ? nc==8 ? s(x,2) : s(x,3) \ ++ : c==6 ? s(x,3) \ ++ : s(x,4)) \ ++ : \ ++ ( c==0 ? nc==4 ? s(x,1) : nc==8 ? s(x,4) : s(x,3) \ ++ : c==1 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \ ++ : c==2 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \ ++ : c==3 ? nc==8 ? s(x,7) : s(x,0) \ ++ : c==4 ? nc==8 ? s(x,0) : s(x,1) \ ++ : c==5 ? nc==8 ? s(x,1) : s(x,2) \ ++ : c==6 ? s(x,2) \ ++ : s(x,3))) ++ ++#define si(y,x,k,c) s(y,c) = const_word_in(x + 4 * c) ^ k[c] ++#define so(y,x,c) word_out(y + 4 * c, s(x,c)) ++ ++#if defined(FOUR_TABLES) ++#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,ft_tab,fwd_var,rf1,c) ++#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,it_tab,inv_var,rf1,c) ++#elif defined(ONE_TABLE) ++#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,ft_tab,fwd_var,rf1,c) ++#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,it_tab,inv_var,rf1,c) ++#else ++#define fwd_rnd(y,x,k,c) s(y,c) = fwd_mcol(no_table(x,s_box,fwd_var,rf1,c)) ^ (k)[c] ++#define inv_rnd(y,x,k,c) s(y,c) = inv_mcol(no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]) ++#endif ++ ++#if defined(FOUR_LR_TABLES) ++#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,fl_tab,fwd_var,rf1,c) ++#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,il_tab,inv_var,rf1,c) ++#elif defined(ONE_LR_TABLE) ++#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,fl_tab,fwd_var,rf1,c) ++#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,il_tab,inv_var,rf1,c) ++#else ++#define 
fwd_lrnd(y,x,k,c) s(y,c) = no_table(x,s_box,fwd_var,rf1,c) ^ (k)[c] ++#define inv_lrnd(y,x,k,c) s(y,c) = no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c] ++#endif ++ ++#if AES_BLOCK_SIZE == 16 ++ ++#if defined(ARRAYS) ++#define locals(y,x) x[4],y[4] ++#else ++#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3 ++// the following defines prevent the compiler requiring the declaration ++// of generated but unused variables in the fwd_var and inv_var macros ++#define b04 unused ++#define b05 unused ++#define b06 unused ++#define b07 unused ++#define b14 unused ++#define b15 unused ++#define b16 unused ++#define b17 unused ++#endif ++#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ ++ s(y,2) = s(x,2); s(y,3) = s(x,3); ++#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3) ++#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3) ++#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3) ++ ++#elif AES_BLOCK_SIZE == 24 ++ ++#if defined(ARRAYS) ++#define locals(y,x) x[6],y[6] ++#else ++#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5, \ ++ y##0,y##1,y##2,y##3,y##4,y##5 ++#define b06 unused ++#define b07 unused ++#define b16 unused ++#define b17 unused ++#endif ++#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ ++ s(y,2) = s(x,2); s(y,3) = s(x,3); \ ++ s(y,4) = s(x,4); s(y,5) = s(x,5); ++#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); \ ++ si(y,x,k,3); si(y,x,k,4); si(y,x,k,5) ++#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); \ ++ so(y,x,3); so(y,x,4); so(y,x,5) ++#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); \ ++ rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5) ++#else ++ ++#if defined(ARRAYS) ++#define locals(y,x) x[8],y[8] ++#else ++#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5,x##6,x##7, \ ++ y##0,y##1,y##2,y##3,y##4,y##5,y##6,y##7 ++#endif ++#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ ++ s(y,2) = s(x,2); s(y,3) = s(x,3); \ ++ s(y,4) = s(x,4); s(y,5) = 
s(x,5); \ ++ s(y,6) = s(x,6); s(y,7) = s(x,7); ++ ++#if AES_BLOCK_SIZE == 32 ++ ++#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); \ ++ si(y,x,k,4); si(y,x,k,5); si(y,x,k,6); si(y,x,k,7) ++#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); \ ++ so(y,x,4); so(y,x,5); so(y,x,6); so(y,x,7) ++#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); \ ++ rm(y,x,k,4); rm(y,x,k,5); rm(y,x,k,6); rm(y,x,k,7) ++#else ++ ++#define state_in(y,x,k) \ ++switch(nc) \ ++{ case 8: si(y,x,k,7); si(y,x,k,6); \ ++ case 6: si(y,x,k,5); si(y,x,k,4); \ ++ case 4: si(y,x,k,3); si(y,x,k,2); \ ++ si(y,x,k,1); si(y,x,k,0); \ ++} ++ ++#define state_out(y,x) \ ++switch(nc) \ ++{ case 8: so(y,x,7); so(y,x,6); \ ++ case 6: so(y,x,5); so(y,x,4); \ ++ case 4: so(y,x,3); so(y,x,2); \ ++ so(y,x,1); so(y,x,0); \ ++} ++ ++#if defined(FAST_VARIABLE) ++ ++#define round(rm,y,x,k) \ ++switch(nc) \ ++{ case 8: rm(y,x,k,7); rm(y,x,k,6); \ ++ rm(y,x,k,5); rm(y,x,k,4); \ ++ rm(y,x,k,3); rm(y,x,k,2); \ ++ rm(y,x,k,1); rm(y,x,k,0); \ ++ break; \ ++ case 6: rm(y,x,k,5); rm(y,x,k,4); \ ++ rm(y,x,k,3); rm(y,x,k,2); \ ++ rm(y,x,k,1); rm(y,x,k,0); \ ++ break; \ ++ case 4: rm(y,x,k,3); rm(y,x,k,2); \ ++ rm(y,x,k,1); rm(y,x,k,0); \ ++ break; \ ++} ++#else ++ ++#define round(rm,y,x,k) \ ++switch(nc) \ ++{ case 8: rm(y,x,k,7); rm(y,x,k,6); \ ++ case 6: rm(y,x,k,5); rm(y,x,k,4); \ ++ case 4: rm(y,x,k,3); rm(y,x,k,2); \ ++ rm(y,x,k,1); rm(y,x,k,0); \ ++} ++ ++#endif ++ ++#endif ++#endif ++ ++void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[]) ++{ u_int32_t locals(b0, b1); ++ const u_int32_t *kp = cx->aes_e_key; ++ ++#if !defined(ONE_TABLE) && !defined(FOUR_TABLES) ++ u_int32_t f2; ++#endif ++ ++ state_in(b0, in_blk, kp); kp += nc; ++ ++#if defined(UNROLL) ++ ++ switch(cx->aes_Nrnd) ++ { ++ case 14: round(fwd_rnd, b1, b0, kp ); ++ round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc; ++ case 12: round(fwd_rnd, b1, b0, kp ); 
++ round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc; ++ case 10: round(fwd_rnd, b1, b0, kp ); ++ round(fwd_rnd, b0, b1, kp + nc); ++ round(fwd_rnd, b1, b0, kp + 2 * nc); ++ round(fwd_rnd, b0, b1, kp + 3 * nc); ++ round(fwd_rnd, b1, b0, kp + 4 * nc); ++ round(fwd_rnd, b0, b1, kp + 5 * nc); ++ round(fwd_rnd, b1, b0, kp + 6 * nc); ++ round(fwd_rnd, b0, b1, kp + 7 * nc); ++ round(fwd_rnd, b1, b0, kp + 8 * nc); ++ round(fwd_lrnd, b0, b1, kp + 9 * nc); ++ } ++ ++#elif defined(PARTIAL_UNROLL) ++ { u_int32_t rnd; ++ ++ for(rnd = 0; rnd < (cx->aes_Nrnd >> 1) - 1; ++rnd) ++ { ++ round(fwd_rnd, b1, b0, kp); ++ round(fwd_rnd, b0, b1, kp + nc); kp += 2 * nc; ++ } ++ ++ round(fwd_rnd, b1, b0, kp); ++ round(fwd_lrnd, b0, b1, kp + nc); ++ } ++#else ++ { u_int32_t rnd; ++ ++ for(rnd = 0; rnd < cx->aes_Nrnd - 1; ++rnd) ++ { ++ round(fwd_rnd, b1, b0, kp); ++ l_copy(b0, b1); kp += nc; ++ } ++ ++ round(fwd_lrnd, b0, b1, kp); ++ } ++#endif ++ ++ state_out(out_blk, b0); ++} ++ ++void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[]) ++{ u_int32_t locals(b0, b1); ++ const u_int32_t *kp = cx->aes_d_key; ++ ++#if !defined(ONE_TABLE) && !defined(FOUR_TABLES) ++ u_int32_t f2, f4, f8, f9; ++#endif ++ ++ state_in(b0, in_blk, kp); kp += nc; ++ ++#if defined(UNROLL) ++ ++ switch(cx->aes_Nrnd) ++ { ++ case 14: round(inv_rnd, b1, b0, kp ); ++ round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc; ++ case 12: round(inv_rnd, b1, b0, kp ); ++ round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc; ++ case 10: round(inv_rnd, b1, b0, kp ); ++ round(inv_rnd, b0, b1, kp + nc); ++ round(inv_rnd, b1, b0, kp + 2 * nc); ++ round(inv_rnd, b0, b1, kp + 3 * nc); ++ round(inv_rnd, b1, b0, kp + 4 * nc); ++ round(inv_rnd, b0, b1, kp + 5 * nc); ++ round(inv_rnd, b1, b0, kp + 6 * nc); ++ round(inv_rnd, b0, b1, kp + 7 * nc); ++ round(inv_rnd, b1, b0, kp + 8 * nc); ++ round(inv_lrnd, b0, b1, kp + 9 * nc); ++ } ++ ++#elif defined(PARTIAL_UNROLL) ++ { u_int32_t rnd; ++ ++ for(rnd = 0; rnd < 
(cx->aes_Nrnd >> 1) - 1; ++rnd) ++ { ++ round(inv_rnd, b1, b0, kp); ++ round(inv_rnd, b0, b1, kp + nc); kp += 2 * nc; ++ } ++ ++ round(inv_rnd, b1, b0, kp); ++ round(inv_lrnd, b0, b1, kp + nc); ++ } ++#else ++ { u_int32_t rnd; ++ ++ for(rnd = 0; rnd < cx->aes_Nrnd - 1; ++rnd) ++ { ++ round(inv_rnd, b1, b0, kp); ++ l_copy(b0, b1); kp += nc; ++ } ++ ++ round(inv_lrnd, b0, b1, kp); ++ } ++#endif ++ ++ state_out(out_blk, b0); ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/aes/aes_cbc.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,46 @@ ++/* ++// I retain copyright in this code but I encourage its free use provided ++// that I don't carry any responsibility for the results. I am especially ++// happy to see it used in free and open source software. If you do use ++// it I would appreciate an acknowledgement of its origin in the code or ++// the product that results and I would also appreciate knowing a little ++// about the use to which it is being put. I am grateful to Frank Yellin ++// for some ideas that are used in this implementation. ++// ++// Dr B. R. Gladman 6th April 2001. ++// ++// This is an implementation of the AES encryption algorithm (Rijndael) ++// designed by Joan Daemen and Vincent Rijmen. This version is designed ++// to provide both fixed and dynamic block and key lengths and can also ++// run with either big or little endian internal byte order (see aes.h). ++// It inputs block and key lengths in bytes with the legal values being ++// 16, 24 and 32. 
++* ++*/ ++ ++#ifdef __KERNEL__ ++#include ++#else ++#include ++#endif ++#include "crypto/aes_cbc.h" ++#include "crypto/cbc_generic.h" ++ ++/* returns bool success */ ++int AES_set_key(aes_context *aes_ctx, const u_int8_t *key, int keysize) { ++ aes_set_key(aes_ctx, key, keysize, 0); ++ return 1; ++} ++CBC_IMPL_BLK16(AES_cbc_encrypt, aes_context, u_int8_t *, aes_encrypt, aes_decrypt); ++ ++ ++/* ++ * $Log: aes_cbc.c,v $ ++ * Revision 1.2 2004-07-10 07:48:40 mcr ++ * Moved from linux/crypto/ciphers/aes/aes_cbc.c,v ++ * ++ * Revision 1.1 2004/04/06 02:48:12 mcr ++ * pullup of AES cipher from alg-branch. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/aes/aes_xcbc_mac.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,67 @@ ++#ifdef __KERNEL__ ++#include ++#include ++#define DEBUG(x) ++#else ++#include ++#include ++#define DEBUG(x) x ++#endif ++ ++#include "crypto/aes.h" ++#include "crypto/aes_xcbc_mac.h" ++ ++int AES_xcbc_mac_set_key(aes_context_mac *ctxm, const u_int8_t *key, int keylen) ++{ ++ int ret=1; ++ aes_block kn[3] = { ++ { 0x01010101, 0x01010101, 0x01010101, 0x01010101 }, ++ { 0x02020202, 0x02020202, 0x02020202, 0x02020202 }, ++ { 0x03030303, 0x03030303, 0x03030303, 0x03030303 }, ++ }; ++ aes_set_key(&ctxm->ctx_k1, key, keylen, 0); ++ aes_encrypt(&ctxm->ctx_k1, (u_int8_t *) kn[0], (u_int8_t *) kn[0]); ++ aes_encrypt(&ctxm->ctx_k1, (u_int8_t *) kn[1], (u_int8_t *) ctxm->k2); ++ aes_encrypt(&ctxm->ctx_k1, (u_int8_t *) kn[2], (u_int8_t *) ctxm->k3); ++ aes_set_key(&ctxm->ctx_k1, (u_int8_t *) kn[0], 16, 0); ++ return ret; ++} ++static void do_pad_xor(u_int8_t *out, const u_int8_t *in, int len) { ++ int pos=0; ++ for (pos=1; pos <= 16; pos++, in++, out++) { ++ if (pos <= len) ++ *out ^= *in; ++ if (pos > len) { ++ DEBUG(printf("put 0x80 at pos=%d\n", pos)); ++ *out ^= 0x80; ++ break; ++ } ++ } ++} ++static void xor_block(aes_block res, const aes_block op) { ++ res[0] ^= op[0]; ++ res[1] ^= op[1]; ++ res[2] ^= op[2]; ++ res[3] ^= op[3]; ++} ++int 
AES_xcbc_mac_hash(const aes_context_mac *ctxm, const u_int8_t * in, int ilen, u_int8_t hash[16]) { ++ int ret=ilen; ++ u_int32_t out[4] = { 0, 0, 0, 0 }; ++ for (; ilen > 16 ; ilen-=16) { ++ xor_block(out, (const u_int32_t*) &in[0]); ++ aes_encrypt(&ctxm->ctx_k1, in, (u_int8_t *)&out[0]); ++ in+=16; ++ } ++ do_pad_xor((u_int8_t *)&out, in, ilen); ++ if (ilen==16) { ++ DEBUG(printf("using k3\n")); ++ xor_block(out, ctxm->k3); ++ } ++ else ++ { ++ DEBUG(printf("using k2\n")); ++ xor_block(out, ctxm->k2); ++ } ++ aes_encrypt(&ctxm->ctx_k1, (u_int8_t *)out, hash); ++ return ret; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/aes/ipsec_alg_aes.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,296 @@ ++/* ++ * ipsec_alg AES cipher stubs ++ * ++ * Author: JuanJo Ciarlante ++ * ++ * ipsec_alg_aes.c,v 1.1.2.1 2003/11/21 18:12:23 jjo Exp ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * Fixes by: ++ * PK: Pawel Krawczyk ++ * Fixes list: ++ * PK: make XCBC comply with latest draft (keylength) ++ * ++ */ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++ ++/* ++ * special case: ipsec core modular with this static algo inside: ++ * must avoid MODULE magic for this file ++ */ ++#if defined(CONFIG_KLIPS_MODULE) && defined(CONFIG_KLIPS_ENC_AES) ++#undef MODULE ++#endif ++ ++#include ++#include ++ ++#include /* printk() */ ++#include /* error codes */ ++#include /* size_t */ ++#include ++ ++/* Check if __exit is defined, if not null it */ ++#ifndef __exit ++#define __exit ++#endif ++ ++/* Low freeswan header coupling */ ++#include "openswan/ipsec_alg.h" ++#include "crypto/aes_cbc.h" ++ ++#define CONFIG_KLIPS_ENC_AES_MAC 1 ++ ++#define AES_CONTEXT_T aes_context ++static int debug_aes=0; ++static int test_aes=0; ++static int excl_aes=0; ++static int keyminbits=0; ++static int keymaxbits=0; ++#if defined(CONFIG_KLIPS_ENC_AES_MODULE) ++MODULE_AUTHOR("JuanJo Ciarlante "); ++#ifdef module_param ++module_param(debug_aes,int,0600) ++module_param(test_aes,int,0600) ++module_param(excl_aes,int,0600) ++module_param(keyminbits,int,0600) ++module_param(keymaxbits,int,0600) ++#else ++MODULE_PARM(debug_aes, "i"); ++MODULE_PARM(test_aes, "i"); ++MODULE_PARM(excl_aes, "i"); ++MODULE_PARM(keyminbits, "i"); ++MODULE_PARM(keymaxbits, "i"); ++#endif ++#endif ++ ++#if CONFIG_KLIPS_ENC_AES_MAC ++#include "crypto/aes_xcbc_mac.h" ++ ++/* ++ * Not IANA number yet (draft-ietf-ipsec-ciph-aes-xcbc-mac-00.txt). ++ * We use 9 for non-modular algorithm and none for modular, thus ++ * forcing user to specify one on module load. 
-kravietz ++ */ ++#ifdef MODULE ++static int auth_id=0; ++#else ++static int auth_id=9; ++#endif ++#ifdef module_param ++module_param(auth_id, int, 0600); ++#else ++MODULE_PARM(auth_id, "i"); ++#endif ++#endif ++ ++#define ESP_AES 12 /* truely _constant_ :) */ ++ ++/* 128, 192 or 256 */ ++#define ESP_AES_KEY_SZ_MIN 16 /* 128 bit secret key */ ++#define ESP_AES_KEY_SZ_MAX 32 /* 256 bit secret key */ ++#define ESP_AES_CBC_BLK_LEN 16 /* AES-CBC block size */ ++ ++/* Values according to draft-ietf-ipsec-ciph-aes-xcbc-mac-02.txt ++ * -kravietz ++ */ ++#define ESP_AES_MAC_KEY_SZ 16 /* 128 bit MAC key */ ++#define ESP_AES_MAC_BLK_LEN 16 /* 128 bit block */ ++ ++static int _aes_set_key(struct ipsec_alg_enc *alg, ++ __u8 * key_e, const __u8 * key, ++ size_t keysize) ++{ ++ int ret; ++ AES_CONTEXT_T *ctx=(AES_CONTEXT_T*)key_e; ++ ret=AES_set_key(ctx, key, keysize)!=0? 0: -EINVAL; ++ if (debug_aes > 0) ++ printk(KERN_DEBUG "klips_debug:_aes_set_key:" ++ "ret=%d key_e=%p key=%p keysize=%ld\n", ++ ret, key_e, key, (unsigned long int) keysize); ++ return ret; ++} ++ ++static int _aes_cbc_encrypt(struct ipsec_alg_enc *alg, __u8 * key_e, ++ __u8 * in, int ilen, const __u8 * iv, ++ int encrypt) ++{ ++ AES_CONTEXT_T *ctx=(AES_CONTEXT_T*)key_e; ++ if (debug_aes > 0) ++ printk(KERN_DEBUG "klips_debug:_aes_cbc_encrypt:" ++ "key_e=%p in=%p ilen=%d iv=%p encrypt=%d\n", ++ key_e, in, ilen, iv, encrypt); ++ return AES_cbc_encrypt(ctx, in, in, ilen, iv, encrypt); ++} ++#if CONFIG_KLIPS_ENC_AES_MAC ++static int _aes_mac_set_key(struct ipsec_alg_auth *alg, __u8 * key_a, const __u8 * key, int keylen) { ++ aes_context_mac *ctxm=(aes_context_mac *)key_a; ++ return AES_xcbc_mac_set_key(ctxm, key, keylen)? 
0 : -EINVAL; ++} ++static int _aes_mac_hash(struct ipsec_alg_auth *alg, __u8 * key_a, const __u8 * dat, int len, __u8 * hash, int hashlen) { ++ int ret; ++ char hash_buf[16]; ++ aes_context_mac *ctxm=(aes_context_mac *)key_a; ++ ret=AES_xcbc_mac_hash(ctxm, dat, len, hash_buf); ++ memcpy(hash, hash_buf, hashlen); ++ return ret; ++} ++static struct ipsec_alg_auth ipsec_alg_AES_MAC = { ++ ixt_common: { ixt_version: IPSEC_ALG_VERSION, ++ ixt_refcnt: ATOMIC_INIT(0), ++ ixt_name: "aes_mac", ++ ixt_blocksize: ESP_AES_MAC_BLK_LEN, ++ ixt_support: { ++ ias_exttype: IPSEC_ALG_TYPE_AUTH, ++ ias_id: 0, ++ ias_keyminbits: ESP_AES_MAC_KEY_SZ*8, ++ ias_keymaxbits: ESP_AES_MAC_KEY_SZ*8, ++ }, ++ }, ++#if defined(CONFIG_KLIPS_ENC_AES_MODULE) ++ ixt_module: THIS_MODULE, ++#endif ++ ixt_a_keylen: ESP_AES_MAC_KEY_SZ, ++ ixt_a_ctx_size: sizeof(aes_context_mac), ++ ixt_a_hmac_set_key: _aes_mac_set_key, ++ ixt_a_hmac_hash:_aes_mac_hash, ++}; ++#endif /* CONFIG_KLIPS_ENC_AES_MAC */ ++static struct ipsec_alg_enc ipsec_alg_AES = { ++ ixt_common: { ixt_version: IPSEC_ALG_VERSION, ++ ixt_refcnt: ATOMIC_INIT(0), ++ ixt_name: "aes", ++ ixt_blocksize: ESP_AES_CBC_BLK_LEN, ++ ixt_support: { ++ ias_exttype: IPSEC_ALG_TYPE_ENCRYPT, ++ ias_id: ESP_AES, ++ ias_keyminbits: ESP_AES_KEY_SZ_MIN*8, ++ ias_keymaxbits: ESP_AES_KEY_SZ_MAX*8, ++ }, ++ }, ++#if defined(CONFIG_KLIPS_ENC_AES_MODULE) ++ ixt_module: THIS_MODULE, ++#endif ++ ixt_e_keylen: ESP_AES_KEY_SZ_MAX, ++ ixt_e_ctx_size: sizeof(AES_CONTEXT_T), ++ ixt_e_set_key: _aes_set_key, ++ ixt_e_cbc_encrypt:_aes_cbc_encrypt, ++}; ++ ++#if defined(CONFIG_KLIPS_ENC_AES_MODULE) ++IPSEC_ALG_MODULE_INIT_MOD( ipsec_aes_init ) ++#else ++IPSEC_ALG_MODULE_INIT_STATIC( ipsec_aes_init ) ++#endif ++{ ++ int ret, test_ret; ++ ++ if (keyminbits) ++ ipsec_alg_AES.ixt_common.ixt_support.ias_keyminbits=keyminbits; ++ if (keymaxbits) { ++ ipsec_alg_AES.ixt_common.ixt_support.ias_keymaxbits=keymaxbits; ++ if 
(keymaxbits*8>ipsec_alg_AES.ixt_common.ixt_support.ias_keymaxbits) ++ ipsec_alg_AES.ixt_e_keylen=keymaxbits*8; ++ } ++ if (excl_aes) ipsec_alg_AES.ixt_common.ixt_state |= IPSEC_ALG_ST_EXCL; ++ ret=register_ipsec_alg_enc(&ipsec_alg_AES); ++ printk("ipsec_aes_init(alg_type=%d alg_id=%d name=%s): ret=%d\n", ++ ipsec_alg_AES.ixt_common.ixt_support.ias_exttype, ++ ipsec_alg_AES.ixt_common.ixt_support.ias_id, ++ ipsec_alg_AES.ixt_common.ixt_name, ++ ret); ++ if (ret==0 && test_aes) { ++ test_ret=ipsec_alg_test( ++ ipsec_alg_AES.ixt_common.ixt_support.ias_exttype , ++ ipsec_alg_AES.ixt_common.ixt_support.ias_id, ++ test_aes); ++ printk("ipsec_aes_init(alg_type=%d alg_id=%d): test_ret=%d\n", ++ ipsec_alg_AES.ixt_common.ixt_support.ias_exttype , ++ ipsec_alg_AES.ixt_common.ixt_support.ias_id, ++ test_ret); ++ } ++#if CONFIG_KLIPS_ENC_AES_MAC ++ if (auth_id!=0){ ++ int ret; ++ ipsec_alg_AES_MAC.ixt_common.ixt_support.ias_id=auth_id; ++ ret=register_ipsec_alg_auth(&ipsec_alg_AES_MAC); ++ printk("ipsec_aes_init(alg_type=%d alg_id=%d name=%s): ret=%d\n", ++ ipsec_alg_AES_MAC.ixt_common.ixt_support.ias_exttype, ++ ipsec_alg_AES_MAC.ixt_common.ixt_support.ias_id, ++ ipsec_alg_AES_MAC.ixt_common.ixt_name, ++ ret); ++ if (ret==0 && test_aes) { ++ test_ret=ipsec_alg_test( ++ ipsec_alg_AES_MAC.ixt_common.ixt_support.ias_exttype, ++ ipsec_alg_AES_MAC.ixt_common.ixt_support.ias_id, ++ test_aes); ++ printk("ipsec_aes_init(alg_type=%d alg_id=%d): test_ret=%d\n", ++ ipsec_alg_AES_MAC.ixt_common.ixt_support.ias_exttype, ++ ipsec_alg_AES_MAC.ixt_common.ixt_support.ias_id, ++ test_ret); ++ } ++ } else { ++ printk(KERN_DEBUG "klips_debug: experimental ipsec_alg_AES_MAC not registered [Ok] (auth_id=%d)\n", auth_id); ++ } ++#endif /* CONFIG_KLIPS_ENC_AES_MAC */ ++ return ret; ++} ++ ++#if defined(CONFIG_KLIPS_ENC_AES_MODULE) ++IPSEC_ALG_MODULE_EXIT_MOD( ipsec_aes_fini ) ++#else ++IPSEC_ALG_MODULE_EXIT_STATIC( ipsec_aes_fini ) ++#endif ++{ ++#if CONFIG_KLIPS_ENC_AES_MAC ++ if (auth_id) 
unregister_ipsec_alg_auth(&ipsec_alg_AES_MAC); ++#endif /* CONFIG_KLIPS_ENC_AES_MAC */ ++ unregister_ipsec_alg_enc(&ipsec_alg_AES); ++ return; ++} ++#ifdef MODULE_LICENSE ++MODULE_LICENSE("GPL"); ++#endif ++ ++#if 0 /* +NOT_YET */ ++#ifndef MODULE ++/* ++ * This is intended for static module setups, currently ++ * doesn't work for modular ipsec.o with static algos inside ++ */ ++static int setup_keybits(const char *str) ++{ ++ unsigned aux; ++ char *end; ++ ++ aux = simple_strtoul(str,&end,0); ++ if (aux != 128 && aux != 192 && aux != 256) ++ return 0; ++ keyminbits = aux; ++ ++ if (*end == 0 || *end != ',') ++ return 1; ++ str=end+1; ++ aux = simple_strtoul(str, NULL, 0); ++ if (aux != 128 && aux != 192 && aux != 256) ++ return 0; ++ if (aux >= keyminbits) ++ keymaxbits = aux; ++ return 1; ++} ++__setup("ipsec_aes_keybits=", setup_keybits); ++#endif ++#endif ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/alg/Config.alg_aes.in Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,3 @@ ++if [ "$CONFIG_IPSEC_ALG" = "y" ]; then ++ tristate ' AES encryption algorithm' CONFIG_IPSEC_ENC_AES ++fi +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/alg/Config.alg_cryptoapi.in Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,6 @@ ++if [ "$CONFIG_IPSEC_ALG" = "y" ]; then ++ dep_tristate ' CRYPTOAPI ciphers support (needs cryptoapi patch)' CONFIG_IPSEC_ALG_CRYPTOAPI $CONFIG_CRYPTO ++ if [ "$CONFIG_IPSEC_ALG_CRYPTOAPI" != "n" ]; then ++ bool ' CRYPTOAPI proprietary ciphers ' CONFIG_IPSEC_ALG_NON_LIBRE ++ fi ++fi +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/alg/Config.in Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,3 @@ ++#Placeholder ++source net/ipsec/alg/Config.alg_aes.in ++source net/ipsec/alg/Config.alg_cryptoapi.in +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/alg/Makefile Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,112 @@ ++# Makefile,v 1.1.2.1 2003/11/21 18:12:23 jjo Exp ++ifeq ($(strip $(KLIPSMODULE)),) ++FREESWANSRCDIR=. ++else ++FREESWANSRCDIR=../../../.. 
++endif ++ifeq ($(strip $(KLIPS_TOP)),) ++KLIPS_TOP=../../.. ++override EXTRA_CFLAGS += -I$(KLIPS_TOP)/include ++endif ++ ++ifeq ($(CONFIG_IPSEC_DEBUG),y) ++override EXTRA_CFLAGS += -g ++endif ++ ++# LIBCRYPTO normally comes as an argument from "parent" Makefile ++# (this applies both to FS' "make module" and eg. Linux' "make modules" ++# But make dep doest follow same evaluations, so we need this default: ++LIBCRYPTO=$(TOPDIR)/lib/libcrypto ++ ++override EXTRA_CFLAGS += -I$(LIBCRYPTO)/include ++override EXTRA_CFLAGS += -Wall -Wpointer-arith -Wstrict-prototypes ++ ++MOD_LIST_NAME := NET_MISC_MODULES ++ ++#O_TARGET := static_init.o ++ ++subdir- := ++subdir-n := ++subdir-y := ++subdir-m := ++ ++obj-y := static_init.o ++ ++ARCH_ASM-y := ++ARCH_ASM-$(CONFIG_M586) := i586 ++ARCH_ASM-$(CONFIG_M586TSC) := i586 ++ARCH_ASM-$(CONFIG_M586MMX) := i586 ++ARCH_ASM-$(CONFIG_MK6) := i586 ++ARCH_ASM-$(CONFIG_M686) := i686 ++ARCH_ASM-$(CONFIG_MPENTIUMIII) := i686 ++ARCH_ASM-$(CONFIG_MPENTIUM4) := i686 ++ARCH_ASM-$(CONFIG_MK7) := i686 ++ARCH_ASM-$(CONFIG_MCRUSOE) := i586 ++ARCH_ASM-$(CONFIG_MWINCHIPC6) := i586 ++ARCH_ASM-$(CONFIG_MWINCHIP2) := i586 ++ARCH_ASM-$(CONFIG_MWINCHIP3D) := i586 ++ARCH_ASM-$(CONFIG_USERMODE) := i586 ++ ++ARCH_ASM :=$(ARCH_ASM-y) ++ifdef NO_ASM ++ARCH_ASM := ++endif ++ ++# The algorithm makefiles may put dependences, short-circuit them ++null: ++ ++makefiles=$(filter-out %.preipsec, $(wildcard Makefile.alg_*)) ++ifneq ($(makefiles),) ++#include Makefile.alg_aes ++#include Makefile.alg_aes-opt ++include $(makefiles) ++endif ++ ++# These rules translate from new to old makefile rules ++# Translate to Rules.make lists. 
++multi-used := $(filter $(list-multi), $(obj-y) $(obj-m)) ++multi-objs := $(foreach m, $(multi-used), $($(basename $(m))-objs)) ++active-objs := $(sort $(multi-objs) $(obj-y) $(obj-m)) ++O_OBJS := $(obj-y) ++M_OBJS := $(obj-m) ++MIX_OBJS := $(filter $(export-objs), $(active-objs)) ++#OX_OBJS := $(export-objs) ++SUB_DIRS := $(subdir-y) ++ALL_SUB_DIRS := $(subdir-y) $(subdir-m) ++MOD_SUB_DIRS := $(subdir-m) ++ ++ ++static_init_mod.o: $(obj-y) ++ rm -f $@ ++ $(LD) $(LD_EXTRAFLAGS) $(obj-y) -r -o $@ ++ ++perlasm: ../../../crypto/ciphers/des/asm/perlasm ++ ln -sf $? $@ ++ ++$(obj-y) $(obj-m): $(TOPDIR)/include/linux/config.h $(TOPDIR)/include/linux/autoconf.h $(KLIPS_TOP)/include/freeswan/ipsec_alg.h ++$(alg_obj-y) $(alg_obj-m): perlasm $(TOPDIR)/include/linux/config.h $(TOPDIR)/include/linux/autoconf.h $(KLIPS_TOP)/include/freeswan/ipsec_alg.h ++ ++ ++all_alg_modules: perlasm $(ALG_MODULES) ++ @echo "ALG_MODULES=$(ALG_MODULES)" ++ ++ ++# ++# Construct alg. init. function: call ipsec_ALGO_init() for every static algo ++# Needed when there are static algos (with static or modular ipsec.o) ++# ++static_init.c: $(TOPDIR)/include/linux/autoconf.h Makefile $(makefiles) scripts/mk-static_init.c.sh ++ @echo "Re-creating $@" ++ $(SHELL) scripts/mk-static_init.c.sh $(static_init-func-y) > $@ ++ ++clean: ++ @for i in $(ALG_SUBDIRS);do test -d $$i && make -C $$i clean;done;exit 0 ++ @find . 
-type l -exec rm -f {} \; ++ -rm -f perlasm ++ -rm -rf $(ALG_SUBDIRS) ++ -rm -f *.o static_init.c ++ ++ifdef TOPDIR ++include $(TOPDIR)/Rules.make ++endif ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/alg/Makefile.alg_aes Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,18 @@ ++MOD_AES := ipsec_aes.o ++ ++ALG_MODULES += $(MOD_AES) ++ALG_SUBDIRS += libaes ++ ++obj-$(CONFIG_IPSEC_ALG_AES) += $(MOD_AES) ++static_init-func-$(CONFIG_IPSEC_ALG_AES)+= ipsec_aes_init ++alg_obj-$(CONFIG_IPSEC_ALG_AES) += ipsec_alg_aes.o ++ ++AES_OBJS := ipsec_alg_aes.o $(LIBCRYPTO)/libaes/libaes.a ++ ++ ++$(MOD_AES): $(AES_OBJS) ++ $(LD) $(EXTRA_LDFLAGS) -r $(AES_OBJS) -o $@ ++ ++$(LIBCRYPTO)/libaes/libaes.a: ++ $(MAKE) -C $(LIBCRYPTO)/libaes CC='$(CC)' 'ARCH_ASM=$(ARCH_ASM)' CFLAGS='$(CFLAGS) $(EXTRA_CFLAGS)' libaes.a ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/alg/Makefile.alg_cryptoapi Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,14 @@ ++MOD_CRYPTOAPI := ipsec_cryptoapi.o ++ ++ifneq ($(wildcard $(TOPDIR)/include/linux/crypto.h),) ++ALG_MODULES += $(MOD_CRYPTOAPI) ++obj-$(CONFIG_IPSEC_ALG_CRYPTOAPI) += $(MOD_CRYPTOAPI) ++static_init-func-$(CONFIG_IPSEC_ALG_CRYPTOAPI)+= ipsec_cryptoapi_init ++alg_obj-$(CONFIG_IPSEC_ALG_CRYPTOAPI) += ipsec_alg_cryptoapi.o ++else ++$(warning "Linux CryptoAPI (2.4.22+ or 2.6.x) not found, not building ipsec_cryptoapi.o") ++endif ++ ++CRYPTOAPI_OBJS := ipsec_alg_cryptoapi.o ++$(MOD_CRYPTOAPI): $(CRYPTOAPI_OBJS) ++ $(LD) -r $(CRYPTOAPI_OBJS) -o $@ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/alg/ipsec_alg_cryptoapi.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,442 @@ ++/* ++ * ipsec_alg to linux cryptoapi GLUE ++ * ++ * Authors: CODE.ar TEAM ++ * Harpo MAxx ++ * JuanJo Ciarlante ++ * Luciano Ruete ++ * ++ * ipsec_alg_cryptoapi.c,v 1.1.2.1 2003/11/21 18:12:23 jjo Exp ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free 
Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * Example usage: ++ * modinfo -p ipsec_cryptoapi (quite useful info, including supported algos) ++ * modprobe ipsec_cryptoapi ++ * modprobe ipsec_cryptoapi test=1 ++ * modprobe ipsec_cryptoapi excl=1 (exclusive cipher/algo) ++ * modprobe ipsec_cryptoapi noauto=1 aes=1 twofish=1 (only these ciphers) ++ * modprobe ipsec_cryptoapi aes=128,128 (force these keylens) ++ * modprobe ipsec_cryptoapi des_ede3=0 (everything but 3DES) ++ */ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++ ++/* ++ * special case: ipsec core modular with this static algo inside: ++ * must avoid MODULE magic for this file ++ */ ++#if CONFIG_IPSEC_MODULE && CONFIG_IPSEC_ALG_CRYPTOAPI ++#undef MODULE ++#endif ++ ++#include ++#include ++ ++#include /* printk() */ ++#include /* error codes */ ++#include /* size_t */ ++#include ++ ++/* Check if __exit is defined, if not null it */ ++#ifndef __exit ++#define __exit ++#endif ++ ++/* warn the innocent */ ++#if !defined (CONFIG_CRYPTO) && !defined (CONFIG_CRYPTO_MODULE) ++#warning "No linux CryptoAPI found, install 2.4.22+ or 2.6.x" ++#define NO_CRYPTOAPI_SUPPORT ++#endif ++/* Low freeswan header coupling */ ++#include "openswan/ipsec_alg.h" ++ ++#include ++#ifdef CRYPTO_API_VERSION_CODE ++#warning "Old CryptoAPI is not supported. 
Only linux-2.4.22+ or linux-2.6.x are supported" ++#define NO_CRYPTOAPI_SUPPORT ++#endif ++ ++#ifdef NO_CRYPTOAPI_SUPPORT ++#warning "Building an unusable module :P" ++/* Catch old CryptoAPI by not allowing module to load */ ++IPSEC_ALG_MODULE_INIT( ipsec_cryptoapi_init ) ++{ ++ printk(KERN_WARNING "ipsec_cryptoapi.o was not built on stock Linux CryptoAPI (2.4.22+ or 2.6.x), not loading.\n"); ++ return -EINVAL; ++} ++#else ++#include ++#include ++#include ++ ++#define CIPHERNAME_AES "aes" ++#define CIPHERNAME_3DES "des3_ede" ++#define CIPHERNAME_BLOWFISH "blowfish" ++#define CIPHERNAME_CAST "cast5" ++#define CIPHERNAME_SERPENT "serpent" ++#define CIPHERNAME_TWOFISH "twofish" ++ ++#define ESP_3DES 3 ++#define ESP_AES 12 ++#define ESP_BLOWFISH 7 /* truely _constant_ :) */ ++#define ESP_CAST 6 /* quite constant :) */ ++#define ESP_SERPENT 252 /* from ipsec drafts */ ++#define ESP_TWOFISH 253 /* from ipsec drafts */ ++ ++#define AH_MD5 2 ++#define AH_SHA 3 ++#define DIGESTNAME_MD5 "md5" ++#define DIGESTNAME_SHA1 "sha1" ++ ++MODULE_AUTHOR("Juanjo Ciarlante, Harpo MAxx, Luciano Ruete"); ++static int debug=0; ++static int test=0; ++static int excl=0; ++static int noauto = 0; ++ ++static int des_ede3[] = {-1, -1}; ++static int aes[] = {-1, -1}; ++static int blowfish[] = {-1, -1}; ++static int cast[] = {-1, -1}; ++static int serpent[] = {-1, -1}; ++static int twofish[] = {-1, -1}; ++ ++#ifdef module_param ++module_param(debug,int,0600); ++module_param(test,int,0600); ++module_param(ebug,int,0600); ++ ++module_param(noauto,int,0600); ++module_param(ebug,int,0600); ++ ++module_param_array(des_ede3,int,NULL,0); ++module_param(aes,int,NULL,0); ++module_param(blowfish,int,NULL,0); ++module_param(cast,int,NULL,0); ++module_param(serpent,int,NULL,0); ++module_param(twofish,int,NULL,0); ++#else ++MODULE_PARM(debug, "i"); ++MODULE_PARM(test, "i"); ++MODULE_PARM(excl, "i"); ++ ++MODULE_PARM(noauto,"i"); ++ ++MODULE_PARM(des_ede3,"1-2i"); ++MODULE_PARM(aes,"1-2i"); 
++MODULE_PARM(blowfish,"1-2i"); ++MODULE_PARM(cast,"1-2i"); ++MODULE_PARM(serpent,"1-2i"); ++MODULE_PARM(twofish,"1-2i"); ++#endif ++ ++MODULE_PARM_DESC(noauto, "Dont try all known algos, just setup enabled ones"); ++ ++MODULE_PARM_DESC(des_ede3, "0: disable | 1: force_enable | min,max: dontuse"); ++MODULE_PARM_DESC(aes, "0: disable | 1: force_enable | min,max: keybitlens"); ++MODULE_PARM_DESC(blowfish, "0: disable | 1: force_enable | min,max: keybitlens"); ++MODULE_PARM_DESC(cast, "0: disable | 1: force_enable | min,max: keybitlens"); ++MODULE_PARM_DESC(serpent, "0: disable | 1: force_enable | min,max: keybitlens"); ++MODULE_PARM_DESC(twofish, "0: disable | 1: force_enable | min,max: keybitlens"); ++ ++struct ipsec_alg_capi_cipher { ++ const char *ciphername; /* cryptoapi's ciphername */ ++ unsigned blocksize; ++ unsigned short minbits; ++ unsigned short maxbits; ++ int *parm; /* lkm param for this cipher */ ++ struct ipsec_alg_enc alg; /* note it's not a pointer */ ++}; ++static struct ipsec_alg_capi_cipher alg_capi_carray[] = { ++ { CIPHERNAME_AES , 16, 128, 256, aes , { ixt_alg_id: ESP_AES, }}, ++ { CIPHERNAME_TWOFISH , 16, 128, 256, twofish, { ixt_alg_id: ESP_TWOFISH, }}, ++ { CIPHERNAME_SERPENT , 16, 128, 256, serpent, { ixt_alg_id: ESP_SERPENT, }}, ++ { CIPHERNAME_CAST , 8, 128, 128, cast , { ixt_alg_id: ESP_CAST, }}, ++ { CIPHERNAME_BLOWFISH , 8, 96, 448, blowfish,{ ixt_alg_id: ESP_BLOWFISH, }}, ++ { CIPHERNAME_3DES , 8, 192, 192, des_ede3,{ ixt_alg_id: ESP_3DES, }}, ++ { NULL, 0, 0, 0, NULL, {} } ++}; ++#ifdef NOT_YET ++struct ipsec_alg_capi_digest { ++ const char *digestname; /* cryptoapi's digestname */ ++ struct digest_implementation *di; ++ struct ipsec_alg_auth alg; /* note it's not a pointer */ ++}; ++static struct ipsec_alg_capi_cipher alg_capi_darray[] = { ++ { DIGESTNAME_MD5, NULL, { ixt_alg_id: AH_MD5, }}, ++ { DIGESTNAME_SHA1, NULL, { ixt_alg_id: AH_SHA, }}, ++ { NULL, NULL, {} } ++}; ++#endif ++/* ++ * "generic" linux cryptoapi setup_cipher() 
function ++ */ ++int setup_cipher(const char *ciphername) ++{ ++ return crypto_alg_available(ciphername, 0); ++} ++ ++/* ++ * setups ipsec_alg_capi_cipher "hyper" struct components, calling ++ * register_ipsec_alg for cointaned ipsec_alg object ++ */ ++static void _capi_destroy_key (struct ipsec_alg_enc *alg, __u8 *key_e); ++static __u8 * _capi_new_key (struct ipsec_alg_enc *alg, const __u8 *key, size_t keylen); ++static int _capi_cbc_encrypt(struct ipsec_alg_enc *alg, __u8 * key_e, __u8 * in, int ilen, const __u8 * iv, int encrypt); ++ ++static int ++setup_ipsec_alg_capi_cipher(struct ipsec_alg_capi_cipher *cptr) ++{ ++ int ret; ++ cptr->alg.ixt_version = IPSEC_ALG_VERSION; ++ cptr->alg.ixt_module = THIS_MODULE; ++ atomic_set (& cptr->alg.ixt_refcnt, 0); ++ strncpy (cptr->alg.ixt_name , cptr->ciphername, sizeof (cptr->alg.ixt_name)); ++ ++ cptr->alg.ixt_blocksize=cptr->blocksize; ++ cptr->alg.ixt_keyminbits=cptr->minbits; ++ cptr->alg.ixt_keymaxbits=cptr->maxbits; ++ cptr->alg.ixt_state = 0; ++ if (excl) cptr->alg.ixt_state |= IPSEC_ALG_ST_EXCL; ++ cptr->alg.ixt_e_keylen=cptr->alg.ixt_keymaxbits/8; ++ cptr->alg.ixt_e_ctx_size = 0; ++ cptr->alg.ixt_alg_type = IPSEC_ALG_TYPE_ENCRYPT; ++ cptr->alg.ixt_e_new_key = _capi_new_key; ++ cptr->alg.ixt_e_destroy_key = _capi_destroy_key; ++ cptr->alg.ixt_e_cbc_encrypt = _capi_cbc_encrypt; ++ cptr->alg.ixt_data = cptr; ++ ++ ret=register_ipsec_alg_enc(&cptr->alg); ++ printk("setup_ipsec_alg_capi_cipher(): " ++ "alg_type=%d alg_id=%d name=%s " ++ "keyminbits=%d keymaxbits=%d, ret=%d\n", ++ cptr->alg.ixt_alg_type, ++ cptr->alg.ixt_alg_id, ++ cptr->alg.ixt_name, ++ cptr->alg.ixt_keyminbits, ++ cptr->alg.ixt_keymaxbits, ++ ret); ++ return ret; ++} ++/* ++ * called in ipsec_sa_wipe() time, will destroy key contexts ++ * and do 1 unbind() ++ */ ++static void ++_capi_destroy_key (struct ipsec_alg_enc *alg, __u8 *key_e) ++{ ++ struct crypto_tfm *tfm=(struct crypto_tfm*)key_e; ++ ++ if (debug > 0) ++ printk(KERN_DEBUG "klips_debug: 
_capi_destroy_key:" ++ "name=%s key_e=%p \n", ++ alg->ixt_name, key_e); ++ if (!key_e) { ++ printk(KERN_ERR "klips_debug: _capi_destroy_key:" ++ "name=%s NULL key_e!\n", ++ alg->ixt_name); ++ return; ++ } ++ crypto_free_tfm(tfm); ++} ++ ++/* ++ * create new key context, need alg->ixt_data to know which ++ * (of many) cipher inside this module is the target ++ */ ++static __u8 * ++_capi_new_key (struct ipsec_alg_enc *alg, const __u8 *key, size_t keylen) ++{ ++ struct ipsec_alg_capi_cipher *cptr; ++ struct crypto_tfm *tfm=NULL; ++ ++ cptr = alg->ixt_data; ++ if (!cptr) { ++ printk(KERN_ERR "_capi_new_key(): " ++ "NULL ixt_data (?!) for \"%s\" algo\n" ++ , alg->ixt_name); ++ goto err; ++ } ++ if (debug > 0) ++ printk(KERN_DEBUG "klips_debug:_capi_new_key:" ++ "name=%s cptr=%p key=%p keysize=%d\n", ++ alg->ixt_name, cptr, key, keylen); ++ ++ /* ++ * alloc tfm ++ */ ++ tfm = crypto_alloc_tfm(cptr->ciphername, CRYPTO_TFM_MODE_CBC); ++ if (!tfm) { ++ printk(KERN_ERR "_capi_new_key(): " ++ "NULL tfm for \"%s\" cryptoapi (\"%s\") algo\n" ++ , alg->ixt_name, cptr->ciphername); ++ goto err; ++ } ++ if (crypto_cipher_setkey(tfm, key, keylen) < 0) { ++ printk(KERN_ERR "_capi_new_key(): " ++ "failed new_key() for \"%s\" cryptoapi algo (keylen=%d)\n" ++ , alg->ixt_name, keylen); ++ crypto_free_tfm(tfm); ++ tfm=NULL; ++ } ++err: ++ if (debug > 0) ++ printk(KERN_DEBUG "klips_debug:_capi_new_key:" ++ "name=%s key=%p keylen=%d tfm=%p\n", ++ alg->ixt_name, key, keylen, tfm); ++ return (__u8 *) tfm; ++} ++/* ++ * core encryption function: will use cx->ci to call actual cipher's ++ * cbc function ++ */ ++static int ++_capi_cbc_encrypt(struct ipsec_alg_enc *alg, __u8 * key_e, __u8 * in, int ilen, const __u8 * iv, int encrypt) { ++ int error =0; ++ struct crypto_tfm *tfm=(struct crypto_tfm *)key_e; ++ struct scatterlist sg = { ++ .page = virt_to_page(in), ++ .offset = (unsigned long)(in) % PAGE_SIZE, ++ .length=ilen, ++ }; ++ if (debug > 1) ++ printk(KERN_DEBUG 
"klips_debug:_capi_cbc_encrypt:" ++ "key_e=%p " ++ "in=%p out=%p ilen=%d iv=%p encrypt=%d\n" ++ , key_e ++ , in, in, ilen, iv, encrypt); ++ crypto_cipher_set_iv(tfm, iv, crypto_tfm_alg_ivsize(tfm)); ++ if (encrypt) ++ error = crypto_cipher_encrypt (tfm, &sg, &sg, ilen); ++ else ++ error = crypto_cipher_decrypt (tfm, &sg, &sg, ilen); ++ if (debug > 1) ++ printk(KERN_DEBUG "klips_debug:_capi_cbc_encrypt:" ++ "error=%d\n" ++ , error); ++ return (error<0)? error : ilen; ++} ++/* ++ * main initialization loop: for each cipher in list, do ++ * 1) setup cryptoapi cipher else continue ++ * 2) register ipsec_alg object ++ */ ++static int ++setup_cipher_list (struct ipsec_alg_capi_cipher* clist) ++{ ++ struct ipsec_alg_capi_cipher *cptr; ++ /* foreach cipher in list ... */ ++ for (cptr=clist;cptr->ciphername;cptr++) { ++ /* ++ * see if cipher has been disabled (0) or ++ * if noauto set and not enabled (1) ++ */ ++ if (cptr->parm[0] == 0 || (noauto && cptr->parm[0] < 0)) { ++ if (debug>0) ++ printk(KERN_INFO "setup_cipher_list(): " ++ "ciphername=%s skipped at user request: " ++ "noauto=%d parm[0]=%d parm[1]=%d\n" ++ , cptr->ciphername ++ , noauto ++ , cptr->parm[0] ++ , cptr->parm[1]); ++ continue; ++ } ++ /* ++ * use a local ci to avoid touching cptr->ci, ++ * if register ipsec_alg success then bind cipher ++ */ ++ if( setup_cipher(cptr->ciphername) ) { ++ if (debug > 0) ++ printk(KERN_DEBUG "klips_debug:" ++ "setup_cipher_list():" ++ "ciphername=%s found\n" ++ , cptr->ciphername); ++ if (setup_ipsec_alg_capi_cipher(cptr) == 0) { ++ ++ ++ } else { ++ printk(KERN_ERR "klips_debug:" ++ "setup_cipher_list():" ++ "ciphername=%s failed ipsec_alg_register\n" ++ , cptr->ciphername); ++ } ++ } else { ++ if (debug>0) ++ printk(KERN_INFO "setup_cipher_list(): lookup for ciphername=%s: not found \n", ++ cptr->ciphername); ++ } ++ } ++ return 0; ++} ++/* ++ * deregister ipsec_alg objects and unbind ciphers ++ */ ++static int ++unsetup_cipher_list (struct ipsec_alg_capi_cipher* clist) 
++{ ++ struct ipsec_alg_capi_cipher *cptr; ++ /* foreach cipher in list ... */ ++ for (cptr=clist;cptr->ciphername;cptr++) { ++ if (cptr->alg.ixt_state & IPSEC_ALG_ST_REGISTERED) { ++ unregister_ipsec_alg_enc(&cptr->alg); ++ } ++ } ++ return 0; ++} ++/* ++ * test loop for registered algos ++ */ ++static int ++test_cipher_list (struct ipsec_alg_capi_cipher* clist) ++{ ++ int test_ret; ++ struct ipsec_alg_capi_cipher *cptr; ++ /* foreach cipher in list ... */ ++ for (cptr=clist;cptr->ciphername;cptr++) { ++ if (cptr->alg.ixt_state & IPSEC_ALG_ST_REGISTERED) { ++ test_ret=ipsec_alg_test( ++ cptr->alg.ixt_alg_type, ++ cptr->alg.ixt_alg_id, ++ test); ++ printk("test_cipher_list(alg_type=%d alg_id=%d): test_ret=%d\n", ++ cptr->alg.ixt_alg_type, ++ cptr->alg.ixt_alg_id, ++ test_ret); ++ } ++ } ++ return 0; ++} ++ ++IPSEC_ALG_MODULE_INIT( ipsec_cryptoapi_init ) ++{ ++ int ret, test_ret; ++ if ((ret=setup_cipher_list(alg_capi_carray)) < 0) ++ return -EPROTONOSUPPORT; ++ if (ret==0 && test) { ++ test_ret=test_cipher_list(alg_capi_carray); ++ } ++ return ret; ++} ++IPSEC_ALG_MODULE_EXIT( ipsec_cryptoapi_fini ) ++{ ++ unsetup_cipher_list(alg_capi_carray); ++ return; ++} ++#ifdef MODULE_LICENSE ++MODULE_LICENSE("GPL"); ++#endif ++ ++EXPORT_NO_SYMBOLS; ++#endif /* NO_CRYPTOAPI_SUPPORT */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/alg/scripts/mk-static_init.c.sh Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,18 @@ ++#!/bin/sh ++cat << EOF ++#include ++#include ++#include "freeswan/ipsec_alg.h" ++$(for i in $*; do ++ test -z "$i" && continue ++ echo "extern int $i(void);" ++done) ++void ipsec_alg_static_init(void){ ++ int __attribute__ ((unused)) err=0; ++$(for i in $*; do ++ test -z "$i" && continue ++ echo " if ((err=$i()) < 0)" ++ echo " printk(KERN_WARNING \"$i() returned %d\", err);" ++done) ++} ++EOF +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/anyaddr.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,148 @@ ++/* ++ * special addresses ++ * Copyright (C) 2000 
Henry Spencer. ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: anyaddr.c,v 1.10.10.1 2006-11-24 05:55:46 paul Exp $ ++ */ ++#include "openswan.h" ++ ++/* these are mostly fallbacks for the no-IPv6-support-in-library case */ ++#ifndef IN6ADDR_ANY_INIT ++#define IN6ADDR_ANY_INIT {{{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }}} ++#endif ++#ifndef IN6ADDR_LOOPBACK_INIT ++#define IN6ADDR_LOOPBACK_INIT {{{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 }}} ++#endif ++ ++static struct in6_addr v6any = IN6ADDR_ANY_INIT; ++static struct in6_addr v6loop = IN6ADDR_LOOPBACK_INIT; ++ ++/* ++ - anyaddr - initialize to the any-address value ++ */ ++err_t /* NULL for success, else string literal */ ++anyaddr(af, dst) ++int af; /* address family */ ++ip_address *dst; ++{ ++ uint32_t v4any = htonl(INADDR_ANY); ++ ++ switch (af) { ++ case AF_INET: ++ return initaddr((unsigned char *)&v4any, sizeof(v4any), af, dst); ++ break; ++ case AF_INET6: ++ return initaddr((unsigned char *)&v6any, sizeof(v6any), af, dst); ++ break; ++ default: ++ return "unknown address family in anyaddr/unspecaddr"; ++ break; ++ } ++} ++ ++/* ++ - unspecaddr - initialize to the unspecified-address value ++ */ ++err_t /* NULL for success, else string literal */ ++unspecaddr(af, dst) ++int af; /* address family */ ++ip_address *dst; ++{ ++ return anyaddr(af, dst); ++} ++ ++/* ++ - loopbackaddr - initialize to the loopback-address value ++ */ ++err_t /* NULL for success, else string literal */ ++loopbackaddr(af, dst) ++int af; /* 
address family */ ++ip_address *dst; ++{ ++ uint32_t v4loop = htonl(INADDR_LOOPBACK); ++ ++ switch (af) { ++ case AF_INET: ++ return initaddr((unsigned char *)&v4loop, sizeof(v4loop), af, dst); ++ break; ++ case AF_INET6: ++ return initaddr((unsigned char *)&v6loop, sizeof(v6loop), af, dst); ++ break; ++ default: ++ return "unknown address family in loopbackaddr"; ++ break; ++ } ++} ++ ++/* ++ - isanyaddr - test for the any-address value ++ */ ++int ++isanyaddr(src) ++const ip_address *src; ++{ ++ uint32_t v4any = htonl(INADDR_ANY); ++ int cmp; ++ ++ switch (src->u.v4.sin_family) { ++ case AF_INET: ++ cmp = memcmp(&src->u.v4.sin_addr.s_addr, &v4any, sizeof(v4any)); ++ break; ++ case AF_INET6: ++ cmp = memcmp(&src->u.v6.sin6_addr, &v6any, sizeof(v6any)); ++ break; ++ case 0: ++ /* a zeroed structure is considered any address */ ++ return 1; ++ default: ++ return 0; ++ break; ++ } ++ ++ return (cmp == 0) ? 1 : 0; ++} ++ ++/* ++ - isunspecaddr - test for the unspecified-address value ++ */ ++int ++isunspecaddr(src) ++const ip_address *src; ++{ ++ return isanyaddr(src); ++} ++ ++/* ++ - isloopbackaddr - test for the loopback-address value ++ */ ++int ++isloopbackaddr(src) ++const ip_address *src; ++{ ++ uint32_t v4loop = htonl(INADDR_LOOPBACK); ++ int cmp; ++ ++ switch (src->u.v4.sin_family) { ++ case AF_INET: ++ cmp = memcmp(&src->u.v4.sin_addr.s_addr, &v4loop, sizeof(v4loop)); ++ break; ++ case AF_INET6: ++ cmp = memcmp(&src->u.v6.sin6_addr, &v6loop, sizeof(v6loop)); ++ break; ++ default: ++ return 0; ++ break; ++ } ++ ++ return (cmp == 0) ? 1 : 0; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/datatot.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,234 @@ ++/* ++ * convert from binary data (e.g. key) to text form ++ * Copyright (C) 2000 Henry Spencer. 
++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: datatot.c,v 1.7 2005-04-14 20:48:43 mcr Exp $ ++ */ ++#include "openswan.h" ++ ++static void convert(const char *src, size_t nreal, int format, char *out); ++ ++/* ++ - datatot - convert data bytes to text ++ */ ++size_t /* true length (with NUL) for success */ ++datatot(src, srclen, format, dst, dstlen) ++const char *src; ++size_t srclen; ++int format; /* character indicating what format */ ++char *dst; /* need not be valid if dstlen is 0 */ ++size_t dstlen; ++{ ++ size_t inblocksize; /* process this many bytes at a time */ ++ size_t outblocksize; /* producing this many */ ++ size_t breakevery; /* add a _ every this many (0 means don't) */ ++ size_t sincebreak; /* output bytes since last _ */ ++ char breakchar; /* character used to break between groups */ ++ char inblock[10]; /* enough for any format */ ++ char outblock[10]; /* enough for any format */ ++ char fake[1]; /* fake output area for dstlen == 0 */ ++ size_t needed; /* return value */ ++ char *stop; /* where the terminating NUL will go */ ++ size_t ntodo; /* remaining input */ ++ size_t nreal; ++ char *out; ++ char *prefix; ++ ++ breakevery = 0; ++ breakchar = '_'; ++ ++ switch (format) { ++ case 0: ++ case 'h': ++ format = 'x'; ++ breakevery = 8; ++ /* FALLTHROUGH */ ++ case 'x': ++ inblocksize = 1; ++ outblocksize = 2; ++ prefix = "0x"; ++ break; ++ case ':': ++ format = 'x'; ++ breakevery = 2; ++ breakchar = ':'; ++ /* FALLTHROUGH */ ++ case 16: 
++ inblocksize = 1; ++ outblocksize = 2; ++ prefix = ""; ++ format = 'x'; ++ break; ++ case 's': ++ inblocksize = 3; ++ outblocksize = 4; ++ prefix = "0s"; ++ break; ++ case 64: /* beware, equals ' ' */ ++ inblocksize = 3; ++ outblocksize = 4; ++ prefix = ""; ++ format = 's'; ++ break; ++ default: ++ return 0; ++ break; ++ } ++ ++ user_assert(inblocksize < sizeof(inblock)); ++ user_assert(outblocksize < sizeof(outblock)); ++ user_assert(breakevery % outblocksize == 0); ++ ++ if (srclen == 0) ++ return 0; ++ ntodo = srclen; ++ ++ if (dstlen == 0) { /* dispose of awkward special case */ ++ dst = fake; ++ dstlen = 1; ++ } ++ stop = dst + dstlen - 1; ++ ++ nreal = strlen(prefix); ++ needed = nreal; /* for starters */ ++ if (dstlen <= nreal) { /* prefix won't fit */ ++ strncpy(dst, prefix, dstlen - 1); ++ dst += dstlen - 1; ++ } else { ++ strcpy(dst, prefix); ++ dst += nreal; ++ } ++ ++ user_assert(dst <= stop); ++ sincebreak = 0; ++ ++ while (ntodo > 0) { ++ if (ntodo < inblocksize) { /* incomplete input */ ++ memset(inblock, 0, sizeof(inblock)); ++ memcpy(inblock, src, ntodo); ++ src = inblock; ++ nreal = ntodo; ++ ntodo = inblocksize; ++ } else ++ nreal = inblocksize; ++ out = (outblocksize > stop - dst) ? 
outblock : dst; ++ ++ convert(src, nreal, format, out); ++ needed += outblocksize; ++ sincebreak += outblocksize; ++ if (dst < stop) { ++ if (out != dst) { ++ user_assert(outblocksize > stop - dst); ++ memcpy(dst, out, stop - dst); ++ dst = stop; ++ } else ++ dst += outblocksize; ++ } ++ ++ src += inblocksize; ++ ntodo -= inblocksize; ++ if (breakevery != 0 && sincebreak >= breakevery && ntodo > 0) { ++ if (dst < stop) ++ *dst++ = breakchar; ++ needed++; ++ sincebreak = 0; ++ } ++ } ++ ++ user_assert(dst <= stop); ++ *dst++ = '\0'; ++ needed++; ++ ++ return needed; ++} ++ ++/* ++ - convert - convert one input block to one output block ++ */ ++static void ++convert(src, nreal, format, out) ++const char *src; ++size_t nreal; /* how much of the input block is real */ ++int format; ++char *out; ++{ ++ static char hex[] = "0123456789abcdef"; ++ static char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" ++ "abcdefghijklmnopqrstuvwxyz" ++ "0123456789+/"; ++ unsigned char c; ++ unsigned char c1, c2, c3; ++ ++ user_assert(nreal > 0); ++ switch (format) { ++ case 'x': ++ user_assert(nreal == 1); ++ c = (unsigned char)*src; ++ *out++ = hex[c >> 4]; ++ *out++ = hex[c & 0xf]; ++ break; ++ case 's': ++ c1 = (unsigned char)*src++; ++ c2 = (unsigned char)*src++; ++ c3 = (unsigned char)*src++; ++ *out++ = base64[c1 >> 2]; /* top 6 bits of c1 */ ++ c = (c1 & 0x3) << 4; /* bottom 2 of c1... */ ++ c |= c2 >> 4; /* ...top 4 of c2 */ ++ *out++ = base64[c]; ++ if (nreal == 1) ++ *out++ = '='; ++ else { ++ c = (c2 & 0xf) << 2; /* bottom 4 of c2... 
*/ ++ c |= c3 >> 6; /* ...top 2 of c3 */ ++ *out++ = base64[c]; ++ } ++ if (nreal <= 2) ++ *out++ = '='; ++ else ++ *out++ = base64[c3 & 0x3f]; /* bottom 6 of c3 */ ++ break; ++ default: ++ user_assert(nreal == 0); /* unknown format */ ++ break; ++ } ++} ++ ++/* ++ - datatoa - convert data to ASCII ++ * backward-compatibility synonym for datatot ++ */ ++size_t /* true length (with NUL) for success */ ++datatoa(src, srclen, format, dst, dstlen) ++const char *src; ++size_t srclen; ++int format; /* character indicating what format */ ++char *dst; /* need not be valid if dstlen is 0 */ ++size_t dstlen; ++{ ++ return datatot(src, srclen, format, dst, dstlen); ++} ++ ++/* ++ - bytestoa - convert data bytes to ASCII ++ * backward-compatibility synonym for datatot ++ */ ++size_t /* true length (with NUL) for success */ ++bytestoa(src, srclen, format, dst, dstlen) ++const char *src; ++size_t srclen; ++int format; /* character indicating what format */ ++char *dst; /* need not be valid if dstlen is 0 */ ++size_t dstlen; ++{ ++ return datatot(src, srclen, format, dst, dstlen); ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/defconfig Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,148 @@ ++ ++# ++# RCSID $Id: defconfig,v 1.28.2.1 2006-10-11 18:14:33 paul Exp $ ++# ++ ++# ++# FreeS/WAN IPSec implementation, KLIPS kernel config defaults ++# ++ ++# ++# First, lets override stuff already set or not in the kernel config. ++# ++# We can't even think about leaving this off... ++CONFIG_INET=y ++ ++# ++# This must be on for subnet protection. ++CONFIG_IP_FORWARD=y ++ ++# Shut off IPSEC masquerading if it has been enabled, since it will ++# break the compile. IPPROTO_ESP and IPPROTO_AH were included in ++# net/ipv4/ip_masq.c when they should have gone into include/linux/in.h. ++CONFIG_IP_MASQUERADE_IPSEC=n ++ ++# ++# Next, lets set the recommended FreeS/WAN configuration. ++# ++ ++# To config as static (preferred), 'y'. To config as module, 'm'. 
++CONFIG_KLIPS=m ++ ++# To do tunnel mode IPSec, this must be enabled. ++CONFIG_KLIPS_IPIP=y ++ ++# To enable authentication, say 'y'. (Highly recommended) ++CONFIG_KLIPS_AH=y ++ ++# Authentication algorithm(s): ++CONFIG_KLIPS_AUTH_HMAC_MD5=y ++CONFIG_KLIPS_AUTH_HMAC_SHA1=y ++ ++# To enable encryption, say 'y'. (Highly recommended) ++CONFIG_KLIPS_ESP=y ++ ++# modular algo extensions (and new ALGOs) ++CONFIG_KLIPS_ALG=y ++ ++# Encryption algorithm(s): ++CONFIG_KLIPS_ENC_3DES=y ++CONFIG_KLIPS_ENC_AES=y ++# CONFIG_KLIPS_ENC_NULL=y ++ ++# Use CryptoAPI for ALG? - by default, no. ++CONFIG_KLIPS_ENC_CRYPTOAPI=n ++ ++# IP Compression: new, probably still has minor bugs. ++CONFIG_KLIPS_IPCOMP=y ++ ++# To enable userspace-switchable KLIPS debugging, say 'y'. ++CONFIG_KLIPS_DEBUG=y ++ ++# NAT Traversal ++CONFIG_IPSEC_NAT_TRAVERSAL=y ++ ++# ++# ++# $Log: defconfig,v $ ++# Revision 1.28.2.1 2006-10-11 18:14:33 paul ++# Add JuanJo Ciarlante's ESP_NULL patches for KLIPS, but leave it disabled ++# per default. ++# ++# Revision 1.28 2005/05/11 03:15:42 mcr ++# adjusted makefiles to sanely build modules properly. ++# ++# Revision 1.27 2005/03/20 03:00:05 mcr ++# default configuration should enable NAT_TRAVERSAL. ++# ++# Revision 1.26 2004/07/10 19:11:18 mcr ++# CONFIG_IPSEC -> CONFIG_KLIPS. ++# ++# Revision 1.25 2004/07/05 01:03:53 mcr ++# fix for adding cryptoapi code. ++# keep it off for now, since UMLs do not have it yet. ++# ++# Revision 1.24 2004/04/06 02:49:25 mcr ++# pullup of algo code from alg-branch. ++# ++# Revision 1.23.2.2 2004/04/05 04:30:46 mcr ++# patches for alg-branch to compile/work with 2.x openswan ++# ++# Revision 1.23.2.1 2003/12/22 15:25:52 jjo ++# . Merged algo-0.8.1-rc11-test1 into alg-branch ++# ++# Revision 1.23 2003/12/10 01:14:27 mcr ++# NAT-traversal patches to KLIPS. ++# ++# Revision 1.22 2003/02/24 19:37:27 mcr ++# changed default compilation mode to static. 
++# ++# Revision 1.21 2002/04/24 07:36:27 mcr ++# Moved from ./klips/net/ipsec/defconfig,v ++# ++# Revision 1.20 2002/04/02 04:07:40 mcr ++# default build is now 'm'odule for KLIPS ++# ++# Revision 1.19 2002/03/08 18:57:17 rgb ++# Added a blank line at the beginning of the file to make it easier for ++# other projects to patch ./arch/i386/defconfig, for example ++# LIDS+grSecurity requested by Jason Pattie. ++# ++# Revision 1.18 2000/11/30 17:26:56 rgb ++# Cleaned out unused options and enabled ipcomp by default. ++# ++# Revision 1.17 2000/09/15 11:37:01 rgb ++# Merge in heavily modified Svenning Soerensen's ++# IPCOMP zlib deflate code. ++# ++# Revision 1.16 2000/09/08 19:12:55 rgb ++# Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++# ++# Revision 1.15 2000/05/24 19:37:13 rgb ++# *** empty log message *** ++# ++# Revision 1.14 2000/05/11 21:14:57 henry ++# just commenting the FOOBAR=y lines out is not enough ++# ++# Revision 1.13 2000/05/10 20:17:58 rgb ++# Comment out netlink defaults, which are no longer needed. ++# ++# Revision 1.12 2000/05/10 19:13:38 rgb ++# Added configure option to shut off no eroute passthrough. ++# ++# Revision 1.11 2000/03/16 07:09:46 rgb ++# Hardcode PF_KEYv2 support. ++# Disable IPSEC_ICMP by default. ++# Remove DES config option from defaults file. ++# ++# Revision 1.10 2000/01/11 03:09:42 rgb ++# Added a default of 'y' to PF_KEYv2 keying I/F. ++# ++# Revision 1.9 1999/05/08 21:23:12 rgb ++# Added support for 2.2.x kernels. ++# ++# Revision 1.8 1999/04/06 04:54:25 rgb ++# Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++# patch shell fixes. ++# ++# +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/deflate.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,1351 @@ ++/* deflate.c -- compress data using the deflation algorithm ++ * Copyright (C) 1995-2002 Jean-loup Gailly. 
++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++/* ++ * ALGORITHM ++ * ++ * The "deflation" process depends on being able to identify portions ++ * of the input text which are identical to earlier input (within a ++ * sliding window trailing behind the input currently being processed). ++ * ++ * The most straightforward technique turns out to be the fastest for ++ * most input files: try all possible matches and select the longest. ++ * The key feature of this algorithm is that insertions into the string ++ * dictionary are very simple and thus fast, and deletions are avoided ++ * completely. Insertions are performed at each input character, whereas ++ * string matches are performed only when the previous match ends. So it ++ * is preferable to spend more time in matches to allow very fast string ++ * insertions and avoid deletions. The matching algorithm for small ++ * strings is inspired from that of Rabin & Karp. A brute force approach ++ * is used to find longer strings when a small match has been found. ++ * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze ++ * (by Leonid Broukhis). ++ * A previous version of this file used a more sophisticated algorithm ++ * (by Fiala and Greene) which is guaranteed to run in linear amortized ++ * time, but has a larger average cost, uses more memory and is patented. ++ * However the F&G algorithm may be faster for some highly redundant ++ * files if the parameter max_chain_length (described below) is too large. ++ * ++ * ACKNOWLEDGEMENTS ++ * ++ * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and ++ * I found it in 'freeze' written by Leonid Broukhis. ++ * Thanks to many people for bug reports and testing. ++ * ++ * REFERENCES ++ * ++ * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". ++ * Available in ftp://ds.internic.net/rfc/rfc1951.txt ++ * ++ * A description of the Rabin and Karp algorithm is given in the book ++ * "Algorithms" by R. 
Sedgewick, Addison-Wesley, p252. ++ * ++ * Fiala,E.R., and Greene,D.H. ++ * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 ++ * ++ */ ++ ++/* @(#) $Id: deflate.c,v 1.4 2004-07-10 07:48:37 mcr Exp $ */ ++ ++#include "deflate.h" ++ ++local const char deflate_copyright[] = ++ " deflate 1.1.4 Copyright 1995-2002 Jean-loup Gailly "; ++/* ++ If you use the zlib library in a product, an acknowledgment is welcome ++ in the documentation of your product. If for some reason you cannot ++ include such an acknowledgment, I would appreciate that you keep this ++ copyright string in the executable of your product. ++ */ ++ ++/* =========================================================================== ++ * Function prototypes. ++ */ ++typedef enum { ++ need_more, /* block not completed, need more input or more output */ ++ block_done, /* block flush performed */ ++ finish_started, /* finish started, need only more output at next deflate */ ++ finish_done /* finish done, accept no more input or output */ ++} block_state; ++ ++typedef block_state (*compress_func) OF((deflate_state *s, int flush)); ++/* Compression function. Returns the block state after the call. 
*/ ++ ++local void fill_window OF((deflate_state *s)); ++local block_state deflate_stored OF((deflate_state *s, int flush)); ++local block_state deflate_fast OF((deflate_state *s, int flush)); ++local block_state deflate_slow OF((deflate_state *s, int flush)); ++local void lm_init OF((deflate_state *s)); ++local void putShortMSB OF((deflate_state *s, uInt b)); ++local void flush_pending OF((z_streamp strm)); ++local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); ++#ifdef ASMV ++ void match_init OF((void)); /* asm code initialization */ ++ uInt longest_match OF((deflate_state *s, IPos cur_match)); ++#else ++local uInt longest_match OF((deflate_state *s, IPos cur_match)); ++#endif ++ ++#ifdef DEBUG ++local void check_match OF((deflate_state *s, IPos start, IPos match, ++ int length)); ++#endif ++ ++/* =========================================================================== ++ * Local data ++ */ ++ ++#define NIL 0 ++/* Tail of hash chains */ ++ ++#ifndef TOO_FAR ++# define TOO_FAR 4096 ++#endif ++/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ ++ ++#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) ++/* Minimum amount of lookahead, except at the end of the input file. ++ * See deflate.c for comments about the MIN_MATCH+1. ++ */ ++ ++/* Values for max_lazy_match, good_match and max_chain_length, depending on ++ * the desired pack level (0..9). The values given below have been tuned to ++ * exclude worst case performance for pathological files. Better values may be ++ * found for specific files. 
++ */ ++typedef struct config_s { ++ ush good_length; /* reduce lazy search above this match length */ ++ ush max_lazy; /* do not perform lazy search above this match length */ ++ ush nice_length; /* quit search above this match length */ ++ ush max_chain; ++ compress_func func; ++} config; ++ ++local const config configuration_table[10] = { ++/* good lazy nice chain */ ++/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ ++/* 1 */ {4, 4, 8, 4, deflate_fast}, /* maximum speed, no lazy matches */ ++/* 2 */ {4, 5, 16, 8, deflate_fast}, ++/* 3 */ {4, 6, 32, 32, deflate_fast}, ++ ++/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ ++/* 5 */ {8, 16, 32, 32, deflate_slow}, ++/* 6 */ {8, 16, 128, 128, deflate_slow}, ++/* 7 */ {8, 32, 128, 256, deflate_slow}, ++/* 8 */ {32, 128, 258, 1024, deflate_slow}, ++/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* maximum compression */ ++ ++/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 ++ * For deflate_fast() (levels <= 3) good is ignored and lazy has a different ++ * meaning. ++ */ ++ ++#define EQUAL 0 ++/* result of memcmp for equal strings */ ++ ++struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ ++ ++/* =========================================================================== ++ * Update a hash value with the given input byte ++ * IN assertion: all calls to to UPDATE_HASH are made with consecutive ++ * input characters, so that a running hash key can be computed from the ++ * previous key instead of complete recalculation each time. ++ */ ++#define UPDATE_HASH(s,h,c) (h = (((h)<hash_shift) ^ (c)) & s->hash_mask) ++ ++ ++/* =========================================================================== ++ * Insert string str in the dictionary and set match_head to the previous head ++ * of the hash chain (the most recent string with same hash key). Return ++ * the previous length of the hash chain. 
++ * If this file is compiled with -DFASTEST, the compression level is forced ++ * to 1, and no hash chains are maintained. ++ * IN assertion: all calls to to INSERT_STRING are made with consecutive ++ * input characters and the first MIN_MATCH bytes of str are valid ++ * (except for the last MIN_MATCH-1 bytes of the input file). ++ */ ++#ifdef FASTEST ++#define INSERT_STRING(s, str, match_head) \ ++ (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ ++ match_head = s->head[s->ins_h], \ ++ s->head[s->ins_h] = (Pos)(str)) ++#else ++#define INSERT_STRING(s, str, match_head) \ ++ (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ ++ s->prev[(str) & s->w_mask] = match_head = s->head[s->ins_h], \ ++ s->head[s->ins_h] = (Pos)(str)) ++#endif ++ ++/* =========================================================================== ++ * Initialize the hash table (avoiding 64K overflow for 16 bit systems). ++ * prev[] will be initialized on the fly. ++ */ ++#define CLEAR_HASH(s) \ ++ s->head[s->hash_size-1] = NIL; \ ++ zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); ++ ++/* ========================================================================= */ ++int ZEXPORT deflateInit_(strm, level, version, stream_size) ++ z_streamp strm; ++ int level; ++ const char *version; ++ int stream_size; ++{ ++ return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, ++ Z_DEFAULT_STRATEGY, version, stream_size); ++ /* To do: ignore strm->next_in if we use it as window */ ++} ++ ++/* ========================================================================= */ ++int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, ++ version, stream_size) ++ z_streamp strm; ++ int level; ++ int method; ++ int windowBits; ++ int memLevel; ++ int strategy; ++ const char *version; ++ int stream_size; ++{ ++ deflate_state *s; ++ int noheader = 0; ++ static const char* my_version = ZLIB_VERSION; ++ ++ ushf *overlay; ++ /* We overlay 
pending_buf and d_buf+l_buf. This works since the average ++ * output size for (length,distance) codes is <= 24 bits. ++ */ ++ ++ if (version == Z_NULL || version[0] != my_version[0] || ++ stream_size != sizeof(z_stream)) { ++ return Z_VERSION_ERROR; ++ } ++ if (strm == Z_NULL) return Z_STREAM_ERROR; ++ ++ strm->msg = Z_NULL; ++ if (strm->zalloc == Z_NULL) { ++ return Z_STREAM_ERROR; ++/* strm->zalloc = zcalloc; ++ strm->opaque = (voidpf)0;*/ ++ } ++ if (strm->zfree == Z_NULL) return Z_STREAM_ERROR; /* strm->zfree = zcfree; */ ++ ++ if (level == Z_DEFAULT_COMPRESSION) level = 6; ++#ifdef FASTEST ++ level = 1; ++#endif ++ ++ if (windowBits < 0) { /* undocumented feature: suppress zlib header */ ++ noheader = 1; ++ windowBits = -windowBits; ++ } ++ if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || ++ windowBits < 9 || windowBits > 15 || level < 0 || level > 9 || ++ strategy < 0 || strategy > Z_HUFFMAN_ONLY) { ++ return Z_STREAM_ERROR; ++ } ++ s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); ++ if (s == Z_NULL) return Z_MEM_ERROR; ++ strm->state = (struct internal_state FAR *)s; ++ s->strm = strm; ++ ++ s->noheader = noheader; ++ s->w_bits = windowBits; ++ s->w_size = 1 << s->w_bits; ++ s->w_mask = s->w_size - 1; ++ ++ s->hash_bits = memLevel + 7; ++ s->hash_size = 1 << s->hash_bits; ++ s->hash_mask = s->hash_size - 1; ++ s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); ++ ++ s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); ++ s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); ++ s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); ++ ++ s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ ++ ++ overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); ++ s->pending_buf = (uchf *) overlay; ++ s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); ++ ++ if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || ++ s->pending_buf == Z_NULL) { ++ strm->msg = 
ERR_MSG(Z_MEM_ERROR); ++ deflateEnd (strm); ++ return Z_MEM_ERROR; ++ } ++ s->d_buf = overlay + s->lit_bufsize/sizeof(ush); ++ s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; ++ ++ s->level = level; ++ s->strategy = strategy; ++ s->method = (Byte)method; ++ ++ return deflateReset(strm); ++} ++ ++/* ========================================================================= */ ++int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) ++ z_streamp strm; ++ const Bytef *dictionary; ++ uInt dictLength; ++{ ++ deflate_state *s; ++ uInt length = dictLength; ++ uInt n; ++ IPos hash_head = 0; ++ ++ if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL || ++ strm->state->status != INIT_STATE) return Z_STREAM_ERROR; ++ ++ s = strm->state; ++ strm->adler = adler32(strm->adler, dictionary, dictLength); ++ ++ if (length < MIN_MATCH) return Z_OK; ++ if (length > MAX_DIST(s)) { ++ length = MAX_DIST(s); ++#ifndef USE_DICT_HEAD ++ dictionary += dictLength - length; /* use the tail of the dictionary */ ++#endif ++ } ++ zmemcpy(s->window, dictionary, length); ++ s->strstart = length; ++ s->block_start = (long)length; ++ ++ /* Insert all strings in the hash table (except for the last two bytes). ++ * s->lookahead stays null, so s->ins_h will be recomputed at the next ++ * call of fill_window. 
++ */ ++ s->ins_h = s->window[0]; ++ UPDATE_HASH(s, s->ins_h, s->window[1]); ++ for (n = 0; n <= length - MIN_MATCH; n++) { ++ INSERT_STRING(s, n, hash_head); ++ } ++ if (hash_head) hash_head = 0; /* to make compiler happy */ ++ return Z_OK; ++} ++ ++/* ========================================================================= */ ++int ZEXPORT deflateReset (strm) ++ z_streamp strm; ++{ ++ deflate_state *s; ++ ++ if (strm == Z_NULL || strm->state == Z_NULL || ++ strm->zalloc == Z_NULL || strm->zfree == Z_NULL) return Z_STREAM_ERROR; ++ ++ strm->total_in = strm->total_out = 0; ++ strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ ++ strm->data_type = Z_UNKNOWN; ++ ++ s = (deflate_state *)strm->state; ++ s->pending = 0; ++ s->pending_out = s->pending_buf; ++ ++ if (s->noheader < 0) { ++ s->noheader = 0; /* was set to -1 by deflate(..., Z_FINISH); */ ++ } ++ s->status = s->noheader ? BUSY_STATE : INIT_STATE; ++ strm->adler = 1; ++ s->last_flush = Z_NO_FLUSH; ++ ++ _tr_init(s); ++ lm_init(s); ++ ++ return Z_OK; ++} ++ ++/* ========================================================================= */ ++int ZEXPORT deflateParams(strm, level, strategy) ++ z_streamp strm; ++ int level; ++ int strategy; ++{ ++ deflate_state *s; ++ compress_func func; ++ int err = Z_OK; ++ ++ if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; ++ s = strm->state; ++ ++ if (level == Z_DEFAULT_COMPRESSION) { ++ level = 6; ++ } ++ if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) { ++ return Z_STREAM_ERROR; ++ } ++ func = configuration_table[s->level].func; ++ ++ if (func != configuration_table[level].func && strm->total_in != 0) { ++ /* Flush the last buffer: */ ++ err = deflate(strm, Z_PARTIAL_FLUSH); ++ } ++ if (s->level != level) { ++ s->level = level; ++ s->max_lazy_match = configuration_table[level].max_lazy; ++ s->good_match = configuration_table[level].good_length; ++ s->nice_match = configuration_table[level].nice_length; ++ 
s->max_chain_length = configuration_table[level].max_chain; ++ } ++ s->strategy = strategy; ++ return err; ++} ++ ++/* ========================================================================= ++ * Put a short in the pending buffer. The 16-bit value is put in MSB order. ++ * IN assertion: the stream state is correct and there is enough room in ++ * pending_buf. ++ */ ++local void putShortMSB (s, b) ++ deflate_state *s; ++ uInt b; ++{ ++ put_byte(s, (Byte)(b >> 8)); ++ put_byte(s, (Byte)(b & 0xff)); ++} ++ ++/* ========================================================================= ++ * Flush as much pending output as possible. All deflate() output goes ++ * through this function so some applications may wish to modify it ++ * to avoid allocating a large strm->next_out buffer and copying into it. ++ * (See also read_buf()). ++ */ ++local void flush_pending(strm) ++ z_streamp strm; ++{ ++ unsigned len = strm->state->pending; ++ ++ if (len > strm->avail_out) len = strm->avail_out; ++ if (len == 0) return; ++ ++ zmemcpy(strm->next_out, strm->state->pending_out, len); ++ strm->next_out += len; ++ strm->state->pending_out += len; ++ strm->total_out += len; ++ strm->avail_out -= len; ++ strm->state->pending -= len; ++ if (strm->state->pending == 0) { ++ strm->state->pending_out = strm->state->pending_buf; ++ } ++} ++ ++/* ========================================================================= */ ++int ZEXPORT deflate (strm, flush) ++ z_streamp strm; ++ int flush; ++{ ++ int old_flush; /* value of flush param for previous deflate call */ ++ deflate_state *s; ++ ++ if (strm == Z_NULL || strm->state == Z_NULL || ++ flush > Z_FINISH || flush < 0) { ++ return Z_STREAM_ERROR; ++ } ++ s = strm->state; ++ ++ if (strm->next_out == Z_NULL || ++ (strm->next_in == Z_NULL && strm->avail_in != 0) || ++ (s->status == FINISH_STATE && flush != Z_FINISH)) { ++ ERR_RETURN(strm, Z_STREAM_ERROR); ++ } ++ if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); ++ ++ s->strm = strm; /* 
just in case */ ++ old_flush = s->last_flush; ++ s->last_flush = flush; ++ ++ /* Write the zlib header */ ++ if (s->status == INIT_STATE) { ++ ++ uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; ++ uInt level_flags = (s->level-1) >> 1; ++ ++ if (level_flags > 3) level_flags = 3; ++ header |= (level_flags << 6); ++ if (s->strstart != 0) header |= PRESET_DICT; ++ header += 31 - (header % 31); ++ ++ s->status = BUSY_STATE; ++ putShortMSB(s, header); ++ ++ /* Save the adler32 of the preset dictionary: */ ++ if (s->strstart != 0) { ++ putShortMSB(s, (uInt)(strm->adler >> 16)); ++ putShortMSB(s, (uInt)(strm->adler & 0xffff)); ++ } ++ strm->adler = 1L; ++ } ++ ++ /* Flush as much pending output as possible */ ++ if (s->pending != 0) { ++ flush_pending(strm); ++ if (strm->avail_out == 0) { ++ /* Since avail_out is 0, deflate will be called again with ++ * more output space, but possibly with both pending and ++ * avail_in equal to zero. There won't be anything to do, ++ * but this is not an error situation so make sure we ++ * return OK instead of BUF_ERROR at next call of deflate: ++ */ ++ s->last_flush = -1; ++ return Z_OK; ++ } ++ ++ /* Make sure there is something to do and avoid duplicate consecutive ++ * flushes. For repeated and useless calls with Z_FINISH, we keep ++ * returning Z_STREAM_END instead of Z_BUFF_ERROR. ++ */ ++ } else if (strm->avail_in == 0 && flush <= old_flush && ++ flush != Z_FINISH) { ++ ERR_RETURN(strm, Z_BUF_ERROR); ++ } ++ ++ /* User must not provide more input after the first FINISH: */ ++ if (s->status == FINISH_STATE && strm->avail_in != 0) { ++ ERR_RETURN(strm, Z_BUF_ERROR); ++ } ++ ++ /* Start a new block or continue the current one. 
++ */ ++ if (strm->avail_in != 0 || s->lookahead != 0 || ++ (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { ++ block_state bstate; ++ ++ bstate = (*(configuration_table[s->level].func))(s, flush); ++ ++ if (bstate == finish_started || bstate == finish_done) { ++ s->status = FINISH_STATE; ++ } ++ if (bstate == need_more || bstate == finish_started) { ++ if (strm->avail_out == 0) { ++ s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ ++ } ++ return Z_OK; ++ /* If flush != Z_NO_FLUSH && avail_out == 0, the next call ++ * of deflate should use the same flush parameter to make sure ++ * that the flush is complete. So we don't have to output an ++ * empty block here, this will be done at next call. This also ++ * ensures that for a very small output buffer, we emit at most ++ * one empty block. ++ */ ++ } ++ if (bstate == block_done) { ++ if (flush == Z_PARTIAL_FLUSH) { ++ _tr_align(s); ++ } else { /* FULL_FLUSH or SYNC_FLUSH */ ++ _tr_stored_block(s, (char*)0, 0L, 0); ++ /* For a full flush, this empty block will be recognized ++ * as a special marker by inflate_sync(). ++ */ ++ if (flush == Z_FULL_FLUSH) { ++ CLEAR_HASH(s); /* forget history */ ++ } ++ } ++ flush_pending(strm); ++ if (strm->avail_out == 0) { ++ s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ ++ return Z_OK; ++ } ++ } ++ } ++ Assert(strm->avail_out > 0, "bug2"); ++ ++ if (flush != Z_FINISH) return Z_OK; ++ if (s->noheader) return Z_STREAM_END; ++ ++ /* Write the zlib trailer (adler32) */ ++ putShortMSB(s, (uInt)(strm->adler >> 16)); ++ putShortMSB(s, (uInt)(strm->adler & 0xffff)); ++ flush_pending(strm); ++ /* If avail_out is zero, the application will call deflate again ++ * to flush the rest. ++ */ ++ s->noheader = -1; /* write the trailer only once! */ ++ return s->pending != 0 ? 
Z_OK : Z_STREAM_END; ++} ++ ++/* ========================================================================= */ ++int ZEXPORT deflateEnd (strm) ++ z_streamp strm; ++{ ++ int status; ++ ++ if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; ++ ++ status = strm->state->status; ++ if (status != INIT_STATE && status != BUSY_STATE && ++ status != FINISH_STATE) { ++ return Z_STREAM_ERROR; ++ } ++ ++ /* Deallocate in reverse order of allocations: */ ++ TRY_FREE(strm, strm->state->pending_buf); ++ TRY_FREE(strm, strm->state->head); ++ TRY_FREE(strm, strm->state->prev); ++ TRY_FREE(strm, strm->state->window); ++ ++ ZFREE(strm, strm->state); ++ strm->state = Z_NULL; ++ ++ return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; ++} ++ ++/* ========================================================================= ++ * Copy the source state to the destination state. ++ * To simplify the source, this is not supported for 16-bit MSDOS (which ++ * doesn't have enough memory anyway to duplicate compression states). 
++ */ ++int ZEXPORT deflateCopy (dest, source) ++ z_streamp dest; ++ z_streamp source; ++{ ++#ifdef MAXSEG_64K ++ return Z_STREAM_ERROR; ++#else ++ deflate_state *ds; ++ deflate_state *ss; ++ ushf *overlay; ++ ++ ++ if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) { ++ return Z_STREAM_ERROR; ++ } ++ ++ ss = source->state; ++ ++ *dest = *source; ++ ++ ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); ++ if (ds == Z_NULL) return Z_MEM_ERROR; ++ dest->state = (struct internal_state FAR *) ds; ++ *ds = *ss; ++ ds->strm = dest; ++ ++ ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); ++ ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); ++ ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); ++ overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); ++ ds->pending_buf = (uchf *) overlay; ++ ++ if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || ++ ds->pending_buf == Z_NULL) { ++ deflateEnd (dest); ++ return Z_MEM_ERROR; ++ } ++ /* following zmemcpy do not work for 16-bit MSDOS */ ++ zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); ++ zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos)); ++ zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos)); ++ zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); ++ ++ ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); ++ ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); ++ ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; ++ ++ ds->l_desc.dyn_tree = ds->dyn_ltree; ++ ds->d_desc.dyn_tree = ds->dyn_dtree; ++ ds->bl_desc.dyn_tree = ds->bl_tree; ++ ++ return Z_OK; ++#endif ++} ++ ++/* =========================================================================== ++ * Read a new buffer from the current input stream, update the adler32 ++ * and total number of bytes read. 
All deflate() input goes through ++ * this function so some applications may wish to modify it to avoid ++ * allocating a large strm->next_in buffer and copying from it. ++ * (See also flush_pending()). ++ */ ++local int read_buf(strm, buf, size) ++ z_streamp strm; ++ Bytef *buf; ++ unsigned size; ++{ ++ unsigned len = strm->avail_in; ++ ++ if (len > size) len = size; ++ if (len == 0) return 0; ++ ++ strm->avail_in -= len; ++ ++ if (!strm->state->noheader) { ++ strm->adler = adler32(strm->adler, strm->next_in, len); ++ } ++ zmemcpy(buf, strm->next_in, len); ++ strm->next_in += len; ++ strm->total_in += len; ++ ++ return (int)len; ++} ++ ++/* =========================================================================== ++ * Initialize the "longest match" routines for a new zlib stream ++ */ ++local void lm_init (s) ++ deflate_state *s; ++{ ++ s->window_size = (ulg)2L*s->w_size; ++ ++ CLEAR_HASH(s); ++ ++ /* Set the default configuration parameters: ++ */ ++ s->max_lazy_match = configuration_table[s->level].max_lazy; ++ s->good_match = configuration_table[s->level].good_length; ++ s->nice_match = configuration_table[s->level].nice_length; ++ s->max_chain_length = configuration_table[s->level].max_chain; ++ ++ s->strstart = 0; ++ s->block_start = 0L; ++ s->lookahead = 0; ++ s->match_length = s->prev_length = MIN_MATCH-1; ++ s->match_available = 0; ++ s->ins_h = 0; ++#ifdef ASMV ++ match_init(); /* initialize the asm code */ ++#endif ++} ++ ++/* =========================================================================== ++ * Set match_start to the longest match starting at the given string and ++ * return its length. Matches shorter or equal to prev_length are discarded, ++ * in which case the result is equal to prev_length and match_start is ++ * garbage. 
++ * IN assertions: cur_match is the head of the hash chain for the current ++ * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 ++ * OUT assertion: the match length is not greater than s->lookahead. ++ */ ++#ifndef ASMV ++/* For 80x86 and 680x0, an optimized version will be provided in match.asm or ++ * match.S. The code will be functionally equivalent. ++ */ ++#ifndef FASTEST ++local uInt longest_match(s, cur_match) ++ deflate_state *s; ++ IPos cur_match; /* current match */ ++{ ++ unsigned chain_length = s->max_chain_length;/* max hash chain length */ ++ register Bytef *scan = s->window + s->strstart; /* current string */ ++ register Bytef *match; /* matched string */ ++ register int len; /* length of current match */ ++ int best_len = s->prev_length; /* best match length so far */ ++ int nice_match = s->nice_match; /* stop if match long enough */ ++ IPos limit = s->strstart > (IPos)MAX_DIST(s) ? ++ s->strstart - (IPos)MAX_DIST(s) : NIL; ++ /* Stop when cur_match becomes <= limit. To simplify the code, ++ * we prevent matches with the string of window index 0. ++ */ ++ Posf *prev = s->prev; ++ uInt wmask = s->w_mask; ++ ++#ifdef UNALIGNED_OK ++ /* Compare two bytes at a time. Note: this is not always beneficial. ++ * Try with and without -DUNALIGNED_OK to check. ++ */ ++ register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; ++ register ush scan_start = *(ushf*)scan; ++ register ush scan_end = *(ushf*)(scan+best_len-1); ++#else ++ register Bytef *strend = s->window + s->strstart + MAX_MATCH; ++ register Byte scan_end1 = scan[best_len-1]; ++ register Byte scan_end = scan[best_len]; ++#endif ++ ++ /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. ++ * It is easy to get rid of this optimization if necessary. 
++ */ ++ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); ++ ++ /* Do not waste too much time if we already have a good match: */ ++ if (s->prev_length >= s->good_match) { ++ chain_length >>= 2; ++ } ++ /* Do not look for matches beyond the end of the input. This is necessary ++ * to make deflate deterministic. ++ */ ++ if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; ++ ++ Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); ++ ++ do { ++ Assert(cur_match < s->strstart, "no future"); ++ match = s->window + cur_match; ++ ++ /* Skip to next match if the match length cannot increase ++ * or if the match length is less than 2: ++ */ ++#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) ++ /* This code assumes sizeof(unsigned short) == 2. Do not use ++ * UNALIGNED_OK if your compiler uses a different size. ++ */ ++ if (*(ushf*)(match+best_len-1) != scan_end || ++ *(ushf*)match != scan_start) continue; ++ ++ /* It is not necessary to compare scan[2] and match[2] since they are ++ * always equal when the other bytes match, given that the hash keys ++ * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at ++ * strstart+3, +5, ... up to strstart+257. We check for insufficient ++ * lookahead only every 4th comparison; the 128th check will be made ++ * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is ++ * necessary to put more guard bytes at the end of the window, or ++ * to check more often for insufficient lookahead. 
++ */ ++ Assert(scan[2] == match[2], "scan[2]?"); ++ scan++, match++; ++ do { ++ } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && ++ *(ushf*)(scan+=2) == *(ushf*)(match+=2) && ++ *(ushf*)(scan+=2) == *(ushf*)(match+=2) && ++ *(ushf*)(scan+=2) == *(ushf*)(match+=2) && ++ scan < strend); ++ /* The funny "do {}" generates better code on most compilers */ ++ ++ /* Here, scan <= window+strstart+257 */ ++ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); ++ if (*scan == *match) scan++; ++ ++ len = (MAX_MATCH - 1) - (int)(strend-scan); ++ scan = strend - (MAX_MATCH-1); ++ ++#else /* UNALIGNED_OK */ ++ ++ if (match[best_len] != scan_end || ++ match[best_len-1] != scan_end1 || ++ *match != *scan || ++ *++match != scan[1]) continue; ++ ++ /* The check at best_len-1 can be removed because it will be made ++ * again later. (This heuristic is not always a win.) ++ * It is not necessary to compare scan[2] and match[2] since they ++ * are always equal when the other bytes match, given that ++ * the hash keys are equal and that HASH_BITS >= 8. ++ */ ++ scan += 2, match++; ++ Assert(*scan == *match, "match[2]?"); ++ ++ /* We check for insufficient lookahead only every 8th comparison; ++ * the 256th check will be made at strstart+258. 
++ */ ++ do { ++ } while (*++scan == *++match && *++scan == *++match && ++ *++scan == *++match && *++scan == *++match && ++ *++scan == *++match && *++scan == *++match && ++ *++scan == *++match && *++scan == *++match && ++ scan < strend); ++ ++ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); ++ ++ len = MAX_MATCH - (int)(strend - scan); ++ scan = strend - MAX_MATCH; ++ ++#endif /* UNALIGNED_OK */ ++ ++ if (len > best_len) { ++ s->match_start = cur_match; ++ best_len = len; ++ if (len >= nice_match) break; ++#ifdef UNALIGNED_OK ++ scan_end = *(ushf*)(scan+best_len-1); ++#else ++ scan_end1 = scan[best_len-1]; ++ scan_end = scan[best_len]; ++#endif ++ } ++ } while ((cur_match = prev[cur_match & wmask]) > limit ++ && --chain_length != 0); ++ ++ if ((uInt)best_len <= s->lookahead) return (uInt)best_len; ++ return s->lookahead; ++} ++ ++#else /* FASTEST */ ++/* --------------------------------------------------------------------------- ++ * Optimized version for level == 1 only ++ */ ++local uInt longest_match(s, cur_match) ++ deflate_state *s; ++ IPos cur_match; /* current match */ ++{ ++ register Bytef *scan = s->window + s->strstart; /* current string */ ++ register Bytef *match; /* matched string */ ++ register int len; /* length of current match */ ++ register Bytef *strend = s->window + s->strstart + MAX_MATCH; ++ ++ /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. ++ * It is easy to get rid of this optimization if necessary. ++ */ ++ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); ++ ++ Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); ++ ++ Assert(cur_match < s->strstart, "no future"); ++ ++ match = s->window + cur_match; ++ ++ /* Return failure if the match length is less than 2: ++ */ ++ if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1; ++ ++ /* The check at best_len-1 can be removed because it will be made ++ * again later. 
(This heuristic is not always a win.) ++ * It is not necessary to compare scan[2] and match[2] since they ++ * are always equal when the other bytes match, given that ++ * the hash keys are equal and that HASH_BITS >= 8. ++ */ ++ scan += 2, match += 2; ++ Assert(*scan == *match, "match[2]?"); ++ ++ /* We check for insufficient lookahead only every 8th comparison; ++ * the 256th check will be made at strstart+258. ++ */ ++ do { ++ } while (*++scan == *++match && *++scan == *++match && ++ *++scan == *++match && *++scan == *++match && ++ *++scan == *++match && *++scan == *++match && ++ *++scan == *++match && *++scan == *++match && ++ scan < strend); ++ ++ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); ++ ++ len = MAX_MATCH - (int)(strend - scan); ++ ++ if (len < MIN_MATCH) return MIN_MATCH - 1; ++ ++ s->match_start = cur_match; ++ return len <= s->lookahead ? len : s->lookahead; ++} ++#endif /* FASTEST */ ++#endif /* ASMV */ ++ ++#ifdef DEBUG ++/* =========================================================================== ++ * Check that the match at match_start is indeed a match. ++ */ ++local void check_match(s, start, match, length) ++ deflate_state *s; ++ IPos start, match; ++ int length; ++{ ++ /* check that the match is indeed a match */ ++ if (zmemcmp(s->window + match, ++ s->window + start, length) != EQUAL) { ++ fprintf(stderr, " start %u, match %u, length %d\n", ++ start, match, length); ++ do { ++ fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); ++ } while (--length != 0); ++ z_error("invalid match"); ++ } ++ if (z_verbose > 1) { ++ fprintf(stderr,"\\[%d,%d]", start-match, length); ++ do { putc(s->window[start++], stderr); } while (--length != 0); ++ } ++} ++#else ++# define check_match(s, start, match, length) ++#endif ++ ++/* =========================================================================== ++ * Fill the window when the lookahead becomes insufficient. ++ * Updates strstart and lookahead. 
++ * ++ * IN assertion: lookahead < MIN_LOOKAHEAD ++ * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD ++ * At least one byte has been read, or avail_in == 0; reads are ++ * performed for at least two bytes (required for the zip translate_eol ++ * option -- not supported here). ++ */ ++local void fill_window(s) ++ deflate_state *s; ++{ ++ register unsigned n, m; ++ register Posf *p; ++ unsigned more; /* Amount of free space at the end of the window. */ ++ uInt wsize = s->w_size; ++ ++ do { ++ more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); ++ ++ /* Deal with !@#$% 64K limit: */ ++ if (more == 0 && s->strstart == 0 && s->lookahead == 0) { ++ more = wsize; ++ ++ } else if (more == (unsigned)(-1)) { ++ /* Very unlikely, but possible on 16 bit machine if strstart == 0 ++ * and lookahead == 1 (input done one byte at time) ++ */ ++ more--; ++ ++ /* If the window is almost full and there is insufficient lookahead, ++ * move the upper half to the lower one to make room in the upper half. ++ */ ++ } else if (s->strstart >= wsize+MAX_DIST(s)) { ++ ++ zmemcpy(s->window, s->window+wsize, (unsigned)wsize); ++ s->match_start -= wsize; ++ s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ ++ s->block_start -= (long) wsize; ++ ++ /* Slide the hash table (could be avoided with 32 bit values ++ at the expense of memory usage). We slide even when level == 0 ++ to keep the hash table consistent if we switch back to level > 0 ++ later. (Using level 0 permanently is not an optimal usage of ++ zlib, so we don't care about this pathological case.) ++ */ ++ n = s->hash_size; ++ p = &s->head[n]; ++ do { ++ m = *--p; ++ *p = (Pos)(m >= wsize ? m-wsize : NIL); ++ } while (--n); ++ ++ n = wsize; ++#ifndef FASTEST ++ p = &s->prev[n]; ++ do { ++ m = *--p; ++ *p = (Pos)(m >= wsize ? m-wsize : NIL); ++ /* If n is not on any hash chain, prev[n] is garbage but ++ * its value will never be used. 
++ */ ++ } while (--n); ++#endif ++ more += wsize; ++ } ++ if (s->strm->avail_in == 0) return; ++ ++ /* If there was no sliding: ++ * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && ++ * more == window_size - lookahead - strstart ++ * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) ++ * => more >= window_size - 2*WSIZE + 2 ++ * In the BIG_MEM or MMAP case (not yet supported), ++ * window_size == input_size + MIN_LOOKAHEAD && ++ * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. ++ * Otherwise, window_size == 2*WSIZE so more >= 2. ++ * If there was sliding, more >= WSIZE. So in all cases, more >= 2. ++ */ ++ Assert(more >= 2, "more < 2"); ++ ++ n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); ++ s->lookahead += n; ++ ++ /* Initialize the hash value now that we have some input: */ ++ if (s->lookahead >= MIN_MATCH) { ++ s->ins_h = s->window[s->strstart]; ++ UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); ++#if MIN_MATCH != 3 ++ Call UPDATE_HASH() MIN_MATCH-3 more times ++#endif ++ } ++ /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, ++ * but this is not important since only literal bytes will be emitted. ++ */ ++ ++ } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); ++} ++ ++/* =========================================================================== ++ * Flush the current block, with given end-of-file flag. ++ * IN assertion: strstart is set to the end of the current match. ++ */ ++#define FLUSH_BLOCK_ONLY(s, eof) { \ ++ _tr_flush_block(s, (s->block_start >= 0L ? \ ++ (charf *)&s->window[(unsigned)s->block_start] : \ ++ (charf *)Z_NULL), \ ++ (ulg)((long)s->strstart - s->block_start), \ ++ (eof)); \ ++ s->block_start = s->strstart; \ ++ flush_pending(s->strm); \ ++ Tracev((stderr,"[FLUSH]")); \ ++} ++ ++/* Same but force premature exit if necessary. 
*/ ++#define FLUSH_BLOCK(s, eof) { \ ++ FLUSH_BLOCK_ONLY(s, eof); \ ++ if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \ ++} ++ ++/* =========================================================================== ++ * Copy without compression as much as possible from the input stream, return ++ * the current block state. ++ * This function does not insert new strings in the dictionary since ++ * uncompressible data is probably not useful. This function is used ++ * only for the level=0 compression option. ++ * NOTE: this function should be optimized to avoid extra copying from ++ * window to pending_buf. ++ */ ++local block_state deflate_stored(s, flush) ++ deflate_state *s; ++ int flush; ++{ ++ /* Stored blocks are limited to 0xffff bytes, pending_buf is limited ++ * to pending_buf_size, and each stored block has a 5 byte header: ++ */ ++ ulg max_block_size = 0xffff; ++ ulg max_start; ++ ++ if (max_block_size > s->pending_buf_size - 5) { ++ max_block_size = s->pending_buf_size - 5; ++ } ++ ++ /* Copy as much as possible from input to output: */ ++ for (;;) { ++ /* Fill the window as much as possible: */ ++ if (s->lookahead <= 1) { ++ ++ Assert(s->strstart < s->w_size+MAX_DIST(s) || ++ s->block_start >= (long)s->w_size, "slide too late"); ++ ++ fill_window(s); ++ if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more; ++ ++ if (s->lookahead == 0) break; /* flush the current block */ ++ } ++ Assert(s->block_start >= 0L, "block gone"); ++ ++ s->strstart += s->lookahead; ++ s->lookahead = 0; ++ ++ /* Emit a stored block if pending_buf will be full: */ ++ max_start = s->block_start + max_block_size; ++ if (s->strstart == 0 || (ulg)s->strstart >= max_start) { ++ /* strstart == 0 is possible when wraparound on 16-bit machine */ ++ s->lookahead = (uInt)(s->strstart - max_start); ++ s->strstart = (uInt)max_start; ++ FLUSH_BLOCK(s, 0); ++ } ++ /* Flush if we may have to slide, otherwise block_start may become ++ * negative and the data will be 
gone: ++ */ ++ if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) { ++ FLUSH_BLOCK(s, 0); ++ } ++ } ++ FLUSH_BLOCK(s, flush == Z_FINISH); ++ return flush == Z_FINISH ? finish_done : block_done; ++} ++ ++/* =========================================================================== ++ * Compress as much as possible from the input stream, return the current ++ * block state. ++ * This function does not perform lazy evaluation of matches and inserts ++ * new strings in the dictionary only for unmatched strings or for short ++ * matches. It is used only for the fast compression options. ++ */ ++local block_state deflate_fast(s, flush) ++ deflate_state *s; ++ int flush; ++{ ++ IPos hash_head = NIL; /* head of the hash chain */ ++ int bflush; /* set if current block must be flushed */ ++ ++ for (;;) { ++ /* Make sure that we always have enough lookahead, except ++ * at the end of the input file. We need MAX_MATCH bytes ++ * for the next match, plus MIN_MATCH bytes to insert the ++ * string following the next match. ++ */ ++ if (s->lookahead < MIN_LOOKAHEAD) { ++ fill_window(s); ++ if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { ++ return need_more; ++ } ++ if (s->lookahead == 0) break; /* flush the current block */ ++ } ++ ++ /* Insert the string window[strstart .. strstart+2] in the ++ * dictionary, and set hash_head to the head of the hash chain: ++ */ ++ if (s->lookahead >= MIN_MATCH) { ++ INSERT_STRING(s, s->strstart, hash_head); ++ } ++ ++ /* Find the longest match, discarding those <= prev_length. ++ * At this point we have always match_length < MIN_MATCH ++ */ ++ if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { ++ /* To simplify the code, we prevent matches with the string ++ * of window index 0 (in particular we have to avoid a match ++ * of the string with itself at the start of the input file). 
++ */ ++ if (s->strategy != Z_HUFFMAN_ONLY) { ++ s->match_length = longest_match (s, hash_head); ++ } ++ /* longest_match() sets match_start */ ++ } ++ if (s->match_length >= MIN_MATCH) { ++ check_match(s, s->strstart, s->match_start, s->match_length); ++ ++ _tr_tally_dist(s, s->strstart - s->match_start, ++ s->match_length - MIN_MATCH, bflush); ++ ++ s->lookahead -= s->match_length; ++ ++ /* Insert new strings in the hash table only if the match length ++ * is not too large. This saves time but degrades compression. ++ */ ++#ifndef FASTEST ++ if (s->match_length <= s->max_insert_length && ++ s->lookahead >= MIN_MATCH) { ++ s->match_length--; /* string at strstart already in hash table */ ++ do { ++ s->strstart++; ++ INSERT_STRING(s, s->strstart, hash_head); ++ /* strstart never exceeds WSIZE-MAX_MATCH, so there are ++ * always MIN_MATCH bytes ahead. ++ */ ++ } while (--s->match_length != 0); ++ s->strstart++; ++ } else ++#endif ++ { ++ s->strstart += s->match_length; ++ s->match_length = 0; ++ s->ins_h = s->window[s->strstart]; ++ UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); ++#if MIN_MATCH != 3 ++ Call UPDATE_HASH() MIN_MATCH-3 more times ++#endif ++ /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not ++ * matter since it will be recomputed at next deflate call. ++ */ ++ } ++ } else { ++ /* No match, output a literal byte */ ++ Tracevv((stderr,"%c", s->window[s->strstart])); ++ _tr_tally_lit (s, s->window[s->strstart], bflush); ++ s->lookahead--; ++ s->strstart++; ++ } ++ if (bflush) FLUSH_BLOCK(s, 0); ++ } ++ FLUSH_BLOCK(s, flush == Z_FINISH); ++ return flush == Z_FINISH ? finish_done : block_done; ++} ++ ++/* =========================================================================== ++ * Same as above, but achieves better compression. We use a lazy ++ * evaluation for matches: a match is finally adopted only if there is ++ * no better match at the next window position. 
++ */ ++local block_state deflate_slow(s, flush) ++ deflate_state *s; ++ int flush; ++{ ++ IPos hash_head = NIL; /* head of hash chain */ ++ int bflush; /* set if current block must be flushed */ ++ ++ /* Process the input block. */ ++ for (;;) { ++ /* Make sure that we always have enough lookahead, except ++ * at the end of the input file. We need MAX_MATCH bytes ++ * for the next match, plus MIN_MATCH bytes to insert the ++ * string following the next match. ++ */ ++ if (s->lookahead < MIN_LOOKAHEAD) { ++ fill_window(s); ++ if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { ++ return need_more; ++ } ++ if (s->lookahead == 0) break; /* flush the current block */ ++ } ++ ++ /* Insert the string window[strstart .. strstart+2] in the ++ * dictionary, and set hash_head to the head of the hash chain: ++ */ ++ if (s->lookahead >= MIN_MATCH) { ++ INSERT_STRING(s, s->strstart, hash_head); ++ } ++ ++ /* Find the longest match, discarding those <= prev_length. ++ */ ++ s->prev_length = s->match_length, s->prev_match = s->match_start; ++ s->match_length = MIN_MATCH-1; ++ ++ if (hash_head != NIL && s->prev_length < s->max_lazy_match && ++ s->strstart - hash_head <= MAX_DIST(s)) { ++ /* To simplify the code, we prevent matches with the string ++ * of window index 0 (in particular we have to avoid a match ++ * of the string with itself at the start of the input file). ++ */ ++ if (s->strategy != Z_HUFFMAN_ONLY) { ++ s->match_length = longest_match (s, hash_head); ++ } ++ /* longest_match() sets match_start */ ++ ++ if (s->match_length <= 5 && (s->strategy == Z_FILTERED || ++ (s->match_length == MIN_MATCH && ++ s->strstart - s->match_start > TOO_FAR))) { ++ ++ /* If prev_match is also MIN_MATCH, match_start is garbage ++ * but we will ignore the current match anyway. 
++ */ ++ s->match_length = MIN_MATCH-1; ++ } ++ } ++ /* If there was a match at the previous step and the current ++ * match is not better, output the previous match: ++ */ ++ if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { ++ uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; ++ /* Do not insert strings in hash table beyond this. */ ++ ++ check_match(s, s->strstart-1, s->prev_match, s->prev_length); ++ ++ _tr_tally_dist(s, s->strstart -1 - s->prev_match, ++ s->prev_length - MIN_MATCH, bflush); ++ ++ /* Insert in hash table all strings up to the end of the match. ++ * strstart-1 and strstart are already inserted. If there is not ++ * enough lookahead, the last two strings are not inserted in ++ * the hash table. ++ */ ++ s->lookahead -= s->prev_length-1; ++ s->prev_length -= 2; ++ do { ++ if (++s->strstart <= max_insert) { ++ INSERT_STRING(s, s->strstart, hash_head); ++ } ++ } while (--s->prev_length != 0); ++ s->match_available = 0; ++ s->match_length = MIN_MATCH-1; ++ s->strstart++; ++ ++ if (bflush) FLUSH_BLOCK(s, 0); ++ ++ } else if (s->match_available) { ++ /* If there was no match at the previous position, output a ++ * single literal. If there was a match but the current match ++ * is longer, truncate the previous match to a single literal. ++ */ ++ Tracevv((stderr,"%c", s->window[s->strstart-1])); ++ _tr_tally_lit(s, s->window[s->strstart-1], bflush); ++ if (bflush) { ++ FLUSH_BLOCK_ONLY(s, 0); ++ } ++ s->strstart++; ++ s->lookahead--; ++ if (s->strm->avail_out == 0) return need_more; ++ } else { ++ /* There is no previous match to compare with, wait for ++ * the next step to decide. 
++ */ ++ s->match_available = 1; ++ s->strstart++; ++ s->lookahead--; ++ } ++ } ++ Assert (flush != Z_NO_FLUSH, "no flush?"); ++ if (s->match_available) { ++ Tracevv((stderr,"%c", s->window[s->strstart-1])); ++ _tr_tally_lit(s, s->window[s->strstart-1], bflush); ++ s->match_available = 0; ++ } ++ FLUSH_BLOCK(s, flush == Z_FINISH); ++ return flush == Z_FINISH ? finish_done : block_done; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/deflate.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,318 @@ ++/* deflate.h -- internal compression state ++ * Copyright (C) 1995-2002 Jean-loup Gailly ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++/* WARNING: this file should *not* be used by applications. It is ++ part of the implementation of the compression library and is ++ subject to change. Applications should only use zlib.h. ++ */ ++ ++/* @(#) $Id: deflate.h,v 1.5 2004-07-10 07:48:38 mcr Exp $ */ ++ ++#ifndef _DEFLATE_H ++#define _DEFLATE_H ++ ++#include "zlib/zutil.h" ++ ++/* =========================================================================== ++ * Internal compression state. ++ */ ++ ++#define LENGTH_CODES 29 ++/* number of length codes, not counting the special END_BLOCK code */ ++ ++#define LITERALS 256 ++/* number of literal bytes 0..255 */ ++ ++#define L_CODES (LITERALS+1+LENGTH_CODES) ++/* number of Literal or Length codes, including the END_BLOCK code */ ++ ++#define D_CODES 30 ++/* number of distance codes */ ++ ++#define BL_CODES 19 ++/* number of codes used to transfer the bit lengths */ ++ ++#define HEAP_SIZE (2*L_CODES+1) ++/* maximum heap size */ ++ ++#define MAX_BITS 15 ++/* All codes must not exceed MAX_BITS bits */ ++ ++#define INIT_STATE 42 ++#define BUSY_STATE 113 ++#define FINISH_STATE 666 ++/* Stream status */ ++ ++ ++/* Data structure describing a single value and its code string. 
*/ ++typedef struct ct_data_s { ++ union { ++ ush freq; /* frequency count */ ++ ush code; /* bit string */ ++ } fc; ++ union { ++ ush dad; /* father node in Huffman tree */ ++ ush len; /* length of bit string */ ++ } dl; ++} FAR ct_data; ++ ++#define Freq fc.freq ++#define Code fc.code ++#define Dad dl.dad ++#define Len dl.len ++ ++typedef struct static_tree_desc_s static_tree_desc; ++ ++typedef struct tree_desc_s { ++ ct_data *dyn_tree; /* the dynamic tree */ ++ int max_code; /* largest code with non zero frequency */ ++ static_tree_desc *stat_desc; /* the corresponding static tree */ ++} FAR tree_desc; ++ ++typedef ush Pos; ++typedef Pos FAR Posf; ++typedef unsigned IPos; ++ ++/* A Pos is an index in the character window. We use short instead of int to ++ * save space in the various tables. IPos is used only for parameter passing. ++ */ ++ ++typedef struct internal_state { ++ z_streamp strm; /* pointer back to this zlib stream */ ++ int status; /* as the name implies */ ++ Bytef *pending_buf; /* output still pending */ ++ ulg pending_buf_size; /* size of pending_buf */ ++ Bytef *pending_out; /* next pending byte to output to the stream */ ++ int pending; /* nb of bytes in the pending buffer */ ++ int noheader; /* suppress zlib header and adler32 */ ++ Byte data_type; /* UNKNOWN, BINARY or ASCII */ ++ Byte method; /* STORED (for zip only) or DEFLATED */ ++ int last_flush; /* value of flush param for previous deflate call */ ++ ++ /* used by deflate.c: */ ++ ++ uInt w_size; /* LZ77 window size (32K by default) */ ++ uInt w_bits; /* log2(w_size) (8..16) */ ++ uInt w_mask; /* w_size - 1 */ ++ ++ Bytef *window; ++ /* Sliding window. Input bytes are read into the second half of the window, ++ * and move to the first half later to keep a dictionary of at least wSize ++ * bytes. With this organization, matches are limited to a distance of ++ * wSize-MAX_MATCH bytes, but this ensures that IO is always ++ * performed with a length multiple of the block size. 
Also, it limits ++ * the window size to 64K, which is quite useful on MSDOS. ++ * To do: use the user input buffer as sliding window. ++ */ ++ ++ ulg window_size; ++ /* Actual size of window: 2*wSize, except when the user input buffer ++ * is directly used as sliding window. ++ */ ++ ++ Posf *prev; ++ /* Link to older string with same hash index. To limit the size of this ++ * array to 64K, this link is maintained only for the last 32K strings. ++ * An index in this array is thus a window index modulo 32K. ++ */ ++ ++ Posf *head; /* Heads of the hash chains or NIL. */ ++ ++ uInt ins_h; /* hash index of string to be inserted */ ++ uInt hash_size; /* number of elements in hash table */ ++ uInt hash_bits; /* log2(hash_size) */ ++ uInt hash_mask; /* hash_size-1 */ ++ ++ uInt hash_shift; ++ /* Number of bits by which ins_h must be shifted at each input ++ * step. It must be such that after MIN_MATCH steps, the oldest ++ * byte no longer takes part in the hash key, that is: ++ * hash_shift * MIN_MATCH >= hash_bits ++ */ ++ ++ long block_start; ++ /* Window position at the beginning of the current output block. Gets ++ * negative when the window is moved backwards. ++ */ ++ ++ uInt match_length; /* length of best match */ ++ IPos prev_match; /* previous match */ ++ int match_available; /* set if previous match exists */ ++ uInt strstart; /* start of string to insert */ ++ uInt match_start; /* start of matching string */ ++ uInt lookahead; /* number of valid bytes ahead in window */ ++ ++ uInt prev_length; ++ /* Length of the best match at previous step. Matches not greater than this ++ * are discarded. This is used in the lazy match evaluation. ++ */ ++ ++ uInt max_chain_length; ++ /* To speed up deflation, hash chains are never searched beyond this ++ * length. A higher limit improves compression ratio but degrades the ++ * speed. ++ */ ++ ++ uInt max_lazy_match; ++ /* Attempt to find a better match only when the current match is strictly ++ * smaller than this value. 
This mechanism is used only for compression ++ * levels >= 4. ++ */ ++# define max_insert_length max_lazy_match ++ /* Insert new strings in the hash table only if the match length is not ++ * greater than this length. This saves time but degrades compression. ++ * max_insert_length is used only for compression levels <= 3. ++ */ ++ ++ int level; /* compression level (1..9) */ ++ int strategy; /* favor or force Huffman coding*/ ++ ++ uInt good_match; ++ /* Use a faster search when the previous match is longer than this */ ++ ++ int nice_match; /* Stop searching when current match exceeds this */ ++ ++ /* used by trees.c: */ ++ /* Didn't use ct_data typedef below to supress compiler warning */ ++ struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ ++ struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ ++ struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ ++ ++ struct tree_desc_s l_desc; /* desc. for literal tree */ ++ struct tree_desc_s d_desc; /* desc. for distance tree */ ++ struct tree_desc_s bl_desc; /* desc. for bit length tree */ ++ ++ ush bl_count[MAX_BITS+1]; ++ /* number of codes at each bit length for an optimal tree */ ++ ++ int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ ++ int heap_len; /* number of elements in the heap */ ++ int heap_max; /* element of largest frequency */ ++ /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. ++ * The same heap array is used to build all trees. ++ */ ++ ++ uch depth[2*L_CODES+1]; ++ /* Depth of each subtree used as tie breaker for trees of equal frequency ++ */ ++ ++ uchf *l_buf; /* buffer for literals or lengths */ ++ ++ uInt lit_bufsize; ++ /* Size of match buffer for literals/lengths. 
There are 4 reasons for ++ * limiting lit_bufsize to 64K: ++ * - frequencies can be kept in 16 bit counters ++ * - if compression is not successful for the first block, all input ++ * data is still in the window so we can still emit a stored block even ++ * when input comes from standard input. (This can also be done for ++ * all blocks if lit_bufsize is not greater than 32K.) ++ * - if compression is not successful for a file smaller than 64K, we can ++ * even emit a stored file instead of a stored block (saving 5 bytes). ++ * This is applicable only for zip (not gzip or zlib). ++ * - creating new Huffman trees less frequently may not provide fast ++ * adaptation to changes in the input data statistics. (Take for ++ * example a binary file with poorly compressible code followed by ++ * a highly compressible string table.) Smaller buffer sizes give ++ * fast adaptation but have of course the overhead of transmitting ++ * trees more frequently. ++ * - I can't count above 4 ++ */ ++ ++ uInt last_lit; /* running index in l_buf */ ++ ++ ushf *d_buf; ++ /* Buffer for distances. To simplify the code, d_buf and l_buf have ++ * the same number of elements. To use different lengths, an extra flag ++ * array would be necessary. ++ */ ++ ++ ulg opt_len; /* bit length of current block with optimal trees */ ++ ulg static_len; /* bit length of current block with static trees */ ++ uInt matches; /* number of string matches in current block */ ++ int last_eob_len; /* bit length of EOB code for last block */ ++ ++#ifdef DEBUG ++ ulg compressed_len; /* total bit length of compressed file mod 2^32 */ ++ ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ ++#endif ++ ++ ush bi_buf; ++ /* Output buffer. bits are inserted starting at the bottom (least ++ * significant bits). ++ */ ++ int bi_valid; ++ /* Number of valid bits in bi_buf. All bits above the last valid bit ++ * are always zero. ++ */ ++ ++} FAR deflate_state; ++ ++/* Output a byte on the stream. 
++ * IN assertion: there is enough room in pending_buf. ++ */ ++#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} ++ ++ ++#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) ++/* Minimum amount of lookahead, except at the end of the input file. ++ * See deflate.c for comments about the MIN_MATCH+1. ++ */ ++ ++#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) ++/* In order to simplify the code, particularly on 16 bit machines, match ++ * distances are limited to MAX_DIST instead of WSIZE. ++ */ ++ ++ /* in trees.c */ ++void _tr_init OF((deflate_state *s)); ++int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); ++void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len, ++ int eof)); ++void _tr_align OF((deflate_state *s)); ++void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, ++ int eof)); ++ ++#define d_code(dist) \ ++ ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) ++/* Mapping from a distance to a distance code. dist is the distance - 1 and ++ * must not have side effects. _dist_code[256] and _dist_code[257] are never ++ * used. 
++ */ ++ ++#ifndef DEBUG ++/* Inline versions of _tr_tally for speed: */ ++ ++#if defined(GEN_TREES_H) || !defined(STDC) ++ extern uch _length_code[]; ++ extern uch _dist_code[]; ++#else ++ extern const uch _length_code[]; ++ extern const uch _dist_code[]; ++#endif ++ ++# define _tr_tally_lit(s, c, flush) \ ++ { uch cc = (c); \ ++ s->d_buf[s->last_lit] = 0; \ ++ s->l_buf[s->last_lit++] = cc; \ ++ s->dyn_ltree[cc].Freq++; \ ++ flush = (s->last_lit == s->lit_bufsize-1); \ ++ } ++# define _tr_tally_dist(s, distance, length, flush) \ ++ { uch len = (length); \ ++ ush dist = (distance); \ ++ s->d_buf[s->last_lit] = dist; \ ++ s->l_buf[s->last_lit++] = len; \ ++ dist--; \ ++ s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ ++ s->dyn_dtree[d_code(dist)].Freq++; \ ++ flush = (s->last_lit == s->lit_bufsize-1); \ ++ } ++#else ++# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) ++# define _tr_tally_dist(s, distance, length, flush) \ ++ flush = _tr_tally(s, distance, length) ++#endif ++ ++#endif /* _DEFLATE_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/COPYRIGHT Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,50 @@ ++Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) ++All rights reserved. ++ ++This package is an DES implementation written by Eric Young (eay@cryptsoft.com). ++The implementation was written so as to conform with MIT's libdes. ++ ++This library is free for commercial and non-commercial use as long as ++the following conditions are aheared to. The following conditions ++apply to all code found in this distribution. ++ ++Copyright remains Eric Young's, and as such any Copyright notices in ++the code are not to be removed. ++If this package is used in a product, Eric Young should be given attribution ++as the author of that the SSL library. This can be in the form of a textual ++message at program startup or in documentation (online or textual) provided ++with the package. 
++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions ++are met: ++1. Redistributions of source code must retain the copyright ++ notice, this list of conditions and the following disclaimer. ++2. Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++3. All advertising materials mentioning features or use of this software ++ must display the following acknowledgement: ++ This product includes software developed by Eric Young (eay@cryptsoft.com) ++ ++THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++SUCH DAMAGE. ++ ++The license and distribution terms for any publically available version or ++derivative of this code cannot be changed. i.e. this code cannot simply be ++copied and put under another distrubution license ++[including the GNU Public License.] ++ ++The reason behind this being stated in this direct manner is past ++experience in code simply being copied and the attribution removed ++from it and then being distributed as part of other packages. This ++implementation was a non-trivial and unpaid effort. 
+--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/INSTALL Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,69 @@ ++Check the CC and CFLAGS lines in the makefile ++ ++If your C library does not support the times(3) function, change the ++#define TIMES to ++#undef TIMES in speed.c ++If it does, check the HZ value for the times(3) function. ++If your system does not define CLK_TCK it will be assumed to ++be 100.0. ++ ++If possible use gcc v 2.7.? ++Turn on the maximum optimising (normally '-O3 -fomit-frame-pointer' for gcc) ++In recent times, some system compilers give better performace. ++ ++type 'make' ++ ++run './destest' to check things are ok. ++run './rpw' to check the tty code for reading passwords works. ++run './speed' to see how fast those optimisations make the library run :-) ++run './des_opts' to determin the best compile time options. ++ ++The output from des_opts should be put in the makefile options and des_enc.c ++should be rebuilt. For 64 bit computers, do not use the DES_PTR option. ++For the DEC Alpha, edit des.h and change DES_LONG to 'unsigned int' ++and then you can use the 'DES_PTR' option. ++ ++The file options.txt has the options listed for best speed on quite a ++few systems. Look and the options (UNROLL, PTR, RISC2 etc) and then ++turn on the relevent option in the Makefile ++ ++There are some special Makefile targets that make life easier. ++make cc - standard cc build ++make gcc - standard gcc build ++make x86-elf - x86 assembler (elf), linux-elf. ++make x86-out - x86 assembler (a.out), FreeBSD ++make x86-solaris- x86 assembler ++make x86-bsdi - x86 assembler (a.out with primative assembler). ++ ++If at all possible use the assembler (for Windows NT/95, use ++asm/win32.obj to link with). The x86 assembler is very very fast. 
++ ++A make install will by default install ++libdes.a in /usr/local/lib/libdes.a ++des in /usr/local/bin/des ++des_crypt.man in /usr/local/man/man3/des_crypt.3 ++des.man in /usr/local/man/man1/des.1 ++des.h in /usr/include/des.h ++ ++des(1) should be compatible with sunOS's but I have been unable to ++test it. ++ ++These routines should compile on MSDOS, most 32bit and 64bit version ++of Unix (BSD and SYSV) and VMS, without modification. ++The only problems should be #include files that are in the wrong places. ++ ++These routines can be compiled under MSDOS. ++I have successfully encrypted files using des(1) under MSDOS and then ++decrypted the files on a SparcStation. ++I have been able to compile and test the routines with ++Microsoft C v 5.1 and Turbo C v 2.0. ++The code in this library is in no way optimised for the 16bit ++operation of MSDOS. ++ ++When building for glibc, ignore all of the above and just unpack into ++glibc-1.??/des and then gmake as per normal. ++ ++As a final note on performace. Certain CPUs like sparcs and Alpha often give ++a %10 speed difference depending on the link order. It is rather anoying ++when one program reports 'x' DES encrypts a second and another reports ++'x*0.9' the speed. +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/Makefile Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,63 @@ ++# Makefile for KLIPS kernel code as a module for 2.6 kernels ++# ++# Makefile for KLIPS kernel code as a module ++# Copyright (C) 1998, 1999, 2000,2001 Richard Guy Briggs. ++# Copyright (C) 2002-2004 Michael Richardson ++# ++# This program is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by the ++# Free Software Foundation; either version 2 of the License, or (at your ++# option) any later version. See . 
++# ++# This program is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# for more details. ++# ++# RCSID $Id: Makefile.fs2_6,v 1.2.2.1 2005-08-12 16:10:57 ken Exp $ ++# ++# Note! Dependencies are done automagically by 'make dep', which also ++# removes any old dependencies. DON'T put your own dependencies here ++# unless it's something special (ie not a .c file). ++# ++ ++obj-$(CONFIG_KLIPS_ENC_3DES) += ipsec_alg_3des.o ++obj-$(CONFIG_KLIPS_ENC_3DES) += cbc_enc.o ++obj-$(CONFIG_KLIPS_ENC_3DES) += ecb_enc.o ++obj-$(CONFIG_KLIPS_ENC_3DES) += set_key.o ++ ++ifeq ($(strip ${SUBARCH}),) ++SUBARCH:=${ARCH} ++endif ++ ++# the assembly version expects frame pointers, which are ++# optional in many kernel builds. If you want speed, you should ++# probably use cryptoapi code instead. ++USEASSEMBLY=${SUBARCH}${CONFIG_FRAME_POINTER} ++ifeq (${USEASSEMBLY},i386y) ++obj-$(CONFIG_KLIPS_ENC_3DES) += dx86unix.o ++else ++obj-$(CONFIG_KLIPS_ENC_3DES) += des_enc.o ++endif ++ ++# ++# $Log: Makefile.fs2_6,v $ ++# Revision 1.2.2.1 2005-08-12 16:10:57 ken ++# do not use assembly code with there are no frame pointers ++# ++# Revision 1.3 2005/08/12 14:13:59 mcr ++# do not use assembly code with there are no frame pointers, ++# as it does not have the right linkages. ++# ++# Revision 1.2 2005/04/29 05:13:07 mcr ++# 3DES algorithm code. ++# ++# Revision 1.1 2004/08/17 03:27:30 mcr ++# klips 2.6 edits. ++# ++# ++# Local Variables: ++# compile-command: "(cd ../../.. && source umlsetup.sh && make -C ${POOLSPACE} module/ipsec.o)" ++# End Variables: ++# ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/README Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,54 @@ ++ ++ libdes, Version 4.01 10-Jan-97 ++ ++ Copyright (c) 1997, Eric Young ++ All rights reserved. 
++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms specified in COPYRIGHT. ++ ++-- ++The primary ftp site for this library is ++ftp://ftp.psy.uq.oz.au/pub/Crypto/DES/libdes-x.xx.tar.gz ++libdes is now also shipped with SSLeay. Primary ftp site of ++ftp://ftp.psy.uq.oz.au/pub/Crypto/SSL/SSLeay-x.x.x.tar.gz ++ ++The best way to build this library is to build it as part of SSLeay. ++ ++This kit builds a DES encryption library and a DES encryption program. ++It supports ecb, cbc, ofb, cfb, triple ecb, triple cbc, triple ofb, ++triple cfb, desx, and MIT's pcbc encryption modes and also has a fast ++implementation of crypt(3). ++It contains support routines to read keys from a terminal, ++generate a random key, generate a key from an arbitrary length string, ++read/write encrypted data from/to a file descriptor. ++ ++The implementation was written so as to conform with the manual entry ++for the des_crypt(3) library routines from MIT's project Athena. ++ ++destest should be run after compilation to test the des routines. ++rpw should be run after compilation to test the read password routines. ++The des program is a replacement for the sun des command. I believe it ++conforms to the sun version. ++ ++The Imakefile is setup for use in the kerberos distribution. ++ ++These routines are best compiled with gcc or any other good ++optimising compiler. ++Just turn you optimiser up to the highest settings and run destest ++after the build to make sure everything works. ++ ++I believe these routines are close to the fastest and most portable DES ++routines that use small lookup tables (4.5k) that are publicly available. ++The fcrypt routine is faster than ufc's fcrypt (when compiling with ++gcc2 -O2) on the sparc 2 (1410 vs 1270) but is not so good on other machines ++(on a sun3/260 168 vs 336). It is a function of CPU on chip cache size. 
++[ 10-Jan-97 and a function of an incorrect speed testing program in ++ ufc which gave much better test figures that reality ]. ++ ++It is worth noting that on sparc and Alpha CPUs, performance of the DES ++library can vary by upto %10 due to the positioning of files after application ++linkage. ++ ++Eric Young (eay@cryptsoft.com) ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/README.freeswan Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,33 @@ ++The only changes the FreeS/WAN project has made to libdes-lite 4.04b are: ++ ++We #ifdef-ed the declaration of DES_LONG in des.h, so it's more efficient ++on the Alpha, instead of just noting the issue in a comment. ++ ++We #ifdef-ed out the des_options() function in ecb_enc.c, because we don't ++use it, and its call to sprintf() can cause subtle difficulties when KLIPS ++is built as a module (depending on details of Linux configuration options). ++ ++We changed some instances of CC=$(CC) in the Makefile to CC='$(CC)' to make ++it cope better with Linux kernel Makefile stupidities, and took out an ++explicit CC=gcc (unwise on systems with strange compilers). ++ ++We deleted some references to and , and a declaration ++of one function found only in the full libdes (not in libdes-lite), to ++avoid dragging in bits of stdio/stdlib unnecessarily. (Our thanks to Hans ++Schultz for spotting this and pointing out the fixes.) ++ ++We deleted a couple of .obj files in the asm subdirectory, which appear to ++have been included in the original library by accident. ++ ++We have added an include of our Makefile.inc file, to permit overriding ++things like choice of compiler (although the libdes Makefile would ++probably need some work to make this effective). ++ ++ ++ ++Note that Eric Young is no longer at the email address listed in these ++files, and is (alas) no longer working on free crypto software. 
++ ++ ++ ++This file is RCSID $Id: README.freeswan,v 1.12 2004-07-10 08:06:51 mcr Exp $ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/VERSION Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,406 @@ ++Version 4.04 ++ Fixed a few tests in destest. Also added x86 assember for ++ des_ncbc_encrypt() which is the standard cbc mode function. ++ This makes a very very large performace difference. ++ Ariel Glenn ariel@columbia.edu reports that the terminal ++ 'turn echo off' can return (errno == EINVAL) under solaris ++ when redirection is used. So I now catch that as well as ENOTTY. ++ ++ ++Version 4.03 ++ Left a static out of enc_write.c, which caused to buffer to be ++ continiously malloc()ed. Does anyone use these functions? I keep ++ on feeling like removing them since I only had these in there ++ for a version of kerberised login. Anyway, this was pointed out ++ by Theo de Raadt ++ The 'n' bit ofb code was wrong, it was not shifting the shift ++ register. It worked correctly for n == 64. Thanks to ++ Gigi Ankeny for pointing this one out. ++ ++Version 4.02 ++ I was doing 'if (memcmp(weak_keys[i],key,sizeof(key)) == 0)' ++ when checking for weak keys which is wrong :-(, pointed out by ++ Markus F.X.J. Oberhumer . ++ ++Version 4.01 ++ Even faster inner loop in the DES assembler for x86 and a modification ++ for IP/FP which is faster on x86. Both of these changes are ++ from Svend Olaf Mikkelsen . His ++ changes make the assembler run %40 faster on a pentium. This is just ++ a case of getting the instruction sequence 'just right'. ++ All credit to 'Svend' :-) ++ Quite a few special x86 'make' targets. ++ A libdes-l (lite) distribution. ++ ++Version 4.00 ++ After a bit of a pause, I'll up the major version number since this ++ is mostly a performace release. I've added x86 assembler and ++ added more options for performance. A %28 speedup for gcc ++ on a pentium and the assembler is a %50 speedup. ++ MIPS CPU's, sparc and Alpha are the main CPU's with speedups. 
++ Run des_opts to work out which options should be used. ++ DES_RISC1/DES_RISC2 use alternative inner loops which use ++ more registers but should give speedups on any CPU that does ++ dual issue (pentium). DES_UNROLL unrolls the inner loop, ++ which costs in code size. ++ ++Version 3.26 ++ I've finally removed one of the shifts in D_ENCRYPT. This ++ meant I've changed the des_SPtrans table (spr.h), the set_key() ++ function and some things in des_enc.c. This has definitly ++ made things faster :-). I've known about this one for some ++ time but I've been too lazy to follow it up :-). ++ Noticed that in the D_ENCRYPT() macro, we can just do L^=(..)^(..)^.. ++ instead of L^=((..)|(..)|(..).. This should save a register at ++ least. ++ Assember for x86. The file to replace is des_enc.c, which is replaced ++ by one of the assembler files found in asm. Look at des/asm/readme ++ for more info. ++ ++ /* Modification to fcrypt so it can be compiled to support ++ HPUX 10.x's long password format, define -DLONGCRYPT to use this. ++ Thanks to Jens Kupferschmidt . */ ++ ++ SIGWINCH case put in des_read_passwd() so the function does not ++ 'exit' if this function is recieved. ++ ++Version 3.25 17/07/96 ++ Modified read_pwd.c so that stdin can be read if not a tty. ++ Thanks to Jeff Barber for the patches. ++ des_init_random_number_generator() shortened due to VMS linker ++ limits. ++ Added RSA's DESX cbc mode. It is a form of cbc encryption, with 2 ++ 8 byte quantites xored before and after encryption. ++ des_xcbc_encryption() - the name is funny to preserve the des_ ++ prefix on all functions. ++ ++Version 3.24 20/04/96 ++ The DES_PTR macro option checked and used by SSLeay configuration ++ ++Version 3.23 11/04/96 ++ Added DES_LONG. If defined to 'unsigned int' on the DEC Alpha, ++ it gives a %20 speedup :-) ++ Fixed the problem with des.pl under perl5. The patches were ++ sent by Ed Kubaitis (ejk@uiuc.edu). 
++ if fcrypt.c, changed values to handle illegal salt values the way ++ normal crypt() implementations do. Some programs apparently use ++ them :-(. The patch was sent by Bjorn Gronvall ++ ++Version 3.22 29/11/95 ++ Bug in des(1), an error with the uuencoding stuff when the ++ 'data' is small, thanks to Geoff Keating ++ for the patch. ++ ++Version 3.21 22/11/95 ++ After some emailing back and forth with ++ Colin Plumb , I've tweaked a few things ++ and in a future version I will probably put in some of the ++ optimisation he suggested for use with the DES_USE_PTR option. ++ Extra routines from Mark Murray for use in ++ freeBSD. They mostly involve random number generation for use ++ with kerberos. They involve evil machine specific system calls ++ etc so I would normally suggest pushing this stuff into the ++ application and/or using RAND_seed()/RAND_bytes() if you are ++ using this DES library as part of SSLeay. ++ Redone the read_pw() function so that it is cleaner and ++ supports termios, thanks to Sameer Parekh ++ for the initial patches for this. ++ Renamed 3ecb_encrypt() to ecb3_encrypt(). This has been ++ done just to make things more consistent. ++ I have also now added triple DES versions of cfb and ofb. ++ ++Version 3.20 ++ Damn, Damn, Damn, as pointed out by Mike_Spreitzer.PARC@xerox.com, ++ my des_random_seed() function was only copying 4 bytes of the ++ passed seed into the init structure. It is now fixed to copy 8. ++ My own suggestion is to used something like MD5 :-) ++ ++Version 3.19 ++ While looking at my code one day, I though, why do I keep on ++ calling des_encrypt(in,out,ks,enc) when every function that ++ calls it has in and out the same. So I dropped the 'out' ++ parameter, people should not be using this function. ++ ++Version 3.18 30/08/95 ++ Fixed a few bit with the distribution and the filenames. ++ 3.17 had been munged via a move to DOS and back again. 
++ NO CODE CHANGES ++ ++Version 3.17 14/07/95 ++ Fixed ede3 cbc which I had broken in 3.16. I have also ++ removed some unneeded variables in 7-8 of the routines. ++ ++Version 3.16 26/06/95 ++ Added des_encrypt2() which does not use IP/FP, used by triple ++ des routines. Tweaked things a bit elsewhere. %13 speedup on ++ sparc and %6 on a R4400 for ede3 cbc mode. ++ ++Version 3.15 06/06/95 ++ Added des_ncbc_encrypt(), it is des_cbc mode except that it is ++ 'normal' and copies the new iv value back over the top of the ++ passed parameter. ++ CHANGED des_ede3_cbc_encrypt() so that it too now overwrites ++ the iv. THIS WILL BREAK EXISTING CODE, but since this function ++ only new, I feel I can change it, not so with des_cbc_encrypt :-(. ++ I need to update the documentation. ++ ++Version 3.14 31/05/95 ++ New release upon the world, as part of my SSL implementation. ++ New copyright and usage stuff. Basically free for all to use ++ as long as you say it came from me :-) ++ ++Version 3.13 31/05/95 ++ A fix in speed.c, if HZ is not defined, I set it to 100.0 ++ which is reasonable for most unixes except SunOS 4.x. ++ I now have a #ifdef sun but timing for SunOS 4.x looked very ++ good :-(. At my last job where I used SunOS 4.x, it was ++ defined to be 60.0 (look at the old INSTALL documentation), at ++ the last release had it changed to 100.0 since I now work with ++ Solaris2 and SVR4 boxes. ++ Thanks to Rory Chisholm for pointing this ++ one out. ++ ++Version 3.12 08/05/95 ++ As pointed out by The Crypt Keeper , ++ my D_ENCRYPT macro in crypt() had an un-necessary variable. ++ It has been removed. ++ ++Version 3.11 03/05/95 ++ Added des_ede3_cbc_encrypt() which is cbc mode des with 3 keys ++ and one iv. It is a standard and I needed it for my SSL code. ++ It makes more sense to use this for triple DES than ++ 3cbc_encrypt(). 
I have also added (or should I say tested :-) ++ cfb64_encrypt() which is cfb64 but it will encrypt a partial ++ number of bytes - 3 bytes in 3 bytes out. Again this is for ++ my SSL library, as a form of encryption to use with SSL ++ telnet. ++ ++Version 3.10 22/03/95 ++ Fixed a bug in 3cbc_encrypt() :-(. When making repeated calls ++ to cbc3_encrypt, the 2 iv values that were being returned to ++ be used in the next call were reversed :-(. ++ Many thanks to Bill Wade for pointing out ++ this error. ++ ++Version 3.09 01/02/95 ++ Fixed des_random_key to far more random, it was rather feeble ++ with regards to picking the initial seed. The problem was ++ pointed out by Olaf Kirch . ++ ++Version 3.08 14/12/94 ++ Added Makefile.PL so libdes can be built into perl5. ++ Changed des_locl.h so RAND is always defined. ++ ++Version 3.07 05/12/94 ++ Added GNUmake and stuff so the library can be build with ++ glibc. ++ ++Version 3.06 30/08/94 ++ Added rpc_enc.c which contains _des_crypt. This is for use in ++ secure_rpc v 4.0 ++ Finally fixed the cfb_enc problems. ++ Fixed a few parameter parsing bugs in des (-3 and -b), thanks ++ to Rob McMillan ++ ++Version 3.05 21/04/94 ++ for unsigned long l; gcc does not produce ((l>>34) == 0) ++ This causes bugs in cfb_enc. ++ Thanks to Hadmut Danisch ++ ++Version 3.04 20/04/94 ++ Added a version number to des.c and libdes.a ++ ++Version 3.03 12/01/94 ++ Fixed a bug in non zero iv in 3cbc_enc. ++ ++Version 3.02 29/10/93 ++ I now work in a place where there are 6+ architectures and 14+ ++ OS versions :-). ++ Fixed TERMIO definition so the most sys V boxes will work :-) ++ ++Release upon comp.sources.misc ++Version 3.01 08/10/93 ++ Added des_3cbc_encrypt() ++ ++Version 3.00 07/10/93 ++ Fixed up documentation. ++ quad_cksum definitely compatible with MIT's now. ++ ++Version 2.30 24/08/93 ++ Triple DES now defaults to triple cbc but can do triple ecb ++ with the -b flag. 
++ Fixed some MSDOS uuen/uudecoding problems, thanks to ++ Added prototypes. ++ ++Version 2.22 29/06/93 ++ Fixed a bug in des_is_weak_key() which stopped it working :-( ++ thanks to engineering@MorningStar.Com. ++ ++Version 2.21 03/06/93 ++ des(1) with no arguments gives quite a bit of help. ++ Added -c (generate ckecksum) flag to des(1). ++ Added -3 (triple DES) flag to des(1). ++ Added cfb and ofb routines to the library. ++ ++Version 2.20 11/03/93 ++ Added -u (uuencode) flag to des(1). ++ I have been playing with byte order in quad_cksum to make it ++ compatible with MIT's version. All I can say is avid this ++ function if possible since MIT's output is endian dependent. ++ ++Version 2.12 14/10/92 ++ Added MSDOS specific macro in ecb_encrypt which gives a %70 ++ speed up when the code is compiled with turbo C. ++ ++Version 2.11 12/10/92 ++ Speedup in set_key (recoding of PC-1) ++ I now do it in 47 simple operations, down from 60. ++ Thanks to John Fletcher (john_fletcher@lccmail.ocf.llnl.gov) ++ for motivating me to look for a faster system :-) ++ The speedup is probably less that 1% but it is still 13 ++ instructions less :-). ++ ++Version 2.10 06/10/92 ++ The code now works on the 64bit ETA10 and CRAY without modifications or ++ #defines. I believe the code should work on any machine that ++ defines long, int or short to be 8 bytes long. ++ Thanks to Shabbir J. Safdar (shabby@mentor.cc.purdue.edu) ++ for helping me fix the code to run on 64bit machines (he had ++ access to an ETA10). ++ Thanks also to John Fletcher ++ for testing the routines on a CRAY. ++ read_password.c has been renamed to read_passwd.c ++ string_to_key.c has been renamed to string2key.c ++ ++Version 2.00 14/09/92 ++ Made mods so that the library should work on 64bit CPU's. ++ Removed all my uchar and ulong defs. To many different ++ versions of unix define them in their header files in too many ++ different combinations :-) ++ IRIX - Sillicon Graphics mods (mostly in read_password.c). 
++ Thanks to Andrew Daviel (advax@erich.triumf.ca) ++ ++Version 1.99 26/08/92 ++ Fixed a bug or 2 in enc_read.c ++ Fixed a bug in enc_write.c ++ Fixed a pseudo bug in fcrypt.c (very obscure). ++ ++Version 1.98 31/07/92 ++ Support for the ETA10. This is a strange machine that defines ++ longs and ints as 8 bytes and shorts as 4 bytes. ++ Since I do evil things with long * that assume that they are 4 ++ bytes. Look in the Makefile for the option to compile for ++ this machine. quad_cksum appears to have problems but I ++ will don't have the time to fix it right now, and this is not ++ a function that uses DES and so will not effect the main uses ++ of the library. ++ ++Version 1.97 20/05/92 eay ++ Fixed the Imakefile and made some changes to des.h to fix some ++ problems when building this package with Kerberos v 4. ++ ++Version 1.96 18/05/92 eay ++ Fixed a small bug in string_to_key() where problems could ++ occur if des_check_key was set to true and the string ++ generated a weak key. ++ ++Patch2 posted to comp.sources.misc ++Version 1.95 13/05/92 eay ++ Added an alternative version of the D_ENCRYPT macro in ++ ecb_encrypt and fcrypt. Depending on the compiler, one version or the ++ other will be faster. This was inspired by ++ Dana How , and her pointers about doing the ++ *(ulong *)((uchar *)ptr+(value&0xfc)) ++ vs ++ ptr[value&0x3f] ++ to stop the C compiler doing a <<2 to convert the long array index. ++ ++Version 1.94 05/05/92 eay ++ Fixed an incompatibility between my string_to_key and the MIT ++ version. When the key is longer than 8 chars, I was wrapping ++ with a different method. To use the old version, define ++ OLD_STR_TO_KEY in the makefile. Thanks to ++ viktor@newsu.shearson.com (Viktor Dukhovni). ++ ++Version 1.93 28/04/92 eay ++ Fixed the VMS mods so that echo is now turned off in ++ read_password. Thanks again to brennan@coco.cchs.su.oz.AU. ++ MSDOS support added. The routines can be compiled with ++ Turbo C (v2.0) and MSC (v5.1). 
Make sure MSDOS is defined. ++ ++Patch1 posted to comp.sources.misc ++Version 1.92 13/04/92 eay ++ Changed D_ENCRYPT so that the rotation of R occurs outside of ++ the loop. This required rotating all the longs in sp.h (now ++ called spr.h). Thanks to Richard Outerbridge <71755.204@CompuServe.COM> ++ speed.c has been changed so it will work without SIGALRM. If ++ times(3) is not present it will try to use ftime() instead. ++ ++Version 1.91 08/04/92 eay ++ Added -E/-D options to des(1) so it can use string_to_key. ++ Added SVR4 mods suggested by witr@rwwa.COM ++ Added VMS mods suggested by brennan@coco.cchs.su.oz.AU. If ++ anyone knows how to turn of tty echo in VMS please tell me or ++ implement it yourself :-). ++ Changed FILE *IN/*OUT to *DES_IN/*DES_OUT since it appears VMS ++ does not like IN/OUT being used. ++ ++Libdes posted to comp.sources.misc ++Version 1.9 24/03/92 eay ++ Now contains a fast small crypt replacement. ++ Added des(1) command. ++ Added des_rw_mode so people can use cbc encryption with ++ enc_read and enc_write. ++ ++Version 1.8 15/10/91 eay ++ Bug in cbc_cksum. ++ Many thanks to Keith Reynolds (keithr@sco.COM) for pointing this ++ one out. ++ ++Version 1.7 24/09/91 eay ++ Fixed set_key :-) ++ set_key is 4 times faster and takes less space. ++ There are a few minor changes that could be made. ++ ++Version 1.6 19/09/1991 eay ++ Finally go IP and FP finished. ++ Now I need to fix set_key. ++ This version is quite a bit faster that 1.51 ++ ++Version 1.52 15/06/1991 eay ++ 20% speedup in ecb_encrypt by changing the E bit selection ++ to use 2 32bit words. This also required modification of the ++ sp table. There is still a way to speedup the IP and IP-1 ++ (hints from outer@sq.com) still working on this one :-(. ++ ++Version 1.51 07/06/1991 eay ++ Faster des_encrypt by loop unrolling ++ Fixed bug in quad_cksum.c (thanks to hughes@logos.ucs.indiana.edu) ++ ++Version 1.50 28/05/1991 eay ++ Optimised the code a bit more for the sparc. 
I have improved the ++ speed of the inner des_encrypt by speeding up the initial and ++ final permutations. ++ ++Version 1.40 23/10/1990 eay ++ Fixed des_random_key, it did not produce a random key :-( ++ ++Version 1.30 2/10/1990 eay ++ Have made des_quad_cksum the same as MIT's, the full package ++ should be compatible with MIT's ++ Have tested on a DECstation 3100 ++ Still need to fix des_set_key (make it faster). ++ Does des_cbc_encrypts at 70.5k/sec on a 3100. ++ ++Version 1.20 18/09/1990 eay ++ Fixed byte order dependencies. ++ Fixed (I hope) all the word alignment problems. ++ Speedup in des_ecb_encrypt. ++ ++Version 1.10 11/09/1990 eay ++ Added des_enc_read and des_enc_write. ++ Still need to fix des_quad_cksum. ++ Still need to document des_enc_read and des_enc_write. ++ ++Version 1.00 27/08/1990 eay ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/asm/des-586.pl Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,251 @@ ++#!/usr/local/bin/perl ++# ++# The inner loop instruction sequence and the IP/FP modifications are from ++# Svend Olaf Mikkelsen ++# ++ ++push(@INC,"perlasm","../../perlasm"); ++require "x86asm.pl"; ++require "cbc.pl"; ++require "desboth.pl"; ++ ++# base code is in microsft ++# op dest, source ++# format. 
++# ++ ++&asm_init($ARGV[0],"des-586.pl"); ++ ++$L="edi"; ++$R="esi"; ++ ++&external_label("des_SPtrans"); ++&des_encrypt("des_encrypt",1); ++&des_encrypt("des_encrypt2",0); ++&des_encrypt3("des_encrypt3",1); ++&des_encrypt3("des_decrypt3",0); ++&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",0,4,5,3,5,-1); ++&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",0,6,7,3,4,5); ++ ++&asm_finish(); ++ ++sub des_encrypt ++ { ++ local($name,$do_ip)=@_; ++ ++ &function_begin_B($name,"EXTRN _des_SPtrans:DWORD"); ++ ++ &push("esi"); ++ &push("edi"); ++ ++ &comment(""); ++ &comment("Load the 2 words"); ++ $ks="ebp"; ++ ++ if ($do_ip) ++ { ++ &mov($R,&wparam(0)); ++ &xor( "ecx", "ecx" ); ++ ++ &push("ebx"); ++ &push("ebp"); ++ ++ &mov("eax",&DWP(0,$R,"",0)); ++ &mov("ebx",&wparam(2)); # get encrypt flag ++ &mov($L,&DWP(4,$R,"",0)); ++ &comment(""); ++ &comment("IP"); ++ &IP_new("eax",$L,$R,3); ++ } ++ else ++ { ++ &mov("eax",&wparam(0)); ++ &xor( "ecx", "ecx" ); ++ ++ &push("ebx"); ++ &push("ebp"); ++ ++ &mov($R,&DWP(0,"eax","",0)); ++ &mov("ebx",&wparam(2)); # get encrypt flag ++ &rotl($R,3); ++ &mov($L,&DWP(4,"eax","",0)); ++ &rotl($L,3); ++ } ++ ++ &mov( $ks, &wparam(1) ); ++ &cmp("ebx","0"); ++ &je(&label("start_decrypt")); ++ ++ for ($i=0; $i<16; $i+=2) ++ { ++ &comment(""); ++ &comment("Round $i"); ++ &D_ENCRYPT($i,$L,$R,$i*2,$ks,"des_SPtrans","eax","ebx","ecx","edx"); ++ ++ &comment(""); ++ &comment("Round ".sprintf("%d",$i+1)); ++ &D_ENCRYPT($i+1,$R,$L,($i+1)*2,$ks,"des_SPtrans","eax","ebx","ecx","edx"); ++ } ++ &jmp(&label("end")); ++ ++ &set_label("start_decrypt"); ++ ++ for ($i=15; $i>0; $i-=2) ++ { ++ &comment(""); ++ &comment("Round $i"); ++ &D_ENCRYPT(15-$i,$L,$R,$i*2,$ks,"des_SPtrans","eax","ebx","ecx","edx"); ++ &comment(""); ++ &comment("Round ".sprintf("%d",$i-1)); ++ &D_ENCRYPT(15-$i+1,$R,$L,($i-1)*2,$ks,"des_SPtrans","eax","ebx","ecx","edx"); ++ } ++ ++ &set_label("end"); ++ ++ if ($do_ip) ++ { ++ &comment(""); ++ &comment("FP"); ++ 
&mov("edx",&wparam(0)); ++ &FP_new($L,$R,"eax",3); ++ ++ &mov(&DWP(0,"edx","",0),"eax"); ++ &mov(&DWP(4,"edx","",0),$R); ++ } ++ else ++ { ++ &comment(""); ++ &comment("Fixup"); ++ &rotr($L,3); # r ++ &mov("eax",&wparam(0)); ++ &rotr($R,3); # l ++ &mov(&DWP(0,"eax","",0),$L); ++ &mov(&DWP(4,"eax","",0),$R); ++ } ++ ++ &pop("ebp"); ++ &pop("ebx"); ++ &pop("edi"); ++ &pop("esi"); ++ &ret(); ++ ++ &function_end_B($name); ++ } ++ ++sub D_ENCRYPT ++ { ++ local($r,$L,$R,$S,$ks,$desSP,$u,$tmp1,$tmp2,$t)=@_; ++ ++ &mov( $u, &DWP(&n2a($S*4),$ks,"",0)); ++ &xor( $tmp1, $tmp1); ++ &mov( $t, &DWP(&n2a(($S+1)*4),$ks,"",0)); ++ &xor( $u, $R); ++ &xor( $t, $R); ++ &and( $u, "0xfcfcfcfc" ); ++ &and( $t, "0xcfcfcfcf" ); ++ &movb( &LB($tmp1), &LB($u) ); ++ &movb( &LB($tmp2), &HB($u) ); ++ &rotr( $t, 4 ); ++ &mov( $ks, &DWP(" $desSP",$tmp1,"",0)); ++ &movb( &LB($tmp1), &LB($t) ); ++ &xor( $L, $ks); ++ &mov( $ks, &DWP("0x200+$desSP",$tmp2,"",0)); ++ &xor( $L, $ks); ###### ++ &movb( &LB($tmp2), &HB($t) ); ++ &shr( $u, 16); ++ &mov( $ks, &DWP("0x100+$desSP",$tmp1,"",0)); ++ &xor( $L, $ks); ###### ++ &movb( &LB($tmp1), &HB($u) ); ++ &shr( $t, 16); ++ &mov( $ks, &DWP("0x300+$desSP",$tmp2,"",0)); ++ &xor( $L, $ks); ++ &mov( $ks, &wparam(1) ); ++ &movb( &LB($tmp2), &HB($t) ); ++ &and( $u, "0xff" ); ++ &and( $t, "0xff" ); ++ &mov( $tmp1, &DWP("0x600+$desSP",$tmp1,"",0)); ++ &xor( $L, $tmp1); ++ &mov( $tmp1, &DWP("0x700+$desSP",$tmp2,"",0)); ++ &xor( $L, $tmp1); ++ &mov( $tmp1, &DWP("0x400+$desSP",$u,"",0)); ++ &xor( $L, $tmp1); ++ &mov( $tmp1, &DWP("0x500+$desSP",$t,"",0)); ++ &xor( $L, $tmp1); ++ } ++ ++sub n2a ++ { ++ sprintf("%d",$_[0]); ++ } ++ ++# now has a side affect of rotating $a by $shift ++sub R_PERM_OP ++ { ++ local($a,$b,$tt,$shift,$mask,$last)=@_; ++ ++ &rotl( $a, $shift ) if ($shift != 0); ++ &mov( $tt, $a ); ++ &xor( $a, $b ); ++ &and( $a, $mask ); ++ if (!$last eq $b) ++ { ++ &xor( $b, $a ); ++ &xor( $tt, $a ); ++ } ++ else ++ { ++ &xor( $tt, $a ); ++ &xor( $b, $a ); ++ } ++ 
&comment(""); ++ } ++ ++sub IP_new ++ { ++ local($l,$r,$tt,$lr)=@_; ++ ++ &R_PERM_OP($l,$r,$tt, 4,"0xf0f0f0f0",$l); ++ &R_PERM_OP($r,$tt,$l,20,"0xfff0000f",$l); ++ &R_PERM_OP($l,$tt,$r,14,"0x33333333",$r); ++ &R_PERM_OP($tt,$r,$l,22,"0x03fc03fc",$r); ++ &R_PERM_OP($l,$r,$tt, 9,"0xaaaaaaaa",$r); ++ ++ if ($lr != 3) ++ { ++ if (($lr-3) < 0) ++ { &rotr($tt, 3-$lr); } ++ else { &rotl($tt, $lr-3); } ++ } ++ if ($lr != 2) ++ { ++ if (($lr-2) < 0) ++ { &rotr($r, 2-$lr); } ++ else { &rotl($r, $lr-2); } ++ } ++ } ++ ++sub FP_new ++ { ++ local($l,$r,$tt,$lr)=@_; ++ ++ if ($lr != 2) ++ { ++ if (($lr-2) < 0) ++ { &rotl($r, 2-$lr); } ++ else { &rotr($r, $lr-2); } ++ } ++ if ($lr != 3) ++ { ++ if (($lr-3) < 0) ++ { &rotl($l, 3-$lr); } ++ else { &rotr($l, $lr-3); } ++ } ++ ++ &R_PERM_OP($l,$r,$tt, 0,"0xaaaaaaaa",$r); ++ &R_PERM_OP($tt,$r,$l,23,"0x03fc03fc",$r); ++ &R_PERM_OP($l,$r,$tt,10,"0x33333333",$l); ++ &R_PERM_OP($r,$tt,$l,18,"0xfff0000f",$l); ++ &R_PERM_OP($l,$tt,$r,12,"0xf0f0f0f0",$r); ++ &rotr($tt , 4); ++ } ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/asm/des686.pl Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,230 @@ ++#!/usr/local/bin/perl ++ ++$prog="des686.pl"; ++ ++# base code is in microsft ++# op dest, source ++# format. 
++# ++ ++# WILL NOT WORK ANYMORE WITH desboth.pl ++require "desboth.pl"; ++ ++if ( ($ARGV[0] eq "elf")) ++ { require "x86unix.pl"; } ++elsif ( ($ARGV[0] eq "a.out")) ++ { $aout=1; require "x86unix.pl"; } ++elsif ( ($ARGV[0] eq "sol")) ++ { $sol=1; require "x86unix.pl"; } ++elsif ( ($ARGV[0] eq "cpp")) ++ { $cpp=1; require "x86unix.pl"; } ++elsif ( ($ARGV[0] eq "win32")) ++ { require "x86ms.pl"; } ++else ++ { ++ print STDERR <<"EOF"; ++Pick one target type from ++ elf - linux, FreeBSD etc ++ a.out - old linux ++ sol - x86 solaris ++ cpp - format so x86unix.cpp can be used ++ win32 - Windows 95/Windows NT ++EOF ++ exit(1); ++ } ++ ++&comment("Don't even think of reading this code"); ++&comment("It was automatically generated by $prog"); ++&comment("Which is a perl program used to generate the x86 assember for"); ++&comment("any of elf, a.out, Win32, or Solaris"); ++&comment("It can be found in SSLeay 0.6.5+ or in libdes 3.26+"); ++&comment("eric "); ++&comment(""); ++ ++&file("dx86xxxx"); ++ ++$L="edi"; ++$R="esi"; ++ ++&des_encrypt("des_encrypt",1); ++&des_encrypt("des_encrypt2",0); ++ ++&des_encrypt3("des_encrypt3",1); ++&des_encrypt3("des_decrypt3",0); ++ ++&file_end(); ++ ++sub des_encrypt ++ { ++ local($name,$do_ip)=@_; ++ ++ &function_begin($name,"EXTRN _des_SPtrans:DWORD"); ++ ++ &comment(""); ++ &comment("Load the 2 words"); ++ &mov("eax",&wparam(0)); ++ &mov($L,&DWP(0,"eax","",0)); ++ &mov($R,&DWP(4,"eax","",0)); ++ ++ $ksp=&wparam(1); ++ ++ if ($do_ip) ++ { ++ &comment(""); ++ &comment("IP"); ++ &IP_new($L,$R,"eax"); ++ } ++ ++ &comment(""); ++ &comment("fixup rotate"); ++ &rotl($R,3); ++ &rotl($L,3); ++ &exch($L,$R); ++ ++ &comment(""); ++ &comment("load counter, key_schedule and enc flag"); ++ &mov("eax",&wparam(2)); # get encrypt flag ++ &mov("ebp",&wparam(1)); # get ks ++ &cmp("eax","0"); ++ &je(&label("start_decrypt")); ++ ++ # encrypting part ++ ++ for ($i=0; $i<16; $i+=2) ++ { ++ &comment(""); ++ &comment("Round $i"); ++ 
&D_ENCRYPT($L,$R,$i*2,"ebp","des_SPtrans","ecx","edx","eax","ebx"); ++ ++ &comment(""); ++ &comment("Round ".sprintf("%d",$i+1)); ++ &D_ENCRYPT($R,$L,($i+1)*2,"ebp","des_SPtrans","ecx","edx","eax","ebx"); ++ } ++ &jmp(&label("end")); ++ ++ &set_label("start_decrypt"); ++ ++ for ($i=15; $i>0; $i-=2) ++ { ++ &comment(""); ++ &comment("Round $i"); ++ &D_ENCRYPT($L,$R,$i*2,"ebp","des_SPtrans","ecx","edx","eax","ebx"); ++ &comment(""); ++ &comment("Round ".sprintf("%d",$i-1)); ++ &D_ENCRYPT($R,$L,($i-1)*2,"ebp","des_SPtrans","ecx","edx","eax","ebx"); ++ } ++ ++ &set_label("end"); ++ ++ &comment(""); ++ &comment("Fixup"); ++ &rotr($L,3); # r ++ &rotr($R,3); # l ++ ++ if ($do_ip) ++ { ++ &comment(""); ++ &comment("FP"); ++ &FP_new($R,$L,"eax"); ++ } ++ ++ &mov("eax",&wparam(0)); ++ &mov(&DWP(0,"eax","",0),$L); ++ &mov(&DWP(4,"eax","",0),$R); ++ ++ &function_end($name); ++ } ++ ++ ++# The logic is to load R into 2 registers and operate on both at the same time. ++# We also load the 2 R's into 2 more registers so we can do the 'move word down a byte' ++# while also masking the other copy and doing a lookup. We then also accumulate the ++# L value in 2 registers then combine them at the end. 
++sub D_ENCRYPT ++ { ++ local($L,$R,$S,$ks,$desSP,$u,$t,$tmp1,$tmp2,$tmp3)=@_; ++ ++ &mov( $u, &DWP(&n2a($S*4),$ks,"",0)); ++ &mov( $t, &DWP(&n2a(($S+1)*4),$ks,"",0)); ++ &xor( $u, $R ); ++ &xor( $t, $R ); ++ &rotr( $t, 4 ); ++ ++ # the numbers at the end of the line are origional instruction order ++ &mov( $tmp2, $u ); # 1 2 ++ &mov( $tmp1, $t ); # 1 1 ++ &and( $tmp2, "0xfc" ); # 1 4 ++ &and( $tmp1, "0xfc" ); # 1 3 ++ &shr( $t, 8 ); # 1 5 ++ &xor( $L, &DWP("0x100+$desSP",$tmp1,"",0)); # 1 7 ++ &shr( $u, 8 ); # 1 6 ++ &mov( $tmp1, &DWP(" $desSP",$tmp2,"",0)); # 1 8 ++ ++ &mov( $tmp2, $u ); # 2 2 ++ &xor( $L, $tmp1 ); # 1 9 ++ &and( $tmp2, "0xfc" ); # 2 4 ++ &mov( $tmp1, $t ); # 2 1 ++ &and( $tmp1, "0xfc" ); # 2 3 ++ &shr( $t, 8 ); # 2 5 ++ &xor( $L, &DWP("0x300+$desSP",$tmp1,"",0)); # 2 7 ++ &shr( $u, 8 ); # 2 6 ++ &mov( $tmp1, &DWP("0x200+$desSP",$tmp2,"",0)); # 2 8 ++ &mov( $tmp2, $u ); # 3 2 ++ ++ &xor( $L, $tmp1 ); # 2 9 ++ &and( $tmp2, "0xfc" ); # 3 4 ++ ++ &mov( $tmp1, $t ); # 3 1 ++ &shr( $u, 8 ); # 3 6 ++ &and( $tmp1, "0xfc" ); # 3 3 ++ &shr( $t, 8 ); # 3 5 ++ &xor( $L, &DWP("0x500+$desSP",$tmp1,"",0)); # 3 7 ++ &mov( $tmp1, &DWP("0x400+$desSP",$tmp2,"",0)); # 3 8 ++ ++ &and( $t, "0xfc" ); # 4 1 ++ &xor( $L, $tmp1 ); # 3 9 ++ ++ &and( $u, "0xfc" ); # 4 2 ++ &xor( $L, &DWP("0x700+$desSP",$t,"",0)); # 4 3 ++ &xor( $L, &DWP("0x600+$desSP",$u,"",0)); # 4 4 ++ } ++ ++sub PERM_OP ++ { ++ local($a,$b,$tt,$shift,$mask)=@_; ++ ++ &mov( $tt, $a ); ++ &shr( $tt, $shift ); ++ &xor( $tt, $b ); ++ &and( $tt, $mask ); ++ &xor( $b, $tt ); ++ &shl( $tt, $shift ); ++ &xor( $a, $tt ); ++ } ++ ++sub IP_new ++ { ++ local($l,$r,$tt)=@_; ++ ++ &PERM_OP($r,$l,$tt, 4,"0x0f0f0f0f"); ++ &PERM_OP($l,$r,$tt,16,"0x0000ffff"); ++ &PERM_OP($r,$l,$tt, 2,"0x33333333"); ++ &PERM_OP($l,$r,$tt, 8,"0x00ff00ff"); ++ &PERM_OP($r,$l,$tt, 1,"0x55555555"); ++ } ++ ++sub FP_new ++ { ++ local($l,$r,$tt)=@_; ++ ++ &PERM_OP($l,$r,$tt, 1,"0x55555555"); ++ &PERM_OP($r,$l,$tt, 8,"0x00ff00ff"); ++ 
&PERM_OP($l,$r,$tt, 2,"0x33333333"); ++ &PERM_OP($r,$l,$tt,16,"0x0000ffff"); ++ &PERM_OP($l,$r,$tt, 4,"0x0f0f0f0f"); ++ } ++ ++sub n2a ++ { ++ sprintf("%d",$_[0]); ++ } +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/asm/desboth.pl Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,79 @@ ++#!/usr/local/bin/perl ++ ++$L="edi"; ++$R="esi"; ++ ++sub des_encrypt3 ++ { ++ local($name,$enc)=@_; ++ ++ &function_begin_B($name,""); ++ &push("ebx"); ++ &mov("ebx",&wparam(0)); ++ ++ &push("ebp"); ++ &push("esi"); ++ ++ &push("edi"); ++ ++ &comment(""); ++ &comment("Load the data words"); ++ &mov($L,&DWP(0,"ebx","",0)); ++ &mov($R,&DWP(4,"ebx","",0)); ++ &stack_push(3); ++ ++ &comment(""); ++ &comment("IP"); ++ &IP_new($L,$R,"edx",0); ++ ++ # put them back ++ ++ if ($enc) ++ { ++ &mov(&DWP(4,"ebx","",0),$R); ++ &mov("eax",&wparam(1)); ++ &mov(&DWP(0,"ebx","",0),"edx"); ++ &mov("edi",&wparam(2)); ++ &mov("esi",&wparam(3)); ++ } ++ else ++ { ++ &mov(&DWP(4,"ebx","",0),$R); ++ &mov("esi",&wparam(1)); ++ &mov(&DWP(0,"ebx","",0),"edx"); ++ &mov("edi",&wparam(2)); ++ &mov("eax",&wparam(3)); ++ } ++ &mov(&swtmp(2), (($enc)?"1":"0")); ++ &mov(&swtmp(1), "eax"); ++ &mov(&swtmp(0), "ebx"); ++ &call("des_encrypt2"); ++ &mov(&swtmp(2), (($enc)?"0":"1")); ++ &mov(&swtmp(1), "edi"); ++ &mov(&swtmp(0), "ebx"); ++ &call("des_encrypt2"); ++ &mov(&swtmp(2), (($enc)?"1":"0")); ++ &mov(&swtmp(1), "esi"); ++ &mov(&swtmp(0), "ebx"); ++ &call("des_encrypt2"); ++ ++ &stack_pop(3); ++ &mov($L,&DWP(0,"ebx","",0)); ++ &mov($R,&DWP(4,"ebx","",0)); ++ ++ &comment(""); ++ &comment("FP"); ++ &FP_new($L,$R,"eax",0); ++ ++ &mov(&DWP(0,"ebx","",0),"eax"); ++ &mov(&DWP(4,"ebx","",0),$R); ++ ++ &pop("edi"); ++ &pop("esi"); ++ &pop("ebp"); ++ &pop("ebx"); ++ &ret(); ++ &function_end_B($name); ++ } ++ ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/asm/readme Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,131 @@ ++First up, let me say I don't like writing in assembler. 
It is not portable, ++dependant on the particular CPU architecture release and is generally a pig ++to debug and get right. Having said that, the x86 architecture is probably ++the most important for speed due to number of boxes and since ++it appears to be the worst architecture to to get ++good C compilers for. So due to this, I have lowered myself to do ++assembler for the inner DES routines in libdes :-). ++ ++The file to implement in assembler is des_enc.c. Replace the following ++4 functions ++des_encrypt(DES_LONG data[2],des_key_schedule ks, int encrypt); ++des_encrypt2(DES_LONG data[2],des_key_schedule ks, int encrypt); ++des_encrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3); ++des_decrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3); ++ ++They encrypt/decrypt the 64 bits held in 'data' using ++the 'ks' key schedules. The only difference between the 4 functions is that ++des_encrypt2() does not perform IP() or FP() on the data (this is an ++optimization for when doing triple DES and des_encrypt3() and des_decrypt3() ++perform triple des. The triple DES routines are in here because it does ++make a big difference to have them located near the des_encrypt2 function ++at link time.. ++ ++Now as we all know, there are lots of different operating systems running on ++x86 boxes, and unfortunately they normally try to make sure their assembler ++formating is not the same as the other peoples. ++The 4 main formats I know of are ++Microsoft Windows 95/Windows NT ++Elf Includes Linux and FreeBSD(?). ++a.out The older Linux. ++Solaris Same as Elf but different comments :-(. ++ ++Now I was not overly keen to write 4 different copies of the same code, ++so I wrote a few perl routines to output the correct assembler, given ++a target assembler type. This code is ugly and is just a hack. ++The libraries are x86unix.pl and x86ms.pl. ++des586.pl, des686.pl and des-som[23].pl are the programs to actually ++generate the assembler. 
++ ++So to generate elf assembler ++perl des-som3.pl elf >dx86-elf.s ++For Windows 95/NT ++perl des-som2.pl win32 >win32.asm ++ ++[ update 4 Jan 1996 ] ++I have added another way to do things. ++perl des-som3.pl cpp >dx86-cpp.s ++generates a file that will be included by dx86unix.cpp when it is compiled. ++To build for elf, a.out, solaris, bsdi etc, ++cc -E -DELF asm/dx86unix.cpp | as -o asm/dx86-elf.o ++cc -E -DSOL asm/dx86unix.cpp | as -o asm/dx86-sol.o ++cc -E -DOUT asm/dx86unix.cpp | as -o asm/dx86-out.o ++cc -E -DBSDI asm/dx86unix.cpp | as -o asm/dx86bsdi.o ++This was done to cut down the number of files in the distribution. ++ ++Now the ugly part. I acquired my copy of Intels ++"Optimization's For Intel's 32-Bit Processors" and found a few interesting ++things. First, the aim of the exersize is to 'extract' one byte at a time ++from a word and do an array lookup. This involves getting the byte from ++the 4 locations in the word and moving it to a new word and doing the lookup. ++The most obvious way to do this is ++xor eax, eax # clear word ++movb al, cl # get low byte ++xor edi DWORD PTR 0x100+des_SP[eax] # xor in word ++movb al, ch # get next byte ++xor edi DWORD PTR 0x300+des_SP[eax] # xor in word ++shr ecx 16 ++which seems ok. For the pentium, this system appears to be the best. ++One has to do instruction interleaving to keep both functional units ++operating, but it is basically very efficient. ++ ++Now the crunch. When a full register is used after a partial write, eg. ++mov al, cl ++xor edi, DWORD PTR 0x100+des_SP[eax] ++386 - 1 cycle stall ++486 - 1 cycle stall ++586 - 0 cycle stall ++686 - at least 7 cycle stall (page 22 of the above mentioned document). ++ ++So the technique that produces the best results on a pentium, according to ++the documentation, will produce hideous results on a pentium pro. ++ ++To get around this, des686.pl will generate code that is not as fast on ++a pentium, should be very good on a pentium pro. 
++mov eax, ecx # copy word ++shr ecx, 8 # line up next byte ++and eax, 0fch # mask byte ++xor edi DWORD PTR 0x100+des_SP[eax] # xor in array lookup ++mov eax, ecx # get word ++shr ecx 8 # line up next byte ++and eax, 0fch # mask byte ++xor edi DWORD PTR 0x300+des_SP[eax] # xor in array lookup ++ ++Due to the execution units in the pentium, this actually works quite well. ++For a pentium pro it should be very good. This is the type of output ++Visual C++ generates. ++ ++There is a third option. instead of using ++mov al, ch ++which is bad on the pentium pro, one may be able to use ++movzx eax, ch ++which may not incur the partial write penalty. On the pentium, ++this instruction takes 4 cycles so is not worth using but on the ++pentium pro it appears it may be worth while. I need access to one to ++experiment :-). ++ ++eric (20 Oct 1996) ++ ++22 Nov 1996 - I have asked people to run the 2 different version on pentium ++pros and it appears that the intel documentation is wrong. The ++mov al,bh is still faster on a pentium pro, so just use the des586.pl ++install des686.pl ++ ++3 Dec 1996 - I added des_encrypt3/des_decrypt3 because I have moved these ++functions into des_enc.c because it does make a massive performance ++difference on some boxes to have the functions code located close to ++the des_encrypt2() function. ++ ++9 Jan 1997 - des-som2.pl is now the correct perl script to use for ++pentiums. It contains an inner loop from ++Svend Olaf Mikkelsen which does raw ecb DES calls at ++273,000 per second. He had a previous version at 250,000 and the best ++I was able to get was 203,000. The content has not changed, this is all ++due to instruction sequencing (and actual instructions choice) which is able ++to keep both functional units of the pentium going. ++We may have lost the ugly register usage restrictions when x86 went 32 bit ++but for the pentium it has been replaced by evil instruction ordering tricks. 
++ ++13 Jan 1997 - des-som3.pl, more optimizations from Svend Olaf. ++raw DES at 281,000 per second on a pentium 100. ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/cbc_enc.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,135 @@ ++/* crypto/des/cbc_enc.c */ ++/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) ++ * All rights reserved. ++ * ++ * This package is an SSL implementation written ++ * by Eric Young (eay@cryptsoft.com). ++ * The implementation was written so as to conform with Netscapes SSL. ++ * ++ * This library is free for commercial and non-commercial use as long as ++ * the following conditions are aheared to. The following conditions ++ * apply to all code found in this distribution, be it the RC4, RSA, ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation ++ * included with this distribution is covered by the same copyright terms ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). ++ * ++ * Copyright remains Eric Young's, and as such any Copyright notices in ++ * the code are not to be removed. ++ * If this package is used in a product, Eric Young should be given attribution ++ * as the author of the parts of the library used. ++ * This can be in the form of a textual message at program startup or ++ * in documentation (online or textual) provided with the package. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * "This product includes cryptographic software written by ++ * Eric Young (eay@cryptsoft.com)" ++ * The word 'cryptographic' can be left out if the rouines from the library ++ * being used are not cryptographic related :-). ++ * 4. If you include any Windows specific code (or a derivative thereof) from ++ * the apps directory (application code) you must include an acknowledgement: ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * The licence and distribution terms for any publically available version or ++ * derivative of this code cannot be changed. i.e. this code cannot simply be ++ * copied and put under another distribution licence ++ * [including the GNU Public Licence.] 
++ */ ++ ++#include "des/des_locl.h" ++ ++void des_cbc_encrypt(input, output, length, schedule, ivec, enc) ++des_cblock (*input); ++des_cblock (*output); ++long length; ++des_key_schedule schedule; ++des_cblock (*ivec); ++int enc; ++ { ++ register DES_LONG tin0,tin1; ++ register DES_LONG tout0,tout1,xor0,xor1; ++ register unsigned char *in,*out; ++ register long l=length; ++ DES_LONG tin[2]; ++ unsigned char *iv; ++ ++ in=(unsigned char *)input; ++ out=(unsigned char *)output; ++ iv=(unsigned char *)ivec; ++ ++ if (enc) ++ { ++ c2l(iv,tout0); ++ c2l(iv,tout1); ++ for (l-=8; l>=0; l-=8) ++ { ++ c2l(in,tin0); ++ c2l(in,tin1); ++ tin0^=tout0; tin[0]=tin0; ++ tin1^=tout1; tin[1]=tin1; ++ des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT); ++ tout0=tin[0]; l2c(tout0,out); ++ tout1=tin[1]; l2c(tout1,out); ++ } ++ if (l != -8) ++ { ++ c2ln(in,tin0,tin1,l+8); ++ tin0^=tout0; tin[0]=tin0; ++ tin1^=tout1; tin[1]=tin1; ++ des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT); ++ tout0=tin[0]; l2c(tout0,out); ++ tout1=tin[1]; l2c(tout1,out); ++ } ++ } ++ else ++ { ++ c2l(iv,xor0); ++ c2l(iv,xor1); ++ for (l-=8; l>=0; l-=8) ++ { ++ c2l(in,tin0); tin[0]=tin0; ++ c2l(in,tin1); tin[1]=tin1; ++ des_encrypt((DES_LONG *)tin,schedule,DES_DECRYPT); ++ tout0=tin[0]^xor0; ++ tout1=tin[1]^xor1; ++ l2c(tout0,out); ++ l2c(tout1,out); ++ xor0=tin0; ++ xor1=tin1; ++ } ++ if (l != -8) ++ { ++ c2l(in,tin0); tin[0]=tin0; ++ c2l(in,tin1); tin[1]=tin1; ++ des_encrypt((DES_LONG *)tin,schedule,DES_DECRYPT); ++ tout0=tin[0]^xor0; ++ tout1=tin[1]^xor1; ++ l2cn(tout0,tout1,out,l+8); ++ /* xor0=tin0; ++ xor1=tin1; */ ++ } ++ } ++ tin0=tin1=tout0=tout1=xor0=xor1=0; ++ tin[0]=tin[1]=0; ++ } ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/des.doc Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,505 @@ ++The DES library. ++ ++Please note that this library was originally written to operate with ++eBones, a version of Kerberos that had had encryption removed when it left ++the USA and then put back in. 
As such there are some routines that I will ++advise not using but they are still in the library for historical reasons. ++For all calls that have an 'input' and 'output' variables, they can be the ++same. ++ ++This library requires the inclusion of 'des.h'. ++ ++All of the encryption functions take what is called a des_key_schedule as an ++argument. A des_key_schedule is an expanded form of the des key. ++A des_key is 8 bytes of odd parity, the type used to hold the key is a ++des_cblock. A des_cblock is an array of 8 bytes, often in this library ++description I will refer to input bytes when the function specifies ++des_cblock's as input or output, this just means that the variable should ++be a multiple of 8 bytes. ++ ++The define DES_ENCRYPT is passed to specify encryption, DES_DECRYPT to ++specify decryption. The functions and global variable are as follows: ++ ++int des_check_key; ++ DES keys are supposed to be odd parity. If this variable is set to ++ a non-zero value, des_set_key() will check that the key has odd ++ parity and is not one of the known weak DES keys. By default this ++ variable is turned off; ++ ++void des_set_odd_parity( ++des_cblock *key ); ++ This function takes a DES key (8 bytes) and sets the parity to odd. ++ ++int des_is_weak_key( ++des_cblock *key ); ++ This function returns a non-zero value if the DES key passed is a ++ weak, DES key. If it is a weak key, don't use it, try a different ++ one. If you are using 'random' keys, the chances of hitting a weak ++ key are 1/2^52 so it is probably not worth checking for them. ++ ++int des_set_key( ++des_cblock *key, ++des_key_schedule schedule); ++ Des_set_key converts an 8 byte DES key into a des_key_schedule. ++ A des_key_schedule is an expanded form of the key which is used to ++ perform actual encryption. It can be regenerated from the DES key ++ so it only needs to be kept when encryption or decryption is about ++ to occur. 
Don't save or pass around des_key_schedule's since they ++ are CPU architecture dependent, DES keys are not. If des_check_key ++ is non zero, zero is returned if the key has the wrong parity or ++ the key is a weak key, else 1 is returned. ++ ++int des_key_sched( ++des_cblock *key, ++des_key_schedule schedule); ++ An alternative name for des_set_key(). ++ ++int des_rw_mode; /* defaults to DES_PCBC_MODE */ ++ This flag holds either DES_CBC_MODE or DES_PCBC_MODE (default). ++ This specifies the function to use in the enc_read() and enc_write() ++ functions. ++ ++void des_encrypt( ++unsigned long *data, ++des_key_schedule ks, ++int enc); ++ This is the DES encryption function that gets called by just about ++ every other DES routine in the library. You should not use this ++ function except to implement 'modes' of DES. I say this because the ++ functions that call this routine do the conversion from 'char *' to ++ long, and this needs to be done to make sure 'non-aligned' memory ++ access do not occur. The characters are loaded 'little endian', ++ have a look at my source code for more details on how I use this ++ function. ++ Data is a pointer to 2 unsigned long's and ks is the ++ des_key_schedule to use. enc, is non zero specifies encryption, ++ zero if decryption. ++ ++void des_encrypt2( ++unsigned long *data, ++des_key_schedule ks, ++int enc); ++ This functions is the same as des_encrypt() except that the DES ++ initial permutation (IP) and final permutation (FP) have been left ++ out. As for des_encrypt(), you should not use this function. ++ It is used by the routines in my library that implement triple DES. ++ IP() des_encrypt2() des_encrypt2() des_encrypt2() FP() is the same ++ as des_encrypt() des_encrypt() des_encrypt() except faster :-). ++ ++void des_ecb_encrypt( ++des_cblock *input, ++des_cblock *output, ++des_key_schedule ks, ++int enc); ++ This is the basic Electronic Code Book form of DES, the most basic ++ form. 
Input is encrypted into output using the key represented by ++ ks. If enc is non zero (DES_ENCRYPT), encryption occurs, otherwise ++ decryption occurs. Input is 8 bytes long and output is 8 bytes. ++ (the des_cblock structure is 8 chars). ++ ++void des_ecb3_encrypt( ++des_cblock *input, ++des_cblock *output, ++des_key_schedule ks1, ++des_key_schedule ks2, ++des_key_schedule ks3, ++int enc); ++ This is the 3 key EDE mode of ECB DES. What this means is that ++ the 8 bytes of input is encrypted with ks1, decrypted with ks2 and ++ then encrypted again with ks3, before being put into output; ++ C=E(ks3,D(ks2,E(ks1,M))). There is a macro, des_ecb2_encrypt() ++ that only takes 2 des_key_schedules that implements, ++ C=E(ks1,D(ks2,E(ks1,M))) in that the final encrypt is done with ks1. ++ ++void des_cbc_encrypt( ++des_cblock *input, ++des_cblock *output, ++long length, ++des_key_schedule ks, ++des_cblock *ivec, ++int enc); ++ This routine implements DES in Cipher Block Chaining mode. ++ Input, which should be a multiple of 8 bytes is encrypted ++ (or decrypted) to output which will also be a multiple of 8 bytes. ++ The number of bytes is in length (and from what I've said above, ++ should be a multiple of 8). If length is not a multiple of 8, I'm ++ not being held responsible :-). ivec is the initialisation vector. ++ This function does not modify this variable. To correctly implement ++ cbc mode, you need to do one of 2 things; copy the last 8 bytes of ++ cipher text for use as the next ivec in your application, ++ or use des_ncbc_encrypt(). ++ Only this routine has this problem with updating the ivec, all ++ other routines that are implementing cbc mode update ivec. ++ ++void des_ncbc_encrypt( ++des_cblock *input, ++des_cblock *output, ++long length, ++des_key_schedule sk, ++des_cblock *ivec, ++int enc); ++ For historical reasons, des_cbc_encrypt() did not update the ++ ivec with the value requires so that subsequent calls to ++ des_cbc_encrypt() would 'chain'. 
This was needed so that the same ++ 'length' values would not need to be used when decrypting. ++ des_ncbc_encrypt() does the right thing. It is the same as ++ des_cbc_encrypt accept that ivec is updates with the correct value ++ to pass in subsequent calls to des_ncbc_encrypt(). I advise using ++ des_ncbc_encrypt() instead of des_cbc_encrypt(); ++ ++void des_xcbc_encrypt( ++des_cblock *input, ++des_cblock *output, ++long length, ++des_key_schedule sk, ++des_cblock *ivec, ++des_cblock *inw, ++des_cblock *outw, ++int enc); ++ This is RSA's DESX mode of DES. It uses inw and outw to ++ 'whiten' the encryption. inw and outw are secret (unlike the iv) ++ and are as such, part of the key. So the key is sort of 24 bytes. ++ This is much better than cbc des. ++ ++void des_3cbc_encrypt( ++des_cblock *input, ++des_cblock *output, ++long length, ++des_key_schedule sk1, ++des_key_schedule sk2, ++des_cblock *ivec1, ++des_cblock *ivec2, ++int enc); ++ This function is flawed, do not use it. I have left it in the ++ library because it is used in my des(1) program and will function ++ correctly when used by des(1). If I removed the function, people ++ could end up unable to decrypt files. ++ This routine implements outer triple cbc encryption using 2 ks and ++ 2 ivec's. Use des_ede2_cbc_encrypt() instead. ++ ++void des_ede3_cbc_encrypt( ++des_cblock *input, ++des_cblock *output, ++long length, ++des_key_schedule ks1, ++des_key_schedule ks2, ++des_key_schedule ks3, ++des_cblock *ivec, ++int enc); ++ This function implements inner triple CBC DES encryption with 3 ++ keys. What this means is that each 'DES' operation ++ inside the cbc mode is really an C=E(ks3,D(ks2,E(ks1,M))). ++ Again, this is cbc mode so an ivec is requires. ++ This mode is used by SSL. ++ There is also a des_ede2_cbc_encrypt() that only uses 2 ++ des_key_schedule's, the first being reused for the final ++ encryption. C=E(ks1,D(ks2,E(ks1,M))). This form of triple DES ++ is used by the RSAref library. 
++ ++void des_pcbc_encrypt( ++des_cblock *input, ++des_cblock *output, ++long length, ++des_key_schedule ks, ++des_cblock *ivec, ++int enc); ++ This is Propagating Cipher Block Chaining mode of DES. It is used ++ by Kerberos v4. It's parameters are the same as des_ncbc_encrypt(). ++ ++void des_cfb_encrypt( ++unsigned char *in, ++unsigned char *out, ++int numbits, ++long length, ++des_key_schedule ks, ++des_cblock *ivec, ++int enc); ++ Cipher Feedback Back mode of DES. This implementation 'feeds back' ++ in numbit blocks. The input (and output) is in multiples of numbits ++ bits. numbits should to be a multiple of 8 bits. Length is the ++ number of bytes input. If numbits is not a multiple of 8 bits, ++ the extra bits in the bytes will be considered padding. So if ++ numbits is 12, for each 2 input bytes, the 4 high bits of the ++ second byte will be ignored. So to encode 72 bits when using ++ a numbits of 12 take 12 bytes. To encode 72 bits when using ++ numbits of 9 will take 16 bytes. To encode 80 bits when using ++ numbits of 16 will take 10 bytes. etc, etc. This padding will ++ apply to both input and output. ++ ++ ++void des_cfb64_encrypt( ++unsigned char *in, ++unsigned char *out, ++long length, ++des_key_schedule ks, ++des_cblock *ivec, ++int *num, ++int enc); ++ This is one of the more useful functions in this DES library, it ++ implements CFB mode of DES with 64bit feedback. Why is this ++ useful you ask? Because this routine will allow you to encrypt an ++ arbitrary number of bytes, no 8 byte padding. Each call to this ++ routine will encrypt the input bytes to output and then update ivec ++ and num. num contains 'how far' we are though ivec. If this does ++ not make much sense, read more about cfb mode of DES :-). 
++ ++void des_ede3_cfb64_encrypt( ++unsigned char *in, ++unsigned char *out, ++long length, ++des_key_schedule ks1, ++des_key_schedule ks2, ++des_key_schedule ks3, ++des_cblock *ivec, ++int *num, ++int enc); ++ Same as des_cfb64_encrypt() accept that the DES operation is ++ triple DES. As usual, there is a macro for ++ des_ede2_cfb64_encrypt() which reuses ks1. ++ ++void des_ofb_encrypt( ++unsigned char *in, ++unsigned char *out, ++int numbits, ++long length, ++des_key_schedule ks, ++des_cblock *ivec); ++ This is a implementation of Output Feed Back mode of DES. It is ++ the same as des_cfb_encrypt() in that numbits is the size of the ++ units dealt with during input and output (in bits). ++ ++void des_ofb64_encrypt( ++unsigned char *in, ++unsigned char *out, ++long length, ++des_key_schedule ks, ++des_cblock *ivec, ++int *num); ++ The same as des_cfb64_encrypt() except that it is Output Feed Back ++ mode. ++ ++void des_ede3_ofb64_encrypt( ++unsigned char *in, ++unsigned char *out, ++long length, ++des_key_schedule ks1, ++des_key_schedule ks2, ++des_key_schedule ks3, ++des_cblock *ivec, ++int *num); ++ Same as des_ofb64_encrypt() accept that the DES operation is ++ triple DES. As usual, there is a macro for ++ des_ede2_ofb64_encrypt() which reuses ks1. ++ ++int des_read_pw_string( ++char *buf, ++int length, ++char *prompt, ++int verify); ++ This routine is used to get a password from the terminal with echo ++ turned off. Buf is where the string will end up and length is the ++ size of buf. Prompt is a string presented to the 'user' and if ++ verify is set, the key is asked for twice and unless the 2 copies ++ match, an error is returned. A return code of -1 indicates a ++ system error, 1 failure due to use interaction, and 0 is success. 
++ ++unsigned long des_cbc_cksum( ++des_cblock *input, ++des_cblock *output, ++long length, ++des_key_schedule ks, ++des_cblock *ivec); ++ This function produces an 8 byte checksum from input that it puts in ++ output and returns the last 4 bytes as a long. The checksum is ++ generated via cbc mode of DES in which only the last 8 byes are ++ kept. I would recommend not using this function but instead using ++ the EVP_Digest routines, or at least using MD5 or SHA. This ++ function is used by Kerberos v4 so that is why it stays in the ++ library. ++ ++char *des_fcrypt( ++const char *buf, ++const char *salt ++char *ret); ++ This is my fast version of the unix crypt(3) function. This version ++ takes only a small amount of space relative to other fast ++ crypt() implementations. This is different to the normal crypt ++ in that the third parameter is the buffer that the return value ++ is written into. It needs to be at least 14 bytes long. This ++ function is thread safe, unlike the normal crypt. ++ ++char *crypt( ++const char *buf, ++const char *salt); ++ This function calls des_fcrypt() with a static array passed as the ++ third parameter. This emulates the normal non-thread safe semantics ++ of crypt(3). ++ ++void des_string_to_key( ++char *str, ++des_cblock *key); ++ This function takes str and converts it into a DES key. I would ++ recommend using MD5 instead and use the first 8 bytes of output. ++ When I wrote the first version of these routines back in 1990, MD5 ++ did not exist but I feel these routines are still sound. This ++ routines is compatible with the one in MIT's libdes. ++ ++void des_string_to_2keys( ++char *str, ++des_cblock *key1, ++des_cblock *key2); ++ This function takes str and converts it into 2 DES keys. ++ I would recommend using MD5 and using the 16 bytes as the 2 keys. 
++ I have nothing against these 2 'string_to_key' routines, it's just ++ that if you say that your encryption key is generated by using the ++ 16 bytes of an MD5 hash, every-one knows how you generated your ++ keys. ++ ++int des_read_password( ++des_cblock *key, ++char *prompt, ++int verify); ++ This routine combines des_read_pw_string() with des_string_to_key(). ++ ++int des_read_2passwords( ++des_cblock *key1, ++des_cblock *key2, ++char *prompt, ++int verify); ++ This routine combines des_read_pw_string() with des_string_to_2key(). ++ ++void des_random_seed( ++des_cblock key); ++ This routine sets a starting point for des_random_key(). ++ ++void des_random_key( ++des_cblock ret); ++ This function return a random key. Make sure to 'seed' the random ++ number generator (with des_random_seed()) before using this function. ++ I personally now use a MD5 based random number system. ++ ++int des_enc_read( ++int fd, ++char *buf, ++int len, ++des_key_schedule ks, ++des_cblock *iv); ++ This function will write to a file descriptor the encrypted data ++ from buf. This data will be preceded by a 4 byte 'byte count' and ++ will be padded out to 8 bytes. The encryption is either CBC of ++ PCBC depending on the value of des_rw_mode. If it is DES_PCBC_MODE, ++ pcbc is used, if DES_CBC_MODE, cbc is used. The default is to use ++ DES_PCBC_MODE. ++ ++int des_enc_write( ++int fd, ++char *buf, ++int len, ++des_key_schedule ks, ++des_cblock *iv); ++ This routines read stuff written by des_enc_read() and decrypts it. ++ I have used these routines quite a lot but I don't believe they are ++ suitable for non-blocking io. If you are after a full ++ authentication/encryption over networks, have a look at SSL instead. ++ ++unsigned long des_quad_cksum( ++des_cblock *input, ++des_cblock *output, ++long length, ++int out_count, ++des_cblock *seed); ++ This is a function from Kerberos v4 that is not anything to do with ++ DES but was needed. 
It is a cksum that is quicker to generate than ++ des_cbc_cksum(); I personally would use MD5 routines now. ++===== ++Modes of DES ++Quite a bit of the following information has been taken from ++ AS 2805.5.2 ++ Australian Standard ++ Electronic funds transfer - Requirements for interfaces, ++ Part 5.2: Modes of operation for an n-bit block cipher algorithm ++ Appendix A ++ ++There are several different modes in which DES can be used, they are ++as follows. ++ ++Electronic Codebook Mode (ECB) (des_ecb_encrypt()) ++- 64 bits are enciphered at a time. ++- The order of the blocks can be rearranged without detection. ++- The same plaintext block always produces the same ciphertext block ++ (for the same key) making it vulnerable to a 'dictionary attack'. ++- An error will only affect one ciphertext block. ++ ++Cipher Block Chaining Mode (CBC) (des_cbc_encrypt()) ++- a multiple of 64 bits are enciphered at a time. ++- The CBC mode produces the same ciphertext whenever the same ++ plaintext is encrypted using the same key and starting variable. ++- The chaining operation makes the ciphertext blocks dependent on the ++ current and all preceding plaintext blocks and therefore blocks can not ++ be rearranged. ++- The use of different starting variables prevents the same plaintext ++ enciphering to the same ciphertext. ++- An error will affect the current and the following ciphertext blocks. ++ ++Cipher Feedback Mode (CFB) (des_cfb_encrypt()) ++- a number of bits (j) <= 64 are enciphered at a time. ++- The CFB mode produces the same ciphertext whenever the same ++ plaintext is encrypted using the same key and starting variable. ++- The chaining operation makes the ciphertext variables dependent on the ++ current and all preceding variables and therefore j-bit variables are ++ chained together and can not be rearranged. ++- The use of different starting variables prevents the same plaintext ++ enciphering to the same ciphertext. 
++- The strength of the CFB mode depends on the size of k (maximal if ++ j == k). In my implementation this is always the case. ++- Selection of a small value for j will require more cycles through ++ the encipherment algorithm per unit of plaintext and thus cause ++ greater processing overheads. ++- Only multiples of j bits can be enciphered. ++- An error will affect the current and the following ciphertext variables. ++ ++Output Feedback Mode (OFB) (des_ofb_encrypt()) ++- a number of bits (j) <= 64 are enciphered at a time. ++- The OFB mode produces the same ciphertext whenever the same ++ plaintext enciphered using the same key and starting variable. More ++ over, in the OFB mode the same key stream is produced when the same ++ key and start variable are used. Consequently, for security reasons ++ a specific start variable should be used only once for a given key. ++- The absence of chaining makes the OFB more vulnerable to specific attacks. ++- The use of different start variables values prevents the same ++ plaintext enciphering to the same ciphertext, by producing different ++ key streams. ++- Selection of a small value for j will require more cycles through ++ the encipherment algorithm per unit of plaintext and thus cause ++ greater processing overheads. ++- Only multiples of j bits can be enciphered. ++- OFB mode of operation does not extend ciphertext errors in the ++ resultant plaintext output. Every bit error in the ciphertext causes ++ only one bit to be in error in the deciphered plaintext. ++- OFB mode is not self-synchronising. If the two operation of ++ encipherment and decipherment get out of synchronism, the system needs ++ to be re-initialised. ++- Each re-initialisation should use a value of the start variable ++ different from the start variable values used before with the same ++ key. The reason for this is that an identical bit stream would be ++ produced each time from the same parameters. 
This would be ++ susceptible to a ' known plaintext' attack. ++ ++Triple ECB Mode (des_ecb3_encrypt()) ++- Encrypt with key1, decrypt with key2 and encrypt with key3 again. ++- As for ECB encryption but increases the key length to 168 bits. ++ There are theoretic attacks that can be used that make the effective ++ key length 112 bits, but this attack also requires 2^56 blocks of ++ memory, not very likely, even for the NSA. ++- If both keys are the same it is equivalent to encrypting once with ++ just one key. ++- If the first and last key are the same, the key length is 112 bits. ++ There are attacks that could reduce the key space to 55 bit's but it ++ requires 2^56 blocks of memory. ++- If all 3 keys are the same, this is effectively the same as normal ++ ecb mode. ++ ++Triple CBC Mode (des_ede3_cbc_encrypt()) ++- Encrypt with key1, decrypt with key2 and then encrypt with key3. ++- As for CBC encryption but increases the key length to 168 bits with ++ the same restrictions as for triple ecb mode. +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/des_enc.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,502 @@ ++/* crypto/des/des_enc.c */ ++/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) ++ * All rights reserved. ++ * ++ * This package is an SSL implementation written ++ * by Eric Young (eay@cryptsoft.com). ++ * The implementation was written so as to conform with Netscapes SSL. ++ * ++ * This library is free for commercial and non-commercial use as long as ++ * the following conditions are aheared to. The following conditions ++ * apply to all code found in this distribution, be it the RC4, RSA, ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation ++ * included with this distribution is covered by the same copyright terms ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). ++ * ++ * Copyright remains Eric Young's, and as such any Copyright notices in ++ * the code are not to be removed. 
++ * If this package is used in a product, Eric Young should be given attribution ++ * as the author of the parts of the library used. ++ * This can be in the form of a textual message at program startup or ++ * in documentation (online or textual) provided with the package. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * "This product includes cryptographic software written by ++ * Eric Young (eay@cryptsoft.com)" ++ * The word 'cryptographic' can be left out if the rouines from the library ++ * being used are not cryptographic related :-). ++ * 4. If you include any Windows specific code (or a derivative thereof) from ++ * the apps directory (application code) you must include an acknowledgement: ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * The licence and distribution terms for any publically available version or ++ * derivative of this code cannot be changed. i.e. this code cannot simply be ++ * copied and put under another distribution licence ++ * [including the GNU Public Licence.] ++ */ ++ ++#include "des/des_locl.h" ++ ++void des_encrypt(data, ks, enc) ++DES_LONG *data; ++des_key_schedule ks; ++int enc; ++ { ++ register DES_LONG l,r,t,u; ++#ifdef DES_PTR ++ register unsigned char *des_SP=(unsigned char *)des_SPtrans; ++#endif ++#ifndef DES_UNROLL ++ register int i; ++#endif ++ register DES_LONG *s; ++ ++ r=data[0]; ++ l=data[1]; ++ ++ IP(r,l); ++ /* Things have been modified so that the initial rotate is ++ * done outside the loop. This required the ++ * des_SPtrans values in sp.h to be rotated 1 bit to the right. ++ * One perl script later and things have a 5% speed up on a sparc2. ++ * Thanks to Richard Outerbridge <71755.204@CompuServe.COM> ++ * for pointing this out. 
*/ ++ /* clear the top bits on machines with 8byte longs */ ++ /* shift left by 2 */ ++ r=ROTATE(r,29)&0xffffffffL; ++ l=ROTATE(l,29)&0xffffffffL; ++ ++ s=(DES_LONG *)ks; ++ /* I don't know if it is worth the effort of loop unrolling the ++ * inner loop */ ++ if (enc) ++ { ++#ifdef DES_UNROLL ++ D_ENCRYPT(l,r, 0); /* 1 */ ++ D_ENCRYPT(r,l, 2); /* 2 */ ++ D_ENCRYPT(l,r, 4); /* 3 */ ++ D_ENCRYPT(r,l, 6); /* 4 */ ++ D_ENCRYPT(l,r, 8); /* 5 */ ++ D_ENCRYPT(r,l,10); /* 6 */ ++ D_ENCRYPT(l,r,12); /* 7 */ ++ D_ENCRYPT(r,l,14); /* 8 */ ++ D_ENCRYPT(l,r,16); /* 9 */ ++ D_ENCRYPT(r,l,18); /* 10 */ ++ D_ENCRYPT(l,r,20); /* 11 */ ++ D_ENCRYPT(r,l,22); /* 12 */ ++ D_ENCRYPT(l,r,24); /* 13 */ ++ D_ENCRYPT(r,l,26); /* 14 */ ++ D_ENCRYPT(l,r,28); /* 15 */ ++ D_ENCRYPT(r,l,30); /* 16 */ ++#else ++ for (i=0; i<32; i+=8) ++ { ++ D_ENCRYPT(l,r,i+0); /* 1 */ ++ D_ENCRYPT(r,l,i+2); /* 2 */ ++ D_ENCRYPT(l,r,i+4); /* 3 */ ++ D_ENCRYPT(r,l,i+6); /* 4 */ ++ } ++#endif ++ } ++ else ++ { ++#ifdef DES_UNROLL ++ D_ENCRYPT(l,r,30); /* 16 */ ++ D_ENCRYPT(r,l,28); /* 15 */ ++ D_ENCRYPT(l,r,26); /* 14 */ ++ D_ENCRYPT(r,l,24); /* 13 */ ++ D_ENCRYPT(l,r,22); /* 12 */ ++ D_ENCRYPT(r,l,20); /* 11 */ ++ D_ENCRYPT(l,r,18); /* 10 */ ++ D_ENCRYPT(r,l,16); /* 9 */ ++ D_ENCRYPT(l,r,14); /* 8 */ ++ D_ENCRYPT(r,l,12); /* 7 */ ++ D_ENCRYPT(l,r,10); /* 6 */ ++ D_ENCRYPT(r,l, 8); /* 5 */ ++ D_ENCRYPT(l,r, 6); /* 4 */ ++ D_ENCRYPT(r,l, 4); /* 3 */ ++ D_ENCRYPT(l,r, 2); /* 2 */ ++ D_ENCRYPT(r,l, 0); /* 1 */ ++#else ++ for (i=30; i>0; i-=8) ++ { ++ D_ENCRYPT(l,r,i-0); /* 16 */ ++ D_ENCRYPT(r,l,i-2); /* 15 */ ++ D_ENCRYPT(l,r,i-4); /* 14 */ ++ D_ENCRYPT(r,l,i-6); /* 13 */ ++ } ++#endif ++ } ++ ++ /* rotate and clear the top bits on machines with 8byte longs */ ++ l=ROTATE(l,3)&0xffffffffL; ++ r=ROTATE(r,3)&0xffffffffL; ++ ++ FP(r,l); ++ data[0]=l; ++ data[1]=r; ++ l=r=t=u=0; ++ } ++ ++void des_encrypt2(data, ks, enc) ++DES_LONG *data; ++des_key_schedule ks; ++int enc; ++ { ++ register DES_LONG l,r,t,u; ++#ifdef 
DES_PTR ++ register unsigned char *des_SP=(unsigned char *)des_SPtrans; ++#endif ++#ifndef DES_UNROLL ++ register int i; ++#endif ++ register DES_LONG *s; ++ ++ r=data[0]; ++ l=data[1]; ++ ++ /* Things have been modified so that the initial rotate is ++ * done outside the loop. This required the ++ * des_SPtrans values in sp.h to be rotated 1 bit to the right. ++ * One perl script later and things have a 5% speed up on a sparc2. ++ * Thanks to Richard Outerbridge <71755.204@CompuServe.COM> ++ * for pointing this out. */ ++ /* clear the top bits on machines with 8byte longs */ ++ r=ROTATE(r,29)&0xffffffffL; ++ l=ROTATE(l,29)&0xffffffffL; ++ ++ s=(DES_LONG *)ks; ++ /* I don't know if it is worth the effort of loop unrolling the ++ * inner loop */ ++ if (enc) ++ { ++#ifdef DES_UNROLL ++ D_ENCRYPT(l,r, 0); /* 1 */ ++ D_ENCRYPT(r,l, 2); /* 2 */ ++ D_ENCRYPT(l,r, 4); /* 3 */ ++ D_ENCRYPT(r,l, 6); /* 4 */ ++ D_ENCRYPT(l,r, 8); /* 5 */ ++ D_ENCRYPT(r,l,10); /* 6 */ ++ D_ENCRYPT(l,r,12); /* 7 */ ++ D_ENCRYPT(r,l,14); /* 8 */ ++ D_ENCRYPT(l,r,16); /* 9 */ ++ D_ENCRYPT(r,l,18); /* 10 */ ++ D_ENCRYPT(l,r,20); /* 11 */ ++ D_ENCRYPT(r,l,22); /* 12 */ ++ D_ENCRYPT(l,r,24); /* 13 */ ++ D_ENCRYPT(r,l,26); /* 14 */ ++ D_ENCRYPT(l,r,28); /* 15 */ ++ D_ENCRYPT(r,l,30); /* 16 */ ++#else ++ for (i=0; i<32; i+=8) ++ { ++ D_ENCRYPT(l,r,i+0); /* 1 */ ++ D_ENCRYPT(r,l,i+2); /* 2 */ ++ D_ENCRYPT(l,r,i+4); /* 3 */ ++ D_ENCRYPT(r,l,i+6); /* 4 */ ++ } ++#endif ++ } ++ else ++ { ++#ifdef DES_UNROLL ++ D_ENCRYPT(l,r,30); /* 16 */ ++ D_ENCRYPT(r,l,28); /* 15 */ ++ D_ENCRYPT(l,r,26); /* 14 */ ++ D_ENCRYPT(r,l,24); /* 13 */ ++ D_ENCRYPT(l,r,22); /* 12 */ ++ D_ENCRYPT(r,l,20); /* 11 */ ++ D_ENCRYPT(l,r,18); /* 10 */ ++ D_ENCRYPT(r,l,16); /* 9 */ ++ D_ENCRYPT(l,r,14); /* 8 */ ++ D_ENCRYPT(r,l,12); /* 7 */ ++ D_ENCRYPT(l,r,10); /* 6 */ ++ D_ENCRYPT(r,l, 8); /* 5 */ ++ D_ENCRYPT(l,r, 6); /* 4 */ ++ D_ENCRYPT(r,l, 4); /* 3 */ ++ D_ENCRYPT(l,r, 2); /* 2 */ ++ D_ENCRYPT(r,l, 0); /* 1 */ ++#else ++ for 
(i=30; i>0; i-=8) ++ { ++ D_ENCRYPT(l,r,i-0); /* 16 */ ++ D_ENCRYPT(r,l,i-2); /* 15 */ ++ D_ENCRYPT(l,r,i-4); /* 14 */ ++ D_ENCRYPT(r,l,i-6); /* 13 */ ++ } ++#endif ++ } ++ /* rotate and clear the top bits on machines with 8byte longs */ ++ data[0]=ROTATE(l,3)&0xffffffffL; ++ data[1]=ROTATE(r,3)&0xffffffffL; ++ l=r=t=u=0; ++ } ++ ++void des_encrypt3(data,ks1,ks2,ks3) ++DES_LONG *data; ++des_key_schedule ks1; ++des_key_schedule ks2; ++des_key_schedule ks3; ++ { ++ register DES_LONG l,r; ++ ++ l=data[0]; ++ r=data[1]; ++ IP(l,r); ++ data[0]=l; ++ data[1]=r; ++ des_encrypt2((DES_LONG *)data,ks1,DES_ENCRYPT); ++ des_encrypt2((DES_LONG *)data,ks2,DES_DECRYPT); ++ des_encrypt2((DES_LONG *)data,ks3,DES_ENCRYPT); ++ l=data[0]; ++ r=data[1]; ++ FP(r,l); ++ data[0]=l; ++ data[1]=r; ++ } ++ ++void des_decrypt3(data,ks1,ks2,ks3) ++DES_LONG *data; ++des_key_schedule ks1; ++des_key_schedule ks2; ++des_key_schedule ks3; ++ { ++ register DES_LONG l,r; ++ ++ l=data[0]; ++ r=data[1]; ++ IP(l,r); ++ data[0]=l; ++ data[1]=r; ++ des_encrypt2((DES_LONG *)data,ks3,DES_DECRYPT); ++ des_encrypt2((DES_LONG *)data,ks2,DES_ENCRYPT); ++ des_encrypt2((DES_LONG *)data,ks1,DES_DECRYPT); ++ l=data[0]; ++ r=data[1]; ++ FP(r,l); ++ data[0]=l; ++ data[1]=r; ++ } ++ ++#ifndef DES_DEFAULT_OPTIONS ++ ++void des_ncbc_encrypt(input, output, length, schedule, ivec, enc) ++des_cblock (*input); ++des_cblock (*output); ++long length; ++des_key_schedule schedule; ++des_cblock (*ivec); ++int enc; ++ { ++ register DES_LONG tin0,tin1; ++ register DES_LONG tout0,tout1,xor0,xor1; ++ register unsigned char *in,*out; ++ register long l=length; ++ DES_LONG tin[2]; ++ unsigned char *iv; ++ ++ in=(unsigned char *)input; ++ out=(unsigned char *)output; ++ iv=(unsigned char *)ivec; ++ ++ if (enc) ++ { ++ c2l(iv,tout0); ++ c2l(iv,tout1); ++ for (l-=8; l>=0; l-=8) ++ { ++ c2l(in,tin0); ++ c2l(in,tin1); ++ tin0^=tout0; tin[0]=tin0; ++ tin1^=tout1; tin[1]=tin1; ++ des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT); ++ 
tout0=tin[0]; l2c(tout0,out); ++ tout1=tin[1]; l2c(tout1,out); ++ } ++ if (l != -8) ++ { ++ c2ln(in,tin0,tin1,l+8); ++ tin0^=tout0; tin[0]=tin0; ++ tin1^=tout1; tin[1]=tin1; ++ des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT); ++ tout0=tin[0]; l2c(tout0,out); ++ tout1=tin[1]; l2c(tout1,out); ++ } ++ iv=(unsigned char *)ivec; ++ l2c(tout0,iv); ++ l2c(tout1,iv); ++ } ++ else ++ { ++ c2l(iv,xor0); ++ c2l(iv,xor1); ++ for (l-=8; l>=0; l-=8) ++ { ++ c2l(in,tin0); tin[0]=tin0; ++ c2l(in,tin1); tin[1]=tin1; ++ des_encrypt((DES_LONG *)tin,schedule,DES_DECRYPT); ++ tout0=tin[0]^xor0; ++ tout1=tin[1]^xor1; ++ l2c(tout0,out); ++ l2c(tout1,out); ++ xor0=tin0; ++ xor1=tin1; ++ } ++ if (l != -8) ++ { ++ c2l(in,tin0); tin[0]=tin0; ++ c2l(in,tin1); tin[1]=tin1; ++ des_encrypt((DES_LONG *)tin,schedule,DES_DECRYPT); ++ tout0=tin[0]^xor0; ++ tout1=tin[1]^xor1; ++ l2cn(tout0,tout1,out,l+8); ++ xor0=tin0; ++ xor1=tin1; ++ } ++ ++ iv=(unsigned char *)ivec; ++ l2c(xor0,iv); ++ l2c(xor1,iv); ++ } ++ tin0=tin1=tout0=tout1=xor0=xor1=0; ++ tin[0]=tin[1]=0; ++ } ++ ++void des_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc) ++des_cblock (*input); ++des_cblock (*output); ++long length; ++des_key_schedule ks1; ++des_key_schedule ks2; ++des_key_schedule ks3; ++des_cblock (*ivec); ++int enc; ++ { ++ register DES_LONG tin0,tin1; ++ register DES_LONG tout0,tout1,xor0,xor1; ++ register unsigned char *in,*out; ++ register long l=length; ++ DES_LONG tin[2]; ++ unsigned char *iv; ++ ++ in=(unsigned char *)input; ++ out=(unsigned char *)output; ++ iv=(unsigned char *)ivec; ++ ++ if (enc) ++ { ++ c2l(iv,tout0); ++ c2l(iv,tout1); ++ for (l-=8; l>=0; l-=8) ++ { ++ c2l(in,tin0); ++ c2l(in,tin1); ++ tin0^=tout0; ++ tin1^=tout1; ++ ++ tin[0]=tin0; ++ tin[1]=tin1; ++ des_encrypt3((DES_LONG *)tin,ks1,ks2,ks3); ++ tout0=tin[0]; ++ tout1=tin[1]; ++ ++ l2c(tout0,out); ++ l2c(tout1,out); ++ } ++ if (l != -8) ++ { ++ c2ln(in,tin0,tin1,l+8); ++ tin0^=tout0; ++ tin1^=tout1; ++ ++ tin[0]=tin0; ++ 
tin[1]=tin1; ++ des_encrypt3((DES_LONG *)tin,ks1,ks2,ks3); ++ tout0=tin[0]; ++ tout1=tin[1]; ++ ++ l2c(tout0,out); ++ l2c(tout1,out); ++ } ++ iv=(unsigned char *)ivec; ++ l2c(tout0,iv); ++ l2c(tout1,iv); ++ } ++ else ++ { ++ register DES_LONG t0,t1; ++ ++ c2l(iv,xor0); ++ c2l(iv,xor1); ++ for (l-=8; l>=0; l-=8) ++ { ++ c2l(in,tin0); ++ c2l(in,tin1); ++ ++ t0=tin0; ++ t1=tin1; ++ ++ tin[0]=tin0; ++ tin[1]=tin1; ++ des_decrypt3((DES_LONG *)tin,ks1,ks2,ks3); ++ tout0=tin[0]; ++ tout1=tin[1]; ++ ++ tout0^=xor0; ++ tout1^=xor1; ++ l2c(tout0,out); ++ l2c(tout1,out); ++ xor0=t0; ++ xor1=t1; ++ } ++ if (l != -8) ++ { ++ c2l(in,tin0); ++ c2l(in,tin1); ++ ++ t0=tin0; ++ t1=tin1; ++ ++ tin[0]=tin0; ++ tin[1]=tin1; ++ des_decrypt3((DES_LONG *)tin,ks1,ks2,ks3); ++ tout0=tin[0]; ++ tout1=tin[1]; ++ ++ tout0^=xor0; ++ tout1^=xor1; ++ l2cn(tout0,tout1,out,l+8); ++ xor0=t0; ++ xor1=t1; ++ } ++ ++ iv=(unsigned char *)ivec; ++ l2c(xor0,iv); ++ l2c(xor1,iv); ++ } ++ tin0=tin1=tout0=tout1=xor0=xor1=0; ++ tin[0]=tin[1]=0; ++ } ++ ++#endif /* DES_DEFAULT_OPTIONS */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/des_opts.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,620 @@ ++/* crypto/des/des_opts.c */ ++/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) ++ * All rights reserved. ++ * ++ * This package is an SSL implementation written ++ * by Eric Young (eay@cryptsoft.com). ++ * The implementation was written so as to conform with Netscapes SSL. ++ * ++ * This library is free for commercial and non-commercial use as long as ++ * the following conditions are aheared to. The following conditions ++ * apply to all code found in this distribution, be it the RC4, RSA, ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation ++ * included with this distribution is covered by the same copyright terms ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). 
++ * ++ * Copyright remains Eric Young's, and as such any Copyright notices in ++ * the code are not to be removed. ++ * If this package is used in a product, Eric Young should be given attribution ++ * as the author of the parts of the library used. ++ * This can be in the form of a textual message at program startup or ++ * in documentation (online or textual) provided with the package. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * "This product includes cryptographic software written by ++ * Eric Young (eay@cryptsoft.com)" ++ * The word 'cryptographic' can be left out if the rouines from the library ++ * being used are not cryptographic related :-). ++ * 4. If you include any Windows specific code (or a derivative thereof) from ++ * the apps directory (application code) you must include an acknowledgement: ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * The licence and distribution terms for any publically available version or ++ * derivative of this code cannot be changed. i.e. this code cannot simply be ++ * copied and put under another distribution licence ++ * [including the GNU Public Licence.] ++ */ ++ ++/* define PART1, PART2, PART3 or PART4 to build only with a few of the options. ++ * This is for machines with 64k code segment size restrictions. */ ++ ++#ifndef MSDOS ++#define TIMES ++#endif ++ ++#include ++#ifndef MSDOS ++#include ++#else ++#include ++extern void exit(); ++#endif ++#include ++#ifndef VMS ++#ifndef _IRIX ++#include ++#endif ++#ifdef TIMES ++#include ++#include ++#endif ++#else /* VMS */ ++#include ++struct tms { ++ time_t tms_utime; ++ time_t tms_stime; ++ time_t tms_uchild; /* I dunno... 
*/ ++ time_t tms_uchildsys; /* so these names are a guess :-) */ ++ } ++#endif ++#ifndef TIMES ++#include ++#endif ++ ++#ifdef sun ++#include ++#include ++#endif ++ ++#include "des/des_locl.h" ++#include "des/spr.h" ++ ++#define DES_DEFAULT_OPTIONS ++ ++#if !defined(PART1) && !defined(PART2) && !defined(PART3) && !defined(PART4) ++#define PART1 ++#define PART2 ++#define PART3 ++#define PART4 ++#endif ++ ++#ifdef PART1 ++ ++#undef DES_UNROLL ++#undef DES_RISC1 ++#undef DES_RISC2 ++#undef DES_PTR ++#undef D_ENCRYPT ++#define des_encrypt des_encrypt_u4_cisc_idx ++#define des_encrypt2 des_encrypt2_u4_cisc_idx ++#define des_encrypt3 des_encrypt3_u4_cisc_idx ++#define des_decrypt3 des_decrypt3_u4_cisc_idx ++#undef HEADER_DES_LOCL_H ++#include "des_enc.c" ++ ++#define DES_UNROLL ++#undef DES_RISC1 ++#undef DES_RISC2 ++#undef DES_PTR ++#undef D_ENCRYPT ++#undef des_encrypt ++#undef des_encrypt2 ++#undef des_encrypt3 ++#undef des_decrypt3 ++#define des_encrypt des_encrypt_u16_cisc_idx ++#define des_encrypt2 des_encrypt2_u16_cisc_idx ++#define des_encrypt3 des_encrypt3_u16_cisc_idx ++#define des_decrypt3 des_decrypt3_u16_cisc_idx ++#undef HEADER_DES_LOCL_H ++#include "des_enc.c" ++ ++#undef DES_UNROLL ++#define DES_RISC1 ++#undef DES_RISC2 ++#undef DES_PTR ++#undef D_ENCRYPT ++#undef des_encrypt ++#undef des_encrypt2 ++#undef des_encrypt3 ++#undef des_decrypt3 ++#define des_encrypt des_encrypt_u4_risc1_idx ++#define des_encrypt2 des_encrypt2_u4_risc1_idx ++#define des_encrypt3 des_encrypt3_u4_risc1_idx ++#define des_decrypt3 des_decrypt3_u4_risc1_idx ++#undef HEADER_DES_LOCL_H ++#include "des_enc.c" ++ ++#endif ++ ++#ifdef PART2 ++ ++#undef DES_UNROLL ++#undef DES_RISC1 ++#define DES_RISC2 ++#undef DES_PTR ++#undef D_ENCRYPT ++#undef des_encrypt ++#undef des_encrypt2 ++#undef des_encrypt3 ++#undef des_decrypt3 ++#define des_encrypt des_encrypt_u4_risc2_idx ++#define des_encrypt2 des_encrypt2_u4_risc2_idx ++#define des_encrypt3 des_encrypt3_u4_risc2_idx ++#define des_decrypt3 
des_decrypt3_u4_risc2_idx ++#undef HEADER_DES_LOCL_H ++#include "des_enc.c" ++ ++#define DES_UNROLL ++#define DES_RISC1 ++#undef DES_RISC2 ++#undef DES_PTR ++#undef D_ENCRYPT ++#undef des_encrypt ++#undef des_encrypt2 ++#undef des_encrypt3 ++#undef des_decrypt3 ++#define des_encrypt des_encrypt_u16_risc1_idx ++#define des_encrypt2 des_encrypt2_u16_risc1_idx ++#define des_encrypt3 des_encrypt3_u16_risc1_idx ++#define des_decrypt3 des_decrypt3_u16_risc1_idx ++#undef HEADER_DES_LOCL_H ++#include "des_enc.c" ++ ++#define DES_UNROLL ++#undef DES_RISC1 ++#define DES_RISC2 ++#undef DES_PTR ++#undef D_ENCRYPT ++#undef des_encrypt ++#undef des_encrypt2 ++#undef des_encrypt3 ++#undef des_decrypt3 ++#define des_encrypt des_encrypt_u16_risc2_idx ++#define des_encrypt2 des_encrypt2_u16_risc2_idx ++#define des_encrypt3 des_encrypt3_u16_risc2_idx ++#define des_decrypt3 des_decrypt3_u16_risc2_idx ++#undef HEADER_DES_LOCL_H ++#include "des_enc.c" ++ ++#endif ++ ++#ifdef PART3 ++ ++#undef DES_UNROLL ++#undef DES_RISC1 ++#undef DES_RISC2 ++#define DES_PTR ++#undef D_ENCRYPT ++#undef des_encrypt ++#undef des_encrypt2 ++#undef des_encrypt3 ++#undef des_decrypt3 ++#define des_encrypt des_encrypt_u4_cisc_ptr ++#define des_encrypt2 des_encrypt2_u4_cisc_ptr ++#define des_encrypt3 des_encrypt3_u4_cisc_ptr ++#define des_decrypt3 des_decrypt3_u4_cisc_ptr ++#undef HEADER_DES_LOCL_H ++#include "des_enc.c" ++ ++#define DES_UNROLL ++#undef DES_RISC1 ++#undef DES_RISC2 ++#define DES_PTR ++#undef D_ENCRYPT ++#undef des_encrypt ++#undef des_encrypt2 ++#undef des_encrypt3 ++#undef des_decrypt3 ++#define des_encrypt des_encrypt_u16_cisc_ptr ++#define des_encrypt2 des_encrypt2_u16_cisc_ptr ++#define des_encrypt3 des_encrypt3_u16_cisc_ptr ++#define des_decrypt3 des_decrypt3_u16_cisc_ptr ++#undef HEADER_DES_LOCL_H ++#include "des_enc.c" ++ ++#undef DES_UNROLL ++#define DES_RISC1 ++#undef DES_RISC2 ++#define DES_PTR ++#undef D_ENCRYPT ++#undef des_encrypt ++#undef des_encrypt2 ++#undef des_encrypt3 
++#undef des_decrypt3 ++#define des_encrypt des_encrypt_u4_risc1_ptr ++#define des_encrypt2 des_encrypt2_u4_risc1_ptr ++#define des_encrypt3 des_encrypt3_u4_risc1_ptr ++#define des_decrypt3 des_decrypt3_u4_risc1_ptr ++#undef HEADER_DES_LOCL_H ++#include "des_enc.c" ++ ++#endif ++ ++#ifdef PART4 ++ ++#undef DES_UNROLL ++#undef DES_RISC1 ++#define DES_RISC2 ++#define DES_PTR ++#undef D_ENCRYPT ++#undef des_encrypt ++#undef des_encrypt2 ++#undef des_encrypt3 ++#undef des_decrypt3 ++#define des_encrypt des_encrypt_u4_risc2_ptr ++#define des_encrypt2 des_encrypt2_u4_risc2_ptr ++#define des_encrypt3 des_encrypt3_u4_risc2_ptr ++#define des_decrypt3 des_decrypt3_u4_risc2_ptr ++#undef HEADER_DES_LOCL_H ++#include "des_enc.c" ++ ++#define DES_UNROLL ++#define DES_RISC1 ++#undef DES_RISC2 ++#define DES_PTR ++#undef D_ENCRYPT ++#undef des_encrypt ++#undef des_encrypt2 ++#undef des_encrypt3 ++#undef des_decrypt3 ++#define des_encrypt des_encrypt_u16_risc1_ptr ++#define des_encrypt2 des_encrypt2_u16_risc1_ptr ++#define des_encrypt3 des_encrypt3_u16_risc1_ptr ++#define des_decrypt3 des_decrypt3_u16_risc1_ptr ++#undef HEADER_DES_LOCL_H ++#include "des_enc.c" ++ ++#define DES_UNROLL ++#undef DES_RISC1 ++#define DES_RISC2 ++#define DES_PTR ++#undef D_ENCRYPT ++#undef des_encrypt ++#undef des_encrypt2 ++#undef des_encrypt3 ++#undef des_decrypt3 ++#define des_encrypt des_encrypt_u16_risc2_ptr ++#define des_encrypt2 des_encrypt2_u16_risc2_ptr ++#define des_encrypt3 des_encrypt3_u16_risc2_ptr ++#define des_decrypt3 des_decrypt3_u16_risc2_ptr ++#undef HEADER_DES_LOCL_H ++#include "des_enc.c" ++ ++#endif ++ ++/* The following if from times(3) man page. 
It may need to be changed */ ++#ifndef HZ ++# ifndef CLK_TCK ++# ifndef _BSD_CLK_TCK_ /* FreeBSD fix */ ++# ifndef VMS ++# define HZ 100.0 ++# else /* VMS */ ++# define HZ 100.0 ++# endif ++# else /* _BSD_CLK_TCK_ */ ++# define HZ ((double)_BSD_CLK_TCK_) ++# endif ++# else /* CLK_TCK */ ++# define HZ ((double)CLK_TCK) ++# endif ++#endif ++ ++#define BUFSIZE ((long)1024) ++long run=0; ++ ++#ifndef NOPROTO ++double Time_F(int s); ++#else ++double Time_F(); ++#endif ++ ++#ifdef SIGALRM ++#if defined(__STDC__) || defined(sgi) ++#define SIGRETTYPE void ++#else ++#define SIGRETTYPE int ++#endif ++ ++#ifndef NOPROTO ++SIGRETTYPE sig_done(int sig); ++#else ++SIGRETTYPE sig_done(); ++#endif ++ ++SIGRETTYPE sig_done(sig) ++int sig; ++ { ++ signal(SIGALRM,sig_done); ++ run=0; ++#ifdef LINT ++ sig=sig; ++#endif ++ } ++#endif ++ ++#define START 0 ++#define STOP 1 ++ ++double Time_F(s) ++int s; ++ { ++ double ret; ++#ifdef TIMES ++ static struct tms tstart,tend; ++ ++ if (s == START) ++ { ++ times(&tstart); ++ return(0); ++ } ++ else ++ { ++ times(&tend); ++ ret=((double)(tend.tms_utime-tstart.tms_utime))/HZ; ++ return((ret == 0.0)?1e-6:ret); ++ } ++#else /* !times() */ ++ static struct timeb tstart,tend; ++ long i; ++ ++ if (s == START) ++ { ++ ftime(&tstart); ++ return(0); ++ } ++ else ++ { ++ ftime(&tend); ++ i=(long)tend.millitm-(long)tstart.millitm; ++ ret=((double)(tend.time-tstart.time))+((double)i)/1000.0; ++ return((ret == 0.0)?1e-6:ret); ++ } ++#endif ++ } ++ ++#ifdef SIGALRM ++#define print_name(name) fprintf(stderr,"Doing %s's for 10 seconds\n",name); alarm(10); ++#else ++#define print_name(name) fprintf(stderr,"Doing %s %ld times\n",name,cb); ++#endif ++ ++#define time_it(func,name,index) \ ++ print_name(name); \ ++ Time_F(START); \ ++ for (count=0,run=1; COND(cb); count++) \ ++ { \ ++ unsigned long d[2]; \ ++ func(d,&(sch[0]),DES_ENCRYPT); \ ++ } \ ++ tm[index]=Time_F(STOP); \ ++ fprintf(stderr,"%ld %s's in %.2f second\n",count,name,tm[index]); \ ++ 
tm[index]=((double)COUNT(cb))/tm[index]; ++ ++#define print_it(name,index) \ ++ fprintf(stderr,"%s bytes per sec = %12.2f (%5.1fuS)\n",name, \ ++ tm[index]*8,1.0e6/tm[index]); ++ ++int main(argc,argv) ++int argc; ++char **argv; ++ { ++ long count; ++ static unsigned char buf[BUFSIZE]; ++ static des_cblock key ={0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0}; ++ static des_cblock key2={0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,0x12}; ++ static des_cblock key3={0x56,0x78,0x9a,0xbc,0xde,0xf0,0x12,0x34}; ++ des_key_schedule sch,sch2,sch3; ++ double d,tm[16],max=0; ++ int rank[16]; ++ char *str[16]; ++ int max_idx=0,i,num=0,j; ++#ifndef SIGALARM ++ long ca,cb,cc,cd,ce; ++#endif ++ ++ for (i=0; i<12; i++) ++ { ++ tm[i]=0.0; ++ rank[i]=0; ++ } ++ ++#ifndef TIMES ++ fprintf(stderr,"To get the most acurate results, try to run this\n"); ++ fprintf(stderr,"program when this computer is idle.\n"); ++#endif ++ ++ des_set_key((C_Block *)key,sch); ++ des_set_key((C_Block *)key2,sch2); ++ des_set_key((C_Block *)key3,sch3); ++ ++#ifndef SIGALRM ++ fprintf(stderr,"First we calculate the approximate speed ...\n"); ++ des_set_key((C_Block *)key,sch); ++ count=10; ++ do { ++ long i; ++ unsigned long data[2]; ++ ++ count*=2; ++ Time_F(START); ++ for (i=count; i; i--) ++ des_encrypt(data,&(sch[0]),DES_ENCRYPT); ++ d=Time_F(STOP); ++ } while (d < 3.0); ++ ca=count; ++ cb=count*3; ++ cc=count*3*8/BUFSIZE+1; ++ cd=count*8/BUFSIZE+1; ++ ++ ce=count/20+1; ++#define COND(d) (count != (d)) ++#define COUNT(d) (d) ++#else ++#define COND(c) (run) ++#define COUNT(d) (count) ++ signal(SIGALRM,sig_done); ++ alarm(10); ++#endif ++ ++#ifdef PART1 ++ time_it(des_encrypt_u4_cisc_idx, "des_encrypt_u4_cisc_idx ", 0); ++ time_it(des_encrypt_u16_cisc_idx, "des_encrypt_u16_cisc_idx ", 1); ++ time_it(des_encrypt_u4_risc1_idx, "des_encrypt_u4_risc1_idx ", 2); ++ num+=3; ++#endif ++#ifdef PART2 ++ time_it(des_encrypt_u16_risc1_idx,"des_encrypt_u16_risc1_idx", 3); ++ time_it(des_encrypt_u4_risc2_idx, "des_encrypt_u4_risc2_idx 
", 4); ++ time_it(des_encrypt_u16_risc2_idx,"des_encrypt_u16_risc2_idx", 5); ++ num+=3; ++#endif ++#ifdef PART3 ++ time_it(des_encrypt_u4_cisc_ptr, "des_encrypt_u4_cisc_ptr ", 6); ++ time_it(des_encrypt_u16_cisc_ptr, "des_encrypt_u16_cisc_ptr ", 7); ++ time_it(des_encrypt_u4_risc1_ptr, "des_encrypt_u4_risc1_ptr ", 8); ++ num+=3; ++#endif ++#ifdef PART4 ++ time_it(des_encrypt_u16_risc1_ptr,"des_encrypt_u16_risc1_ptr", 9); ++ time_it(des_encrypt_u4_risc2_ptr, "des_encrypt_u4_risc2_ptr ",10); ++ time_it(des_encrypt_u16_risc2_ptr,"des_encrypt_u16_risc2_ptr",11); ++ num+=3; ++#endif ++ ++#ifdef PART1 ++ str[0]=" 4 c i"; ++ print_it("des_encrypt_u4_cisc_idx ",0); ++ max=tm[0]; ++ max_idx=0; ++ str[1]="16 c i"; ++ print_it("des_encrypt_u16_cisc_idx ",1); ++ if (max < tm[1]) { max=tm[1]; max_idx=1; } ++ str[2]=" 4 r1 i"; ++ print_it("des_encrypt_u4_risc1_idx ",2); ++ if (max < tm[2]) { max=tm[2]; max_idx=2; } ++#endif ++#ifdef PART2 ++ str[3]="16 r1 i"; ++ print_it("des_encrypt_u16_risc1_idx",3); ++ if (max < tm[3]) { max=tm[3]; max_idx=3; } ++ str[4]=" 4 r2 i"; ++ print_it("des_encrypt_u4_risc2_idx ",4); ++ if (max < tm[4]) { max=tm[4]; max_idx=4; } ++ str[5]="16 r2 i"; ++ print_it("des_encrypt_u16_risc2_idx",5); ++ if (max < tm[5]) { max=tm[5]; max_idx=5; } ++#endif ++#ifdef PART3 ++ str[6]=" 4 c p"; ++ print_it("des_encrypt_u4_cisc_ptr ",6); ++ if (max < tm[6]) { max=tm[6]; max_idx=6; } ++ str[7]="16 c p"; ++ print_it("des_encrypt_u16_cisc_ptr ",7); ++ if (max < tm[7]) { max=tm[7]; max_idx=7; } ++ str[8]=" 4 r1 p"; ++ print_it("des_encrypt_u4_risc1_ptr ",8); ++ if (max < tm[8]) { max=tm[8]; max_idx=8; } ++#endif ++#ifdef PART4 ++ str[9]="16 r1 p"; ++ print_it("des_encrypt_u16_risc1_ptr",9); ++ if (max < tm[9]) { max=tm[9]; max_idx=9; } ++ str[10]=" 4 r2 p"; ++ print_it("des_encrypt_u4_risc2_ptr ",10); ++ if (max < tm[10]) { max=tm[10]; max_idx=10; } ++ str[11]="16 r2 p"; ++ print_it("des_encrypt_u16_risc2_ptr",11); ++ if (max < tm[11]) { max=tm[11]; max_idx=11; } 
++#endif ++ printf("options des ecb/s\n"); ++ printf("%s %12.2f 100.0%%\n",str[max_idx],tm[max_idx]); ++ d=tm[max_idx]; ++ tm[max_idx]= -2.0; ++ max= -1.0; ++ for (;;) ++ { ++ for (i=0; i<12; i++) ++ { ++ if (max < tm[i]) { max=tm[i]; j=i; } ++ } ++ if (max < 0.0) break; ++ printf("%s %12.2f %4.1f%%\n",str[j],tm[j],tm[j]/d*100.0); ++ tm[j]= -2.0; ++ max= -1.0; ++ } ++ ++ switch (max_idx) ++ { ++ case 0: ++ printf("-DDES_DEFAULT_OPTIONS\n"); ++ break; ++ case 1: ++ printf("-DDES_UNROLL\n"); ++ break; ++ case 2: ++ printf("-DDES_RISC1\n"); ++ break; ++ case 3: ++ printf("-DDES_UNROLL -DDES_RISC1\n"); ++ break; ++ case 4: ++ printf("-DDES_RISC2\n"); ++ break; ++ case 5: ++ printf("-DDES_UNROLL -DDES_RISC2\n"); ++ break; ++ case 6: ++ printf("-DDES_PTR\n"); ++ break; ++ case 7: ++ printf("-DDES_UNROLL -DDES_PTR\n"); ++ break; ++ case 8: ++ printf("-DDES_RISC1 -DDES_PTR\n"); ++ break; ++ case 9: ++ printf("-DDES_UNROLL -DDES_RISC1 -DDES_PTR\n"); ++ break; ++ case 10: ++ printf("-DDES_RISC2 -DDES_PTR\n"); ++ break; ++ case 11: ++ printf("-DDES_UNROLL -DDES_RISC2 -DDES_PTR\n"); ++ break; ++ } ++ exit(0); ++#if defined(LINT) || defined(MSDOS) ++ return(0); ++#endif ++ } +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/dx86unix.S Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,3160 @@ ++/* ++ * This file was originally generated by Michael Richardson ++ * via the perl scripts found in the ASM subdir. It remains copyright of ++ * Eric Young, see the file COPYRIGHT. ++ * ++ * This was last done on October 9, 2002. ++ * ++ * While this file does not need to go through cpp, we pass it through ++ * CPP by naming it dx86unix.S instead of dx86unix.s because there is ++ * a bug in Rules.make for .s builds - specifically it references EXTRA_CFLAGS ++ * which may contain stuff that AS doesn't understand instead of ++ * referencing EXTRA_AFLAGS. 
++ */ ++ ++ .file "dx86unix.S" ++ .version "01.01" ++.text ++ .align 16 ++.globl des_encrypt ++ .type des_encrypt , @function ++des_encrypt: ++ pushl %esi ++ pushl %edi ++ ++ ++ movl 12(%esp), %esi ++ xorl %ecx, %ecx ++ pushl %ebx ++ pushl %ebp ++ movl (%esi), %eax ++ movl 28(%esp), %ebx ++ movl 4(%esi), %edi ++ ++ ++ roll $4, %eax ++ movl %eax, %esi ++ xorl %edi, %eax ++ andl $0xf0f0f0f0, %eax ++ xorl %eax, %esi ++ xorl %eax, %edi ++ ++ roll $20, %edi ++ movl %edi, %eax ++ xorl %esi, %edi ++ andl $0xfff0000f, %edi ++ xorl %edi, %eax ++ xorl %edi, %esi ++ ++ roll $14, %eax ++ movl %eax, %edi ++ xorl %esi, %eax ++ andl $0x33333333, %eax ++ xorl %eax, %edi ++ xorl %eax, %esi ++ ++ roll $22, %esi ++ movl %esi, %eax ++ xorl %edi, %esi ++ andl $0x03fc03fc, %esi ++ xorl %esi, %eax ++ xorl %esi, %edi ++ ++ roll $9, %eax ++ movl %eax, %esi ++ xorl %edi, %eax ++ andl $0xaaaaaaaa, %eax ++ xorl %eax, %esi ++ xorl %eax, %edi ++ ++.byte 209 ++.byte 199 ++ movl 24(%esp), %ebp ++ cmpl $0, %ebx ++ je .L000start_decrypt ++ ++ ++ movl (%ebp), %eax ++ xorl %ebx, %ebx ++ movl 4(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 8(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 12(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ 
movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 16(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 20(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 24(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 28(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ 
movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 32(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 36(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 40(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 44(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 48(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 
52(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 56(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 60(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 64(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 68(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ 
xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 72(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 76(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 80(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 84(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ 
movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 88(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 92(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 96(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 100(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 104(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 108(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 
0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 112(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 116(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 120(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 124(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ 
xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ jmp .L001end ++.L000start_decrypt: ++ ++ ++ movl 120(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 124(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 112(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 116(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 104(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 108(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, 
%eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 96(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 100(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 88(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 92(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 
0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 80(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 84(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 72(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 76(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ 
++ movl 64(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 68(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 56(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 60(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 48(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 52(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl 
$16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 40(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 44(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 32(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 36(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 
0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 24(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 28(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 16(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 20(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 8(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 12(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb 
%dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl (%ebp), %eax ++ xorl %ebx, %ebx ++ movl 4(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++.L001end: ++ ++ ++ movl 20(%esp), %edx ++.byte 209 ++.byte 206 ++ movl %edi, %eax ++ xorl %esi, %edi ++ andl $0xaaaaaaaa, %edi ++ xorl %edi, %eax ++ xorl %edi, %esi ++ ++ roll $23, %eax ++ movl %eax, %edi ++ xorl %esi, %eax ++ andl $0x03fc03fc, %eax ++ xorl %eax, %edi ++ xorl %eax, %esi ++ ++ roll $10, %edi ++ movl %edi, %eax ++ xorl %esi, %edi ++ andl $0x33333333, %edi ++ xorl %edi, %eax ++ xorl %edi, %esi ++ ++ roll $18, %esi ++ movl %esi, %edi ++ xorl %eax, %esi ++ andl $0xfff0000f, %esi ++ xorl %esi, %edi ++ xorl %esi, %eax ++ ++ roll $12, %edi ++ movl %edi, %esi ++ xorl %eax, %edi ++ andl 
$0xf0f0f0f0, %edi ++ xorl %edi, %esi ++ xorl %edi, %eax ++ ++ rorl $4, %eax ++ movl %eax, (%edx) ++ movl %esi, 4(%edx) ++ popl %ebp ++ popl %ebx ++ popl %edi ++ popl %esi ++ ret ++.des_encrypt_end: ++ .size des_encrypt , .des_encrypt_end-des_encrypt ++.ident "desasm.pl" ++.text ++ .align 16 ++.globl des_encrypt2 ++ .type des_encrypt2 , @function ++des_encrypt2: ++ pushl %esi ++ pushl %edi ++ ++ ++ movl 12(%esp), %eax ++ xorl %ecx, %ecx ++ pushl %ebx ++ pushl %ebp ++ movl (%eax), %esi ++ movl 28(%esp), %ebx ++ roll $3, %esi ++ movl 4(%eax), %edi ++ roll $3, %edi ++ movl 24(%esp), %ebp ++ cmpl $0, %ebx ++ je .L002start_decrypt ++ ++ ++ movl (%ebp), %eax ++ xorl %ebx, %ebx ++ movl 4(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 8(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 12(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb 
%dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 16(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 20(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 24(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 28(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 32(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 36(%ebp), %edx ++ xorl %esi, 
%eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 40(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 44(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 48(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 52(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl 
++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 56(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 60(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 64(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 68(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 
0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 72(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 76(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 80(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 84(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 88(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 92(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 
0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 96(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 100(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 104(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 108(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl 
%ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 112(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 116(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 120(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 124(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ jmp .L003end ++.L002start_decrypt: ++ ++ ++ movl 120(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 124(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax 
++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 112(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 116(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 104(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 108(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 
0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 96(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 100(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 88(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 92(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ 
++ ++ movl 80(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 84(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 72(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 76(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 64(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 68(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl 
$16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 56(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 60(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 48(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 52(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 
0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 40(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 44(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 32(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 36(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 24(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 28(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb 
%dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl 16(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 20(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++ ++ ++ movl 8(%ebp), %eax ++ xorl %ebx, %ebx ++ movl 12(%ebp), %edx ++ xorl %esi, %eax ++ xorl %esi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %edi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %edi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %edi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 
0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %edi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %edi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %edi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %edi ++ ++ ++ movl (%ebp), %eax ++ xorl %ebx, %ebx ++ movl 4(%ebp), %edx ++ xorl %edi, %eax ++ xorl %edi, %edx ++ andl $0xfcfcfcfc, %eax ++ andl $0xcfcfcfcf, %edx ++ movb %al, %bl ++ movb %ah, %cl ++ rorl $4, %edx ++ movl des_SPtrans(%ebx),%ebp ++ movb %dl, %bl ++ xorl %ebp, %esi ++ movl 0x200+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movb %dh, %cl ++ shrl $16, %eax ++ movl 0x100+des_SPtrans(%ebx),%ebp ++ xorl %ebp, %esi ++ movb %ah, %bl ++ shrl $16, %edx ++ movl 0x300+des_SPtrans(%ecx),%ebp ++ xorl %ebp, %esi ++ movl 24(%esp), %ebp ++ movb %dh, %cl ++ andl $0xff, %eax ++ andl $0xff, %edx ++ movl 0x600+des_SPtrans(%ebx),%ebx ++ xorl %ebx, %esi ++ movl 0x700+des_SPtrans(%ecx),%ebx ++ xorl %ebx, %esi ++ movl 0x400+des_SPtrans(%eax),%ebx ++ xorl %ebx, %esi ++ movl 0x500+des_SPtrans(%edx),%ebx ++ xorl %ebx, %esi ++.L003end: ++ ++ ++ rorl $3, %edi ++ movl 20(%esp), %eax ++ rorl $3, %esi ++ movl %edi, (%eax) ++ movl %esi, 4(%eax) ++ popl %ebp ++ popl %ebx ++ popl %edi ++ popl %esi ++ ret ++.des_encrypt2_end: ++ .size des_encrypt2 , .des_encrypt2_end-des_encrypt2 ++.ident "desasm.pl" ++.text ++ .align 16 ++.globl des_encrypt3 ++ .type des_encrypt3 , @function ++des_encrypt3: ++ pushl %ebx ++ movl 8(%esp), %ebx ++ pushl %ebp ++ pushl %esi ++ pushl %edi ++ ++ ++ movl (%ebx), %edi ++ movl 4(%ebx), %esi ++ subl $12, %esp ++ ++ ++ roll $4, %edi ++ movl %edi, %edx ++ xorl %esi, %edi ++ andl $0xf0f0f0f0, %edi ++ xorl %edi, %edx ++ xorl %edi, %esi ++ ++ roll $20, %esi ++ movl %esi, %edi ++ xorl %edx, %esi ++ andl $0xfff0000f, %esi ++ xorl %esi, %edi ++ xorl %esi, %edx ++ ++ roll $14, %edi ++ movl %edi, %esi ++ xorl %edx, %edi ++ andl $0x33333333, %edi ++ xorl %edi, %esi ++ xorl %edi, %edx ++ ++ roll $22, %edx ++ movl %edx, %edi ++ xorl %esi, %edx ++ andl $0x03fc03fc, 
%edx ++ xorl %edx, %edi ++ xorl %edx, %esi ++ ++ roll $9, %edi ++ movl %edi, %edx ++ xorl %esi, %edi ++ andl $0xaaaaaaaa, %edi ++ xorl %edi, %edx ++ xorl %edi, %esi ++ ++ rorl $3, %edx ++ rorl $2, %esi ++ movl %esi, 4(%ebx) ++ movl 36(%esp), %eax ++ movl %edx, (%ebx) ++ movl 40(%esp), %edi ++ movl 44(%esp), %esi ++ movl $1, 8(%esp) ++ movl %eax, 4(%esp) ++ movl %ebx, (%esp) ++ call des_encrypt2 ++ movl $0, 8(%esp) ++ movl %edi, 4(%esp) ++ movl %ebx, (%esp) ++ call des_encrypt2 ++ movl $1, 8(%esp) ++ movl %esi, 4(%esp) ++ movl %ebx, (%esp) ++ call des_encrypt2 ++ addl $12, %esp ++ movl (%ebx), %edi ++ movl 4(%ebx), %esi ++ ++ ++ roll $2, %esi ++ roll $3, %edi ++ movl %edi, %eax ++ xorl %esi, %edi ++ andl $0xaaaaaaaa, %edi ++ xorl %edi, %eax ++ xorl %edi, %esi ++ ++ roll $23, %eax ++ movl %eax, %edi ++ xorl %esi, %eax ++ andl $0x03fc03fc, %eax ++ xorl %eax, %edi ++ xorl %eax, %esi ++ ++ roll $10, %edi ++ movl %edi, %eax ++ xorl %esi, %edi ++ andl $0x33333333, %edi ++ xorl %edi, %eax ++ xorl %edi, %esi ++ ++ roll $18, %esi ++ movl %esi, %edi ++ xorl %eax, %esi ++ andl $0xfff0000f, %esi ++ xorl %esi, %edi ++ xorl %esi, %eax ++ ++ roll $12, %edi ++ movl %edi, %esi ++ xorl %eax, %edi ++ andl $0xf0f0f0f0, %edi ++ xorl %edi, %esi ++ xorl %edi, %eax ++ ++ rorl $4, %eax ++ movl %eax, (%ebx) ++ movl %esi, 4(%ebx) ++ popl %edi ++ popl %esi ++ popl %ebp ++ popl %ebx ++ ret ++.des_encrypt3_end: ++ .size des_encrypt3 , .des_encrypt3_end-des_encrypt3 ++.ident "desasm.pl" ++.text ++ .align 16 ++.globl des_decrypt3 ++ .type des_decrypt3 , @function ++des_decrypt3: ++ pushl %ebx ++ movl 8(%esp), %ebx ++ pushl %ebp ++ pushl %esi ++ pushl %edi ++ ++ ++ movl (%ebx), %edi ++ movl 4(%ebx), %esi ++ subl $12, %esp ++ ++ ++ roll $4, %edi ++ movl %edi, %edx ++ xorl %esi, %edi ++ andl $0xf0f0f0f0, %edi ++ xorl %edi, %edx ++ xorl %edi, %esi ++ ++ roll $20, %esi ++ movl %esi, %edi ++ xorl %edx, %esi ++ andl $0xfff0000f, %esi ++ xorl %esi, %edi ++ xorl %esi, %edx ++ ++ roll $14, %edi ++ movl 
%edi, %esi ++ xorl %edx, %edi ++ andl $0x33333333, %edi ++ xorl %edi, %esi ++ xorl %edi, %edx ++ ++ roll $22, %edx ++ movl %edx, %edi ++ xorl %esi, %edx ++ andl $0x03fc03fc, %edx ++ xorl %edx, %edi ++ xorl %edx, %esi ++ ++ roll $9, %edi ++ movl %edi, %edx ++ xorl %esi, %edi ++ andl $0xaaaaaaaa, %edi ++ xorl %edi, %edx ++ xorl %edi, %esi ++ ++ rorl $3, %edx ++ rorl $2, %esi ++ movl %esi, 4(%ebx) ++ movl 36(%esp), %esi ++ movl %edx, (%ebx) ++ movl 40(%esp), %edi ++ movl 44(%esp), %eax ++ movl $0, 8(%esp) ++ movl %eax, 4(%esp) ++ movl %ebx, (%esp) ++ call des_encrypt2 ++ movl $1, 8(%esp) ++ movl %edi, 4(%esp) ++ movl %ebx, (%esp) ++ call des_encrypt2 ++ movl $0, 8(%esp) ++ movl %esi, 4(%esp) ++ movl %ebx, (%esp) ++ call des_encrypt2 ++ addl $12, %esp ++ movl (%ebx), %edi ++ movl 4(%ebx), %esi ++ ++ ++ roll $2, %esi ++ roll $3, %edi ++ movl %edi, %eax ++ xorl %esi, %edi ++ andl $0xaaaaaaaa, %edi ++ xorl %edi, %eax ++ xorl %edi, %esi ++ ++ roll $23, %eax ++ movl %eax, %edi ++ xorl %esi, %eax ++ andl $0x03fc03fc, %eax ++ xorl %eax, %edi ++ xorl %eax, %esi ++ ++ roll $10, %edi ++ movl %edi, %eax ++ xorl %esi, %edi ++ andl $0x33333333, %edi ++ xorl %edi, %eax ++ xorl %edi, %esi ++ ++ roll $18, %esi ++ movl %esi, %edi ++ xorl %eax, %esi ++ andl $0xfff0000f, %esi ++ xorl %esi, %edi ++ xorl %esi, %eax ++ ++ roll $12, %edi ++ movl %edi, %esi ++ xorl %eax, %edi ++ andl $0xf0f0f0f0, %edi ++ xorl %edi, %esi ++ xorl %edi, %eax ++ ++ rorl $4, %eax ++ movl %eax, (%ebx) ++ movl %esi, 4(%ebx) ++ popl %edi ++ popl %esi ++ popl %ebp ++ popl %ebx ++ ret ++.des_decrypt3_end: ++ .size des_decrypt3 , .des_decrypt3_end-des_decrypt3 ++.ident "desasm.pl" ++.text ++ .align 16 ++.globl des_ncbc_encrypt ++ .type des_ncbc_encrypt , @function ++des_ncbc_encrypt: ++ ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 28(%esp), %ebp ++ ++ movl 36(%esp), %ebx ++ movl (%ebx), %esi ++ movl 4(%ebx), %edi ++ pushl %edi ++ pushl %esi ++ pushl %edi ++ pushl %esi ++ movl %esp, %ebx ++ movl 
36(%esp), %esi ++ movl 40(%esp), %edi ++ ++ movl 56(%esp), %ecx ++ ++ pushl %ecx ++ ++ movl 52(%esp), %eax ++ pushl %eax ++ pushl %ebx ++ cmpl $0, %ecx ++ jz .L004decrypt ++ andl $4294967288, %ebp ++ movl 12(%esp), %eax ++ movl 16(%esp), %ebx ++ jz .L005encrypt_finish ++.L006encrypt_loop: ++ movl (%esi), %ecx ++ movl 4(%esi), %edx ++ xorl %ecx, %eax ++ xorl %edx, %ebx ++ movl %eax, 12(%esp) ++ movl %ebx, 16(%esp) ++ call des_encrypt ++ movl 12(%esp), %eax ++ movl 16(%esp), %ebx ++ movl %eax, (%edi) ++ movl %ebx, 4(%edi) ++ addl $8, %esi ++ addl $8, %edi ++ subl $8, %ebp ++ jnz .L006encrypt_loop ++.L005encrypt_finish: ++ movl 56(%esp), %ebp ++ andl $7, %ebp ++ jz .L007finish ++ xorl %ecx, %ecx ++ xorl %edx, %edx ++ movl .L008cbc_enc_jmp_table(,%ebp,4),%ebp ++ jmp *%ebp ++.L009ej7: ++ movb 6(%esi), %dh ++ sall $8, %edx ++.L010ej6: ++ movb 5(%esi), %dh ++.L011ej5: ++ movb 4(%esi), %dl ++.L012ej4: ++ movl (%esi), %ecx ++ jmp .L013ejend ++.L014ej3: ++ movb 2(%esi), %ch ++ sall $8, %ecx ++.L015ej2: ++ movb 1(%esi), %ch ++.L016ej1: ++ movb (%esi), %cl ++.L013ejend: ++ xorl %ecx, %eax ++ xorl %edx, %ebx ++ movl %eax, 12(%esp) ++ movl %ebx, 16(%esp) ++ call des_encrypt ++ movl 12(%esp), %eax ++ movl 16(%esp), %ebx ++ movl %eax, (%edi) ++ movl %ebx, 4(%edi) ++ jmp .L007finish ++.align 16 ++.L004decrypt: ++ andl $4294967288, %ebp ++ movl 20(%esp), %eax ++ movl 24(%esp), %ebx ++ jz .L017decrypt_finish ++.L018decrypt_loop: ++ movl (%esi), %eax ++ movl 4(%esi), %ebx ++ movl %eax, 12(%esp) ++ movl %ebx, 16(%esp) ++ call des_encrypt ++ movl 12(%esp), %eax ++ movl 16(%esp), %ebx ++ movl 20(%esp), %ecx ++ movl 24(%esp), %edx ++ xorl %eax, %ecx ++ xorl %ebx, %edx ++ movl (%esi), %eax ++ movl 4(%esi), %ebx ++ movl %ecx, (%edi) ++ movl %edx, 4(%edi) ++ movl %eax, 20(%esp) ++ movl %ebx, 24(%esp) ++ addl $8, %esi ++ addl $8, %edi ++ subl $8, %ebp ++ jnz .L018decrypt_loop ++.L017decrypt_finish: ++ movl 56(%esp), %ebp ++ andl $7, %ebp ++ jz .L007finish ++ movl (%esi), %eax ++ movl 4(%esi), 
%ebx ++ movl %eax, 12(%esp) ++ movl %ebx, 16(%esp) ++ call des_encrypt ++ movl 12(%esp), %eax ++ movl 16(%esp), %ebx ++ movl 20(%esp), %ecx ++ movl 24(%esp), %edx ++ xorl %eax, %ecx ++ xorl %ebx, %edx ++ movl (%esi), %eax ++ movl 4(%esi), %ebx ++.L019dj7: ++ rorl $16, %edx ++ movb %dl, 6(%edi) ++ shrl $16, %edx ++.L020dj6: ++ movb %dh, 5(%edi) ++.L021dj5: ++ movb %dl, 4(%edi) ++.L022dj4: ++ movl %ecx, (%edi) ++ jmp .L023djend ++.L024dj3: ++ rorl $16, %ecx ++ movb %cl, 2(%edi) ++ sall $16, %ecx ++.L025dj2: ++ movb %ch, 1(%esi) ++.L026dj1: ++ movb %cl, (%esi) ++.L023djend: ++ jmp .L007finish ++.align 16 ++.L007finish: ++ movl 64(%esp), %ecx ++ addl $28, %esp ++ movl %eax, (%ecx) ++ movl %ebx, 4(%ecx) ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.align 16 ++.L008cbc_enc_jmp_table: ++ .long 0 ++ .long .L016ej1 ++ .long .L015ej2 ++ .long .L014ej3 ++ .long .L012ej4 ++ .long .L011ej5 ++ .long .L010ej6 ++ .long .L009ej7 ++.align 16 ++.L027cbc_dec_jmp_table: ++ .long 0 ++ .long .L026dj1 ++ .long .L025dj2 ++ .long .L024dj3 ++ .long .L022dj4 ++ .long .L021dj5 ++ .long .L020dj6 ++ .long .L019dj7 ++.des_ncbc_encrypt_end: ++ .size des_ncbc_encrypt , .des_ncbc_encrypt_end-des_ncbc_encrypt ++.ident "desasm.pl" ++.text ++ .align 16 ++.globl des_ede3_cbc_encrypt ++ .type des_ede3_cbc_encrypt , @function ++des_ede3_cbc_encrypt: ++ ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 28(%esp), %ebp ++ ++ movl 44(%esp), %ebx ++ movl (%ebx), %esi ++ movl 4(%ebx), %edi ++ pushl %edi ++ pushl %esi ++ pushl %edi ++ pushl %esi ++ movl %esp, %ebx ++ movl 36(%esp), %esi ++ movl 40(%esp), %edi ++ ++ movl 64(%esp), %ecx ++ ++ movl 56(%esp), %eax ++ pushl %eax ++ ++ movl 56(%esp), %eax ++ pushl %eax ++ ++ movl 56(%esp), %eax ++ pushl %eax ++ pushl %ebx ++ cmpl $0, %ecx ++ jz .L028decrypt ++ andl $4294967288, %ebp ++ movl 16(%esp), %eax ++ movl 20(%esp), %ebx ++ jz .L029encrypt_finish ++.L030encrypt_loop: ++ movl (%esi), %ecx ++ movl 4(%esi), %edx ++ xorl %ecx, %eax ++ 
xorl %edx, %ebx ++ movl %eax, 16(%esp) ++ movl %ebx, 20(%esp) ++ call des_encrypt3 ++ movl 16(%esp), %eax ++ movl 20(%esp), %ebx ++ movl %eax, (%edi) ++ movl %ebx, 4(%edi) ++ addl $8, %esi ++ addl $8, %edi ++ subl $8, %ebp ++ jnz .L030encrypt_loop ++.L029encrypt_finish: ++ movl 60(%esp), %ebp ++ andl $7, %ebp ++ jz .L031finish ++ xorl %ecx, %ecx ++ xorl %edx, %edx ++ movl .L032cbc_enc_jmp_table(,%ebp,4),%ebp ++ jmp *%ebp ++.L033ej7: ++ movb 6(%esi), %dh ++ sall $8, %edx ++.L034ej6: ++ movb 5(%esi), %dh ++.L035ej5: ++ movb 4(%esi), %dl ++.L036ej4: ++ movl (%esi), %ecx ++ jmp .L037ejend ++.L038ej3: ++ movb 2(%esi), %ch ++ sall $8, %ecx ++.L039ej2: ++ movb 1(%esi), %ch ++.L040ej1: ++ movb (%esi), %cl ++.L037ejend: ++ xorl %ecx, %eax ++ xorl %edx, %ebx ++ movl %eax, 16(%esp) ++ movl %ebx, 20(%esp) ++ call des_encrypt3 ++ movl 16(%esp), %eax ++ movl 20(%esp), %ebx ++ movl %eax, (%edi) ++ movl %ebx, 4(%edi) ++ jmp .L031finish ++.align 16 ++.L028decrypt: ++ andl $4294967288, %ebp ++ movl 24(%esp), %eax ++ movl 28(%esp), %ebx ++ jz .L041decrypt_finish ++.L042decrypt_loop: ++ movl (%esi), %eax ++ movl 4(%esi), %ebx ++ movl %eax, 16(%esp) ++ movl %ebx, 20(%esp) ++ call des_decrypt3 ++ movl 16(%esp), %eax ++ movl 20(%esp), %ebx ++ movl 24(%esp), %ecx ++ movl 28(%esp), %edx ++ xorl %eax, %ecx ++ xorl %ebx, %edx ++ movl (%esi), %eax ++ movl 4(%esi), %ebx ++ movl %ecx, (%edi) ++ movl %edx, 4(%edi) ++ movl %eax, 24(%esp) ++ movl %ebx, 28(%esp) ++ addl $8, %esi ++ addl $8, %edi ++ subl $8, %ebp ++ jnz .L042decrypt_loop ++.L041decrypt_finish: ++ movl 60(%esp), %ebp ++ andl $7, %ebp ++ jz .L031finish ++ movl (%esi), %eax ++ movl 4(%esi), %ebx ++ movl %eax, 16(%esp) ++ movl %ebx, 20(%esp) ++ call des_decrypt3 ++ movl 16(%esp), %eax ++ movl 20(%esp), %ebx ++ movl 24(%esp), %ecx ++ movl 28(%esp), %edx ++ xorl %eax, %ecx ++ xorl %ebx, %edx ++ movl (%esi), %eax ++ movl 4(%esi), %ebx ++.L043dj7: ++ rorl $16, %edx ++ movb %dl, 6(%edi) ++ shrl $16, %edx ++.L044dj6: ++ movb %dh, 5(%edi) 
++.L045dj5: ++ movb %dl, 4(%edi) ++.L046dj4: ++ movl %ecx, (%edi) ++ jmp .L047djend ++.L048dj3: ++ rorl $16, %ecx ++ movb %cl, 2(%edi) ++ sall $16, %ecx ++.L049dj2: ++ movb %ch, 1(%esi) ++.L050dj1: ++ movb %cl, (%esi) ++.L047djend: ++ jmp .L031finish ++.align 16 ++.L031finish: ++ movl 76(%esp), %ecx ++ addl $32, %esp ++ movl %eax, (%ecx) ++ movl %ebx, 4(%ecx) ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.align 16 ++.L032cbc_enc_jmp_table: ++ .long 0 ++ .long .L040ej1 ++ .long .L039ej2 ++ .long .L038ej3 ++ .long .L036ej4 ++ .long .L035ej5 ++ .long .L034ej6 ++ .long .L033ej7 ++.align 16 ++.L051cbc_dec_jmp_table: ++ .long 0 ++ .long .L050dj1 ++ .long .L049dj2 ++ .long .L048dj3 ++ .long .L046dj4 ++ .long .L045dj5 ++ .long .L044dj6 ++ .long .L043dj7 ++.des_ede3_cbc_encrypt_end: ++ .size des_ede3_cbc_encrypt , .des_ede3_cbc_encrypt_end-des_ede3_cbc_encrypt ++.ident "desasm.pl" +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/ecb_enc.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,128 @@ ++/* crypto/des/ecb_enc.c */ ++/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) ++ * All rights reserved. ++ * ++ * This package is an SSL implementation written ++ * by Eric Young (eay@cryptsoft.com). ++ * The implementation was written so as to conform with Netscapes SSL. ++ * ++ * This library is free for commercial and non-commercial use as long as ++ * the following conditions are aheared to. The following conditions ++ * apply to all code found in this distribution, be it the RC4, RSA, ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation ++ * included with this distribution is covered by the same copyright terms ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). ++ * ++ * Copyright remains Eric Young's, and as such any Copyright notices in ++ * the code are not to be removed. ++ * If this package is used in a product, Eric Young should be given attribution ++ * as the author of the parts of the library used. 
++ * This can be in the form of a textual message at program startup or ++ * in documentation (online or textual) provided with the package. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * "This product includes cryptographic software written by ++ * Eric Young (eay@cryptsoft.com)" ++ * The word 'cryptographic' can be left out if the rouines from the library ++ * being used are not cryptographic related :-). ++ * 4. If you include any Windows specific code (or a derivative thereof) from ++ * the apps directory (application code) you must include an acknowledgement: ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * The licence and distribution terms for any publically available version or ++ * derivative of this code cannot be changed. i.e. this code cannot simply be ++ * copied and put under another distribution licence ++ * [including the GNU Public Licence.] ++ */ ++ ++#include "des/des_locl.h" ++#include "des/spr.h" ++ ++char *libdes_version="libdes v 3.24 - 20-Apr-1996 - eay"; ++char *DES_version="DES part of SSLeay 0.8.2b 08-Jan-1998"; ++ ++/* RCSID $Id: ecb_enc.c,v 1.8 2004-08-04 15:57:22 mcr Exp $ */ ++/* This function ifdef'ed out for FreeS/WAN project. 
*/ ++#ifdef notdef ++char *des_options() ++ { ++ static int init=1; ++ static char buf[32]; ++ ++ if (init) ++ { ++ char *ptr,*unroll,*risc,*size; ++ ++ init=0; ++#ifdef DES_PTR ++ ptr="ptr"; ++#else ++ ptr="idx"; ++#endif ++#if defined(DES_RISC1) || defined(DES_RISC2) ++#ifdef DES_RISC1 ++ risc="risc1"; ++#endif ++#ifdef DES_RISC2 ++ risc="risc2"; ++#endif ++#else ++ risc="cisc"; ++#endif ++#ifdef DES_UNROLL ++ unroll="16"; ++#else ++ unroll="4"; ++#endif ++ if (sizeof(DES_LONG) != sizeof(long)) ++ size="int"; ++ else ++ size="long"; ++ sprintf(buf,"des(%s,%s,%s,%s)",ptr,risc,unroll,size); ++ } ++ return(buf); ++ } ++#endif ++ ++ ++void des_ecb_encrypt(input, output, ks, enc) ++des_cblock (*input); ++des_cblock (*output); ++des_key_schedule ks; ++int enc; ++ { ++ register DES_LONG l; ++ register unsigned char *in,*out; ++ DES_LONG ll[2]; ++ ++ in=(unsigned char *)input; ++ out=(unsigned char *)output; ++ c2l(in,l); ll[0]=l; ++ c2l(in,l); ll[1]=l; ++ des_encrypt(ll,ks,enc); ++ l=ll[0]; l2c(l,out); ++ l=ll[1]; l2c(l,out); ++ l=ll[0]=ll[1]=0; ++ } ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/ipsec_alg_3des.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,181 @@ ++/* ++ * ipsec_alg 3DES cipher stubs ++ * ++ * Copyright (C) 2005 Michael Richardson ++ * ++ * Adapted from ipsec_alg_aes.c by JuanJo Ciarlante ++ * ++ * ipsec_alg_aes.c,v 1.1.2.1 2003/11/21 18:12:23 jjo Exp ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ */ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++ ++/* ++ * special case: ipsec core modular with this static algo inside: ++ * must avoid MODULE magic for this file ++ */ ++#if defined(CONFIG_KLIPS_MODULE) && defined(CONFIG_KLIPS_ENC_3DES) ++#undef MODULE ++#endif ++ ++#include ++#include ++ ++#include /* printk() */ ++#include /* error codes */ ++#include /* size_t */ ++#include ++ ++/* Low freeswan header coupling */ ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_alg.h" ++#include "crypto/des.h" ++#include "openswan/ipsec_alg_3des.h" ++ ++#define AES_CONTEXT_T aes_context ++static int debug_3des=0; ++static int test_3des=0; ++static int excl_3des=0; ++ ++#if defined(CONFIG_KLIPS_ENC_3DES_MODULE) ++MODULE_AUTHOR("Michael Richardson "); ++#ifdef module_param ++module_param(debug_3des,int,0600) ++module_param(test_des,int,0600) ++module_param(excl_des,int,0600) ++#else ++MODULE_PARM(debug_3des, "i"); ++MODULE_PARM(test_des, "i"); ++MODULE_PARM(excl_des, "i"); ++#endif ++#endif ++ ++#define ESP_AES_MAC_KEY_SZ 16 /* 128 bit MAC key */ ++#define ESP_AES_MAC_BLK_LEN 16 /* 128 bit block */ ++ ++static int _3des_set_key(struct ipsec_alg_enc *alg, ++ __u8 * key_e, const __u8 * key, ++ size_t keysize) ++{ ++ int ret = 0; ++ TripleDES_context *ctx = (TripleDES_context*)key_e; ++ ++ if(keysize != 192/8) { ++ return EINVAL; ++ } ++ ++ des_set_key((des_cblock *)(key + DES_KEY_SZ*0), ctx->s1); ++ des_set_key((des_cblock *)(key + DES_KEY_SZ*1), ctx->s2); ++ des_set_key((des_cblock *)(key + DES_KEY_SZ*2), ctx->s3); ++ ++ if (debug_3des > 0) ++ printk(KERN_DEBUG "klips_debug:_3des_set_key:" ++ "ret=%d key_e=%p key=%p keysize=%ld\n", ++ ret, key_e, key, (unsigned long int) keysize); ++ return ret; ++} ++ ++static int _3des_cbc_encrypt(struct ipsec_alg_enc *alg, ++ __u8 * key_e, ++ __u8 * in, ++ int ilen, const __u8 * iv, ++ int encrypt) ++{ ++ TripleDES_context *ctx=(TripleDES_context*)key_e; ++ des_cblock miv; ++ ++ memcpy(&miv, iv, 
sizeof(miv)); ++ ++ if (debug_3des > 0) ++ printk(KERN_DEBUG "klips_debug:_aes_cbc_encrypt:" ++ "key_e=%p in=%p ilen=%d iv=%p encrypt=%d\n", ++ key_e, in, ilen, iv, encrypt); ++ ++ des_ede3_cbc_encrypt((des_cblock *)in, ++ (des_cblock *)in, ++ ilen, ++ ctx->s1, ++ ctx->s2, ++ ctx->s3, ++ &miv, encrypt); ++ return 1; ++} ++ ++static struct ipsec_alg_enc ipsec_alg_3DES = { ++ ixt_common: { ixt_version: IPSEC_ALG_VERSION, ++ ixt_refcnt: ATOMIC_INIT(0), ++ ixt_name: "3des", ++ ixt_blocksize: ESP_3DES_CBC_BLK_LEN, ++ ixt_support: { ++ ias_exttype: IPSEC_ALG_TYPE_ENCRYPT, ++ ias_id: ESP_3DES, ++ ias_keyminbits: ESP_3DES_KEY_SZ*8, ++ ias_keymaxbits: ESP_3DES_KEY_SZ*8, ++ }, ++ }, ++#if defined(MODULE_KLIPS_ENC_3DES_MODULE) ++ ixt_module: THIS_MODULE, ++#endif ++ ixt_e_keylen: ESP_3DES_KEY_SZ*8, ++ ixt_e_ctx_size: sizeof(TripleDES_context), ++ ixt_e_set_key: _3des_set_key, ++ ixt_e_cbc_encrypt:_3des_cbc_encrypt, ++}; ++ ++#if defined(CONFIG_KLIPS_ENC_3DES_MODULE) ++IPSEC_ALG_MODULE_INIT_MOD( ipsec_3des_init ) ++#else ++IPSEC_ALG_MODULE_INIT_STATIC( ipsec_3des_init ) ++#endif ++{ ++ int ret, test_ret; ++ ++ if (excl_3des) ipsec_alg_3DES.ixt_common.ixt_state |= IPSEC_ALG_ST_EXCL; ++ ret=register_ipsec_alg_enc(&ipsec_alg_3DES); ++ printk("ipsec_3des_init(alg_type=%d alg_id=%d name=%s): ret=%d\n", ++ ipsec_alg_3DES.ixt_common.ixt_support.ias_exttype, ++ ipsec_alg_3DES.ixt_common.ixt_support.ias_id, ++ ipsec_alg_3DES.ixt_common.ixt_name, ++ ret); ++ if (ret==0 && test_3des) { ++ test_ret=ipsec_alg_test( ++ ipsec_alg_3DES.ixt_common.ixt_support.ias_exttype, ++ ipsec_alg_3DES.ixt_common.ixt_support.ias_id, ++ test_3des); ++ printk("ipsec_aes_init(alg_type=%d alg_id=%d): test_ret=%d\n", ++ ipsec_alg_3DES.ixt_common.ixt_support.ias_exttype, ++ ipsec_alg_3DES.ixt_common.ixt_support.ias_id, ++ test_ret); ++ } ++ return ret; ++} ++ ++#if defined(CONFIG_KLIPS_ENC_3DES_MODULE) ++IPSEC_ALG_MODULE_EXIT_MOD( ipsec_3des_fini ) ++#else ++IPSEC_ALG_MODULE_EXIT_STATIC( ipsec_3des_fini ) 
++#endif ++{ ++ unregister_ipsec_alg_enc(&ipsec_alg_3DES); ++ return; ++} ++ ++/* Dual, because 3des code is 4-clause BSD licensed */ ++#ifdef MODULE_LICENSE ++MODULE_LICENSE("Dual BSD/GPL"); ++#endif ++ ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/des/set_key.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,246 @@ ++/* crypto/des/set_key.c */ ++/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) ++ * All rights reserved. ++ * ++ * This package is an SSL implementation written ++ * by Eric Young (eay@cryptsoft.com). ++ * The implementation was written so as to conform with Netscapes SSL. ++ * ++ * This library is free for commercial and non-commercial use as long as ++ * the following conditions are aheared to. The following conditions ++ * apply to all code found in this distribution, be it the RC4, RSA, ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation ++ * included with this distribution is covered by the same copyright terms ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). ++ * ++ * Copyright remains Eric Young's, and as such any Copyright notices in ++ * the code are not to be removed. ++ * If this package is used in a product, Eric Young should be given attribution ++ * as the author of the parts of the library used. ++ * This can be in the form of a textual message at program startup or ++ * in documentation (online or textual) provided with the package. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * "This product includes cryptographic software written by ++ * Eric Young (eay@cryptsoft.com)" ++ * The word 'cryptographic' can be left out if the rouines from the library ++ * being used are not cryptographic related :-). ++ * 4. If you include any Windows specific code (or a derivative thereof) from ++ * the apps directory (application code) you must include an acknowledgement: ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * The licence and distribution terms for any publically available version or ++ * derivative of this code cannot be changed. i.e. this code cannot simply be ++ * copied and put under another distribution licence ++ * [including the GNU Public Licence.] ++ */ ++ ++/* set_key.c v 1.4 eay 24/9/91 ++ * 1.4 Speed up by 400% :-) ++ * 1.3 added register declarations. 
++ * 1.2 unrolled make_key_sched a bit more ++ * 1.1 added norm_expand_bits ++ * 1.0 First working version ++ */ ++#include "des/des_locl.h" ++#include "des/podd.h" ++#include "des/sk.h" ++ ++#ifndef NOPROTO ++static int check_parity(des_cblock (*key)); ++#else ++static int check_parity(); ++#endif ++ ++int des_check_key=0; ++ ++void des_set_odd_parity(key) ++des_cblock (*key); ++ { ++ int i; ++ ++ for (i=0; i>(n))^(b))&(m)),\ ++ * (b)^=(t),\ ++ * (a)=((a)^((t)<<(n)))) ++ */ ++ ++#define HPERM_OP(a,t,n,m) ((t)=((((a)<<(16-(n)))^(a))&(m)),\ ++ (a)=(a)^(t)^(t>>(16-(n)))) ++ ++/* return 0 if key parity is odd (correct), ++ * return -1 if key parity error, ++ * return -2 if illegal weak key. ++ */ ++int des_set_key(key, schedule) ++des_cblock (*key); ++des_key_schedule schedule; ++ { ++ static int shifts2[16]={0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0}; ++ register DES_LONG c,d,t,s,t2; ++ register unsigned char *in; ++ register DES_LONG *k; ++ register int i; ++ ++ if (des_check_key) ++ { ++ if (!check_parity(key)) ++ return(-1); ++ ++ if (des_is_weak_key(key)) ++ return(-2); ++ } ++ ++ k=(DES_LONG *)schedule; ++ in=(unsigned char *)key; ++ ++ c2l(in,c); ++ c2l(in,d); ++ ++ /* do PC1 in 60 simple operations */ ++/* PERM_OP(d,c,t,4,0x0f0f0f0fL); ++ HPERM_OP(c,t,-2, 0xcccc0000L); ++ HPERM_OP(c,t,-1, 0xaaaa0000L); ++ HPERM_OP(c,t, 8, 0x00ff0000L); ++ HPERM_OP(c,t,-1, 0xaaaa0000L); ++ HPERM_OP(d,t,-8, 0xff000000L); ++ HPERM_OP(d,t, 8, 0x00ff0000L); ++ HPERM_OP(d,t, 2, 0x33330000L); ++ d=((d&0x00aa00aaL)<<7L)|((d&0x55005500L)>>7L)|(d&0xaa55aa55L); ++ d=(d>>8)|((c&0xf0000000L)>>4); ++ c&=0x0fffffffL; */ ++ ++ /* I now do it in 47 simple operations :-) ++ * Thanks to John Fletcher (john_fletcher@lccmail.ocf.llnl.gov) ++ * for the inspiration. 
:-) */ ++ PERM_OP (d,c,t,4,0x0f0f0f0fL); ++ HPERM_OP(c,t,-2,0xcccc0000L); ++ HPERM_OP(d,t,-2,0xcccc0000L); ++ PERM_OP (d,c,t,1,0x55555555L); ++ PERM_OP (c,d,t,8,0x00ff00ffL); ++ PERM_OP (d,c,t,1,0x55555555L); ++ d= (((d&0x000000ffL)<<16L)| (d&0x0000ff00L) | ++ ((d&0x00ff0000L)>>16L)|((c&0xf0000000L)>>4L)); ++ c&=0x0fffffffL; ++ ++ for (i=0; i>2L)|(c<<26L)); d=((d>>2L)|(d<<26L)); } ++ else ++ { c=((c>>1L)|(c<<27L)); d=((d>>1L)|(d<<27L)); } ++ c&=0x0fffffffL; ++ d&=0x0fffffffL; ++ /* could be a few less shifts but I am to lazy at this ++ * point in time to investigate */ ++ s= des_skb[0][ (c )&0x3f ]| ++ des_skb[1][((c>> 6)&0x03)|((c>> 7L)&0x3c)]| ++ des_skb[2][((c>>13)&0x0f)|((c>>14L)&0x30)]| ++ des_skb[3][((c>>20)&0x01)|((c>>21L)&0x06) | ++ ((c>>22L)&0x38)]; ++ t= des_skb[4][ (d )&0x3f ]| ++ des_skb[5][((d>> 7L)&0x03)|((d>> 8L)&0x3c)]| ++ des_skb[6][ (d>>15L)&0x3f ]| ++ des_skb[7][((d>>21L)&0x0f)|((d>>22L)&0x30)]; ++ ++ /* table contained 0213 4657 */ ++ t2=((t<<16L)|(s&0x0000ffffL))&0xffffffffL; ++ *(k++)=ROTATE(t2,30)&0xffffffffL; ++ ++ t2=((s>>16L)|(t&0xffff0000L)); ++ *(k++)=ROTATE(t2,26)&0xffffffffL; ++ } ++ return(0); ++ } ++ ++int des_key_sched(key, schedule) ++des_cblock (*key); ++des_key_schedule schedule; ++ { ++ return(des_set_key(key,schedule)); ++ } +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/goodmask.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,100 @@ ++/* ++ * minor utilities for subnet-mask manipulation ++ * Copyright (C) 1998, 1999 Henry Spencer. ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: goodmask.c,v 1.12 2004-07-10 07:43:47 mcr Exp $ ++ */ ++#include "openswan.h" ++ ++#ifndef ABITS ++#define ABITS 32 /* bits in an IPv4 address */ ++#endif ++ ++/* ++ - goodmask - is this a good (^1*0*$) subnet mask? ++ * You are not expected to understand this. See Henry S. Warren Jr, ++ * "Functions realizable with word-parallel logical and two's-complement ++ * addition instructions", CACM 20.6 (June 1977), p.439. ++ */ ++int /* predicate */ ++goodmask(mask) ++struct in_addr mask; ++{ ++ unsigned long x = ntohl(mask.s_addr); ++ /* clear rightmost contiguous string of 1-bits */ ++# define CRCS1B(x) (((x|(x-1))+1)&x) ++# define TOPBIT (1UL << 31) ++ ++ /* either zero, or has one string of 1-bits which is left-justified */ ++ if (x == 0 || (CRCS1B(x) == 0 && (x&TOPBIT))) ++ return 1; ++ return 0; ++} ++ ++/* ++ - masktobits - how many bits in this mask? ++ * The algorithm is essentially a binary search, but highly optimized ++ * for this particular task. 
++ */ ++int /* -1 means !goodmask() */ ++masktobits(mask) ++struct in_addr mask; ++{ ++ unsigned long m = ntohl(mask.s_addr); ++ int masklen; ++ ++ if (!goodmask(mask)) ++ return -1; ++ ++ if (m&0x00000001UL) ++ return 32; ++ masklen = 0; ++ if (m&(0x0000ffffUL<<1)) { /* <<1 for 1-origin numbering */ ++ masklen |= 0x10; ++ m <<= 16; ++ } ++ if (m&(0x00ff0000UL<<1)) { ++ masklen |= 0x08; ++ m <<= 8; ++ } ++ if (m&(0x0f000000UL<<1)) { ++ masklen |= 0x04; ++ m <<= 4; ++ } ++ if (m&(0x30000000UL<<1)) { ++ masklen |= 0x02; ++ m <<= 2; ++ } ++ if (m&(0x40000000UL<<1)) ++ masklen |= 0x01; ++ ++ return masklen; ++} ++ ++/* ++ - bitstomask - return a mask with this many high bits on ++ */ ++struct in_addr ++bitstomask(n) ++int n; ++{ ++ struct in_addr result; ++ ++ if (n > 0 && n <= ABITS) ++ result.s_addr = htonl(~((1UL << (ABITS - n)) - 1)); ++ else if (n == 0) ++ result.s_addr = 0; ++ else ++ result.s_addr = 0; /* best error report we can do */ ++ return result; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/infblock.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,403 @@ ++/* infblock.c -- interpret and process block types to last block ++ * Copyright (C) 1995-2002 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include ++#include "infblock.h" ++#include "inftrees.h" ++#include "infcodes.h" ++#include "infutil.h" ++ ++struct inflate_codes_state {int dummy;}; /* for buggy compilers */ ++ ++/* simplify the use of the inflate_huft type with some defines */ ++#define exop word.what.Exop ++#define bits word.what.Bits ++ ++/* Table for deflate from PKZIP's appnote.txt. */ ++local const uInt border[] = { /* Order of the bit length code lengths */ ++ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; ++ ++/* ++ Notes beyond the 1.93a appnote.txt: ++ ++ 1. Distance pointers never point before the beginning of the output ++ stream. ++ 2. Distance pointers can point back across blocks, up to 32k away. ++ 3. 
There is an implied maximum of 7 bits for the bit length table and ++ 15 bits for the actual data. ++ 4. If only one code exists, then it is encoded using one bit. (Zero ++ would be more efficient, but perhaps a little confusing.) If two ++ codes exist, they are coded using one bit each (0 and 1). ++ 5. There is no way of sending zero distance codes--a dummy must be ++ sent if there are none. (History: a pre 2.0 version of PKZIP would ++ store blocks with no distance codes, but this was discovered to be ++ too harsh a criterion.) Valid only for 1.93a. 2.04c does allow ++ zero distance codes, which is sent as one code of zero bits in ++ length. ++ 6. There are up to 286 literal/length codes. Code 256 represents the ++ end-of-block. Note however that the static length tree defines ++ 288 codes just to fill out the Huffman codes. Codes 286 and 287 ++ cannot be used though, since there is no length base or extra bits ++ defined for them. Similarily, there are up to 30 distance codes. ++ However, static trees define 32 codes (all 5 bits) to fill out the ++ Huffman codes, but the last two had better not show up in the data. ++ 7. Unzip can check dynamic Huffman blocks for complete code sets. ++ The exception is that a single code would not be complete (see #4). ++ 8. The five bits following the block type is really the number of ++ literal codes sent minus 257. ++ 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits ++ (1+6+6). Therefore, to output three times the length, you output ++ three codes (1+1+1), whereas to output four times the same length, ++ you only need two codes (1+3). Hmm. ++ 10. In the tree reconstruction algorithm, Code = Code + Increment ++ only if BitLength(i) is not zero. (Pretty obvious.) ++ 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) ++ 12. Note: length code 284 can represent 227-258, but length code 285 ++ really is 258. 
The last length deserves its own, short code ++ since it gets used a lot in very redundant files. The length ++ 258 is special since 258 - 3 (the min match length) is 255. ++ 13. The literal/length and distance code bit lengths are read as a ++ single stream of lengths. It is possible (and advantageous) for ++ a repeat code (16, 17, or 18) to go across the boundary between ++ the two sets of lengths. ++ */ ++ ++ ++void inflate_blocks_reset(s, z, c) ++inflate_blocks_statef *s; ++z_streamp z; ++uLongf *c; ++{ ++ if (c != Z_NULL) ++ *c = s->check; ++ if (s->mode == BTREE || s->mode == DTREE) ++ ZFREE(z, s->sub.trees.blens); ++ if (s->mode == CODES) ++ inflate_codes_free(s->sub.decode.codes, z); ++ s->mode = TYPE; ++ s->bitk = 0; ++ s->bitb = 0; ++ s->read = s->write = s->window; ++ if (s->checkfn != Z_NULL) ++ z->adler = s->check = (*s->checkfn)(0L, (const Bytef *)Z_NULL, 0); ++ Tracev((stderr, "inflate: blocks reset\n")); ++} ++ ++ ++inflate_blocks_statef *inflate_blocks_new(z, c, w) ++z_streamp z; ++check_func c; ++uInt w; ++{ ++ inflate_blocks_statef *s; ++ ++ if ((s = (inflate_blocks_statef *)ZALLOC ++ (z,1,sizeof(struct inflate_blocks_state))) == Z_NULL) ++ return s; ++ if ((s->hufts = ++ (inflate_huft *)ZALLOC(z, sizeof(inflate_huft), MANY)) == Z_NULL) ++ { ++ ZFREE(z, s); ++ return Z_NULL; ++ } ++ if ((s->window = (Bytef *)ZALLOC(z, 1, w)) == Z_NULL) ++ { ++ ZFREE(z, s->hufts); ++ ZFREE(z, s); ++ return Z_NULL; ++ } ++ s->end = s->window + w; ++ s->checkfn = c; ++ s->mode = TYPE; ++ Tracev((stderr, "inflate: blocks allocated\n")); ++ inflate_blocks_reset(s, z, Z_NULL); ++ return s; ++} ++ ++ ++int inflate_blocks(s, z, r) ++inflate_blocks_statef *s; ++z_streamp z; ++int r; ++{ ++ uInt t; /* temporary storage */ ++ uLong b; /* bit buffer */ ++ uInt k; /* bits in bit buffer */ ++ Bytef *p; /* input data pointer */ ++ uInt n; /* bytes available there */ ++ Bytef *q; /* output window write pointer */ ++ uInt m; /* bytes to end of window or read pointer */ ++ ++ /* 
copy input/output information to locals (UPDATE macro restores) */ ++ LOAD ++ ++ /* process input based on current state */ ++ while (1) switch (s->mode) ++ { ++ case TYPE: ++ NEEDBITS(3) ++ t = (uInt)b & 7; ++ s->last = t & 1; ++ switch (t >> 1) ++ { ++ case 0: /* stored */ ++ Tracev((stderr, "inflate: stored block%s\n", ++ s->last ? " (last)" : "")); ++ DUMPBITS(3) ++ t = k & 7; /* go to byte boundary */ ++ DUMPBITS(t) ++ s->mode = LENS; /* get length of stored block */ ++ break; ++ case 1: /* fixed */ ++ Tracev((stderr, "inflate: fixed codes block%s\n", ++ s->last ? " (last)" : "")); ++ { ++ uInt bl, bd; ++ inflate_huft *tl, *td; ++ ++ inflate_trees_fixed(&bl, &bd, &tl, &td, z); ++ s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z); ++ if (s->sub.decode.codes == Z_NULL) ++ { ++ r = Z_MEM_ERROR; ++ LEAVE ++ } ++ } ++ DUMPBITS(3) ++ s->mode = CODES; ++ break; ++ case 2: /* dynamic */ ++ Tracev((stderr, "inflate: dynamic codes block%s\n", ++ s->last ? " (last)" : "")); ++ DUMPBITS(3) ++ s->mode = TABLE; ++ break; ++ case 3: /* illegal */ ++ DUMPBITS(3) ++ s->mode = BAD; ++ z->msg = (char*)"invalid block type"; ++ r = Z_DATA_ERROR; ++ LEAVE ++ } ++ break; ++ case LENS: ++ NEEDBITS(32) ++ if ((((~b) >> 16) & 0xffff) != (b & 0xffff)) ++ { ++ s->mode = BAD; ++ z->msg = (char*)"invalid stored block lengths"; ++ r = Z_DATA_ERROR; ++ LEAVE ++ } ++ s->sub.left = (uInt)b & 0xffff; ++ b = k = 0; /* dump bits */ ++ Tracev((stderr, "inflate: stored length %u\n", s->sub.left)); ++ s->mode = s->sub.left ? STORED : (s->last ? DRY : TYPE); ++ break; ++ case STORED: ++ if (n == 0) ++ LEAVE ++ NEEDOUT ++ t = s->sub.left; ++ if (t > n) t = n; ++ if (t > m) t = m; ++ zmemcpy(q, p, t); ++ p += t; n -= t; ++ q += t; m -= t; ++ if ((s->sub.left -= t) != 0) ++ break; ++ Tracev((stderr, "inflate: stored end, %lu total out\n", ++ z->total_out + (q >= s->read ? q - s->read : ++ (s->end - s->read) + (q - s->window)))); ++ s->mode = s->last ? 
DRY : TYPE; ++ break; ++ case TABLE: ++ NEEDBITS(14) ++ s->sub.trees.table = t = (uInt)b & 0x3fff; ++#ifndef PKZIP_BUG_WORKAROUND ++ if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29) ++ { ++ s->mode = BAD; ++ z->msg = (char*)"too many length or distance symbols"; ++ r = Z_DATA_ERROR; ++ LEAVE ++ } ++#endif ++ t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f); ++ if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL) ++ { ++ r = Z_MEM_ERROR; ++ LEAVE ++ } ++ DUMPBITS(14) ++ s->sub.trees.index = 0; ++ Tracev((stderr, "inflate: table sizes ok\n")); ++ s->mode = BTREE; ++ case BTREE: ++ while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10)) ++ { ++ NEEDBITS(3) ++ s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7; ++ DUMPBITS(3) ++ } ++ while (s->sub.trees.index < 19) ++ s->sub.trees.blens[border[s->sub.trees.index++]] = 0; ++ s->sub.trees.bb = 7; ++ t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb, ++ &s->sub.trees.tb, s->hufts, z); ++ if (t != Z_OK) ++ { ++ r = t; ++ if (r == Z_DATA_ERROR) ++ { ++ ZFREE(z, s->sub.trees.blens); ++ s->mode = BAD; ++ } ++ LEAVE ++ } ++ s->sub.trees.index = 0; ++ Tracev((stderr, "inflate: bits tree ok\n")); ++ s->mode = DTREE; ++ case DTREE: ++ while (t = s->sub.trees.table, ++ s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f)) ++ { ++ inflate_huft *h; ++ uInt i, j, c; ++ ++ t = s->sub.trees.bb; ++ NEEDBITS(t) ++ h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]); ++ t = h->bits; ++ c = h->base; ++ if (c < 16) ++ { ++ DUMPBITS(t) ++ s->sub.trees.blens[s->sub.trees.index++] = c; ++ } ++ else /* c == 16..18 */ ++ { ++ i = c == 18 ? 7 : c - 14; ++ j = c == 18 ? 
11 : 3; ++ NEEDBITS(t + i) ++ DUMPBITS(t) ++ j += (uInt)b & inflate_mask[i]; ++ DUMPBITS(i) ++ i = s->sub.trees.index; ++ t = s->sub.trees.table; ++ if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) || ++ (c == 16 && i < 1)) ++ { ++ ZFREE(z, s->sub.trees.blens); ++ s->mode = BAD; ++ z->msg = (char*)"invalid bit length repeat"; ++ r = Z_DATA_ERROR; ++ LEAVE ++ } ++ c = c == 16 ? s->sub.trees.blens[i - 1] : 0; ++ do { ++ s->sub.trees.blens[i++] = c; ++ } while (--j); ++ s->sub.trees.index = i; ++ } ++ } ++ s->sub.trees.tb = Z_NULL; ++ { ++ uInt bl, bd; ++ inflate_huft *tl, *td; ++ inflate_codes_statef *c; ++ ++ bl = 9; /* must be <= 9 for lookahead assumptions */ ++ bd = 6; /* must be <= 9 for lookahead assumptions */ ++ t = s->sub.trees.table; ++ t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f), ++ s->sub.trees.blens, &bl, &bd, &tl, &td, ++ s->hufts, z); ++ if (t != Z_OK) ++ { ++ if (t == (uInt)Z_DATA_ERROR) ++ { ++ ZFREE(z, s->sub.trees.blens); ++ s->mode = BAD; ++ } ++ r = t; ++ LEAVE ++ } ++ Tracev((stderr, "inflate: trees ok\n")); ++ if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL) ++ { ++ r = Z_MEM_ERROR; ++ LEAVE ++ } ++ s->sub.decode.codes = c; ++ } ++ ZFREE(z, s->sub.trees.blens); ++ s->mode = CODES; ++ case CODES: ++ UPDATE ++ if ((r = inflate_codes(s, z, r)) != Z_STREAM_END) ++ return inflate_flush(s, z, r); ++ r = Z_OK; ++ inflate_codes_free(s->sub.decode.codes, z); ++ LOAD ++ Tracev((stderr, "inflate: codes end, %lu total out\n", ++ z->total_out + (q >= s->read ? 
q - s->read : ++ (s->end - s->read) + (q - s->window)))); ++ if (!s->last) ++ { ++ s->mode = TYPE; ++ break; ++ } ++ s->mode = DRY; ++ case DRY: ++ FLUSH ++ if (s->read != s->write) ++ LEAVE ++ s->mode = DONE; ++ case DONE: ++ r = Z_STREAM_END; ++ LEAVE ++ case BAD: ++ r = Z_DATA_ERROR; ++ LEAVE ++ default: ++ r = Z_STREAM_ERROR; ++ LEAVE ++ } ++} ++ ++ ++int inflate_blocks_free(s, z) ++inflate_blocks_statef *s; ++z_streamp z; ++{ ++ inflate_blocks_reset(s, z, Z_NULL); ++ ZFREE(z, s->window); ++ ZFREE(z, s->hufts); ++ ZFREE(z, s); ++ Tracev((stderr, "inflate: blocks freed\n")); ++ return Z_OK; ++} ++ ++ ++void inflate_set_dictionary(s, d, n) ++inflate_blocks_statef *s; ++const Bytef *d; ++uInt n; ++{ ++ zmemcpy(s->window, d, n); ++ s->read = s->write = s->window + n; ++} ++ ++ ++/* Returns true if inflate is currently at the end of a block generated ++ * by Z_SYNC_FLUSH or Z_FULL_FLUSH. ++ * IN assertion: s != Z_NULL ++ */ ++int inflate_blocks_sync_point(s) ++inflate_blocks_statef *s; ++{ ++ return s->mode == LENS; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/infblock.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,39 @@ ++/* infblock.h -- header to use infblock.c ++ * Copyright (C) 1995-2002 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++/* WARNING: this file should *not* be used by applications. It is ++ part of the implementation of the compression library and is ++ subject to change. Applications should only use zlib.h. 
++ */ ++ ++struct inflate_blocks_state; ++typedef struct inflate_blocks_state FAR inflate_blocks_statef; ++ ++extern inflate_blocks_statef * inflate_blocks_new OF(( ++ z_streamp z, ++ check_func c, /* check function */ ++ uInt w)); /* window size */ ++ ++extern int inflate_blocks OF(( ++ inflate_blocks_statef *, ++ z_streamp , ++ int)); /* initial return code */ ++ ++extern void inflate_blocks_reset OF(( ++ inflate_blocks_statef *, ++ z_streamp , ++ uLongf *)); /* check value on output */ ++ ++extern int inflate_blocks_free OF(( ++ inflate_blocks_statef *, ++ z_streamp)); ++ ++extern void inflate_set_dictionary OF(( ++ inflate_blocks_statef *s, ++ const Bytef *d, /* dictionary */ ++ uInt n)); /* dictionary length */ ++ ++extern int inflate_blocks_sync_point OF(( ++ inflate_blocks_statef *s)); +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/infcodes.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,251 @@ ++/* infcodes.c -- process literals and length/distance pairs ++ * Copyright (C) 1995-2002 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include ++#include "inftrees.h" ++#include "infblock.h" ++#include "infcodes.h" ++#include "infutil.h" ++#include "inffast.h" ++ ++/* simplify the use of the inflate_huft type with some defines */ ++#define exop word.what.Exop ++#define bits word.what.Bits ++ ++typedef enum { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ ++ START, /* x: set up for LEN */ ++ LEN, /* i: get length/literal/eob next */ ++ LENEXT, /* i: getting length extra (have base) */ ++ DIST, /* i: get distance next */ ++ DISTEXT, /* i: getting distance extra */ ++ COPY, /* o: copying bytes in window, waiting for space */ ++ LIT, /* o: got literal, waiting for output space */ ++ WASH, /* o: got eob, possibly still output waiting */ ++ END, /* x: got eob and all data flushed */ ++ BADCODE} /* x: got error */ ++inflate_codes_mode; ++ ++/* inflate codes private state */ ++struct inflate_codes_state { ++ 
++ /* mode */ ++ inflate_codes_mode mode; /* current inflate_codes mode */ ++ ++ /* mode dependent information */ ++ uInt len; ++ union { ++ struct { ++ inflate_huft *tree; /* pointer into tree */ ++ uInt need; /* bits needed */ ++ } code; /* if LEN or DIST, where in tree */ ++ uInt lit; /* if LIT, literal */ ++ struct { ++ uInt get; /* bits to get for extra */ ++ uInt dist; /* distance back to copy from */ ++ } copy; /* if EXT or COPY, where and how much */ ++ } sub; /* submode */ ++ ++ /* mode independent information */ ++ Byte lbits; /* ltree bits decoded per branch */ ++ Byte dbits; /* dtree bits decoder per branch */ ++ inflate_huft *ltree; /* literal/length/eob tree */ ++ inflate_huft *dtree; /* distance tree */ ++ ++}; ++ ++ ++inflate_codes_statef *inflate_codes_new(bl, bd, tl, td, z) ++uInt bl, bd; ++inflate_huft *tl; ++inflate_huft *td; /* need separate declaration for Borland C++ */ ++z_streamp z; ++{ ++ inflate_codes_statef *c; ++ ++ if ((c = (inflate_codes_statef *) ++ ZALLOC(z,1,sizeof(struct inflate_codes_state))) != Z_NULL) ++ { ++ c->mode = START; ++ c->lbits = (Byte)bl; ++ c->dbits = (Byte)bd; ++ c->ltree = tl; ++ c->dtree = td; ++ Tracev((stderr, "inflate: codes new\n")); ++ } ++ return c; ++} ++ ++ ++int inflate_codes(s, z, r) ++inflate_blocks_statef *s; ++z_streamp z; ++int r; ++{ ++ uInt j; /* temporary storage */ ++ inflate_huft *t; /* temporary pointer */ ++ uInt e; /* extra bits or operation */ ++ uLong b; /* bit buffer */ ++ uInt k; /* bits in bit buffer */ ++ Bytef *p; /* input data pointer */ ++ uInt n; /* bytes available there */ ++ Bytef *q; /* output window write pointer */ ++ uInt m; /* bytes to end of window or read pointer */ ++ Bytef *f; /* pointer to copy strings from */ ++ inflate_codes_statef *c = s->sub.decode.codes; /* codes state */ ++ ++ /* copy input/output information to locals (UPDATE macro restores) */ ++ LOAD ++ ++ /* process input and output based on current state */ ++ while (1) switch (c->mode) ++ { /* waiting for 
"i:"=input, "o:"=output, "x:"=nothing */ ++ case START: /* x: set up for LEN */ ++#ifndef SLOW ++ if (m >= 258 && n >= 10) ++ { ++ UPDATE ++ r = inflate_fast(c->lbits, c->dbits, c->ltree, c->dtree, s, z); ++ LOAD ++ if (r != Z_OK) ++ { ++ c->mode = r == Z_STREAM_END ? WASH : BADCODE; ++ break; ++ } ++ } ++#endif /* !SLOW */ ++ c->sub.code.need = c->lbits; ++ c->sub.code.tree = c->ltree; ++ c->mode = LEN; ++ case LEN: /* i: get length/literal/eob next */ ++ j = c->sub.code.need; ++ NEEDBITS(j) ++ t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); ++ DUMPBITS(t->bits) ++ e = (uInt)(t->exop); ++ if (e == 0) /* literal */ ++ { ++ c->sub.lit = t->base; ++ Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? ++ "inflate: literal '%c'\n" : ++ "inflate: literal 0x%02x\n", t->base)); ++ c->mode = LIT; ++ break; ++ } ++ if (e & 16) /* length */ ++ { ++ c->sub.copy.get = e & 15; ++ c->len = t->base; ++ c->mode = LENEXT; ++ break; ++ } ++ if ((e & 64) == 0) /* next table */ ++ { ++ c->sub.code.need = e; ++ c->sub.code.tree = t + t->base; ++ break; ++ } ++ if (e & 32) /* end of block */ ++ { ++ Tracevv((stderr, "inflate: end of block\n")); ++ c->mode = WASH; ++ break; ++ } ++ c->mode = BADCODE; /* invalid code */ ++ z->msg = (char*)"invalid literal/length code"; ++ r = Z_DATA_ERROR; ++ LEAVE ++ case LENEXT: /* i: getting length extra (have base) */ ++ j = c->sub.copy.get; ++ NEEDBITS(j) ++ c->len += (uInt)b & inflate_mask[j]; ++ DUMPBITS(j) ++ c->sub.code.need = c->dbits; ++ c->sub.code.tree = c->dtree; ++ Tracevv((stderr, "inflate: length %u\n", c->len)); ++ c->mode = DIST; ++ case DIST: /* i: get distance next */ ++ j = c->sub.code.need; ++ NEEDBITS(j) ++ t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); ++ DUMPBITS(t->bits) ++ e = (uInt)(t->exop); ++ if (e & 16) /* distance */ ++ { ++ c->sub.copy.get = e & 15; ++ c->sub.copy.dist = t->base; ++ c->mode = DISTEXT; ++ break; ++ } ++ if ((e & 64) == 0) /* next table */ ++ { ++ c->sub.code.need = e; ++ c->sub.code.tree = t + 
t->base; ++ break; ++ } ++ c->mode = BADCODE; /* invalid code */ ++ z->msg = (char*)"invalid distance code"; ++ r = Z_DATA_ERROR; ++ LEAVE ++ case DISTEXT: /* i: getting distance extra */ ++ j = c->sub.copy.get; ++ NEEDBITS(j) ++ c->sub.copy.dist += (uInt)b & inflate_mask[j]; ++ DUMPBITS(j) ++ Tracevv((stderr, "inflate: distance %u\n", c->sub.copy.dist)); ++ c->mode = COPY; ++ case COPY: /* o: copying bytes in window, waiting for space */ ++ f = q - c->sub.copy.dist; ++ while (f < s->window) /* modulo window size-"while" instead */ ++ f += s->end - s->window; /* of "if" handles invalid distances */ ++ while (c->len) ++ { ++ NEEDOUT ++ OUTBYTE(*f++) ++ if (f == s->end) ++ f = s->window; ++ c->len--; ++ } ++ c->mode = START; ++ break; ++ case LIT: /* o: got literal, waiting for output space */ ++ NEEDOUT ++ OUTBYTE(c->sub.lit) ++ c->mode = START; ++ break; ++ case WASH: /* o: got eob, possibly more output */ ++ if (k > 7) /* return unused byte, if any */ ++ { ++ Assert(k < 16, "inflate_codes grabbed too many bytes") ++ k -= 8; ++ n++; ++ p--; /* can always return one */ ++ } ++ FLUSH ++ if (s->read != s->write) ++ LEAVE ++ c->mode = END; ++ case END: ++ r = Z_STREAM_END; ++ LEAVE ++ case BADCODE: /* x: got error */ ++ r = Z_DATA_ERROR; ++ LEAVE ++ default: ++ r = Z_STREAM_ERROR; ++ LEAVE ++ } ++#ifdef NEED_DUMMY_RETURN ++ return Z_STREAM_ERROR; /* Some dumb compilers complain without this */ ++#endif ++} ++ ++ ++void inflate_codes_free(c, z) ++inflate_codes_statef *c; ++z_streamp z; ++{ ++ ZFREE(z, c); ++ Tracev((stderr, "inflate: codes free\n")); ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/infcodes.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,31 @@ ++/* infcodes.h -- header to use infcodes.c ++ * Copyright (C) 1995-2002 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++/* WARNING: this file should *not* be used by applications. 
It is ++ part of the implementation of the compression library and is ++ subject to change. Applications should only use zlib.h. ++ */ ++ ++#ifndef _INFCODES_H ++#define _INFCODES_H ++ ++struct inflate_codes_state; ++typedef struct inflate_codes_state FAR inflate_codes_statef; ++ ++extern inflate_codes_statef *inflate_codes_new OF(( ++ uInt, uInt, ++ inflate_huft *, inflate_huft *, ++ z_streamp )); ++ ++extern int inflate_codes OF(( ++ inflate_blocks_statef *, ++ z_streamp , ++ int)); ++ ++extern void inflate_codes_free OF(( ++ inflate_codes_statef *, ++ z_streamp )); ++ ++#endif /* _INFCODES_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/inffast.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,183 @@ ++/* inffast.c -- process literals and length/distance pairs fast ++ * Copyright (C) 1995-2002 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include ++#include "inftrees.h" ++#include "infblock.h" ++#include "infcodes.h" ++#include "infutil.h" ++#include "inffast.h" ++ ++struct inflate_codes_state {int dummy;}; /* for buggy compilers */ ++ ++/* simplify the use of the inflate_huft type with some defines */ ++#define exop word.what.Exop ++#define bits word.what.Bits ++ ++/* macros for bit input with no checking and for returning unused bytes */ ++#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<avail_in-n;c=(k>>3)>3:c;n+=c;p-=c;k-=c<<3;} ++ ++/* Called with number of bytes left to write in window at least 258 ++ (the maximum string length) and number of input bytes available ++ at least ten. The ten bytes are six bytes for the longest length/ ++ distance pair plus four bytes for overloading the bit buffer. 
*/ ++ ++int inflate_fast(bl, bd, tl, td, s, z) ++uInt bl, bd; ++inflate_huft *tl; ++inflate_huft *td; /* need separate declaration for Borland C++ */ ++inflate_blocks_statef *s; ++z_streamp z; ++{ ++ inflate_huft *t; /* temporary pointer */ ++ uInt e; /* extra bits or operation */ ++ uLong b; /* bit buffer */ ++ uInt k; /* bits in bit buffer */ ++ Bytef *p; /* input data pointer */ ++ uInt n; /* bytes available there */ ++ Bytef *q; /* output window write pointer */ ++ uInt m; /* bytes to end of window or read pointer */ ++ uInt ml; /* mask for literal/length tree */ ++ uInt md; /* mask for distance tree */ ++ uInt c; /* bytes to copy */ ++ uInt d; /* distance back to copy from */ ++ Bytef *r; /* copy source pointer */ ++ ++ /* load input, output, bit values */ ++ LOAD ++ ++ /* initialize masks */ ++ ml = inflate_mask[bl]; ++ md = inflate_mask[bd]; ++ ++ /* do until not enough input or output space for fast loop */ ++ do { /* assume called with m >= 258 && n >= 10 */ ++ /* get literal/length code */ ++ GRABBITS(20) /* max bits for literal/length code */ ++ if ((e = (t = tl + ((uInt)b & ml))->exop) == 0) ++ { ++ DUMPBITS(t->bits) ++ Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
++ "inflate: * literal '%c'\n" : ++ "inflate: * literal 0x%02x\n", t->base)); ++ *q++ = (Byte)t->base; ++ m--; ++ continue; ++ } ++ do { ++ DUMPBITS(t->bits) ++ if (e & 16) ++ { ++ /* get extra bits for length */ ++ e &= 15; ++ c = t->base + ((uInt)b & inflate_mask[e]); ++ DUMPBITS(e) ++ Tracevv((stderr, "inflate: * length %u\n", c)); ++ ++ /* decode distance base of block to copy */ ++ GRABBITS(15); /* max bits for distance code */ ++ e = (t = td + ((uInt)b & md))->exop; ++ do { ++ DUMPBITS(t->bits) ++ if (e & 16) ++ { ++ /* get extra bits to add to distance base */ ++ e &= 15; ++ GRABBITS(e) /* get extra bits (up to 13) */ ++ d = t->base + ((uInt)b & inflate_mask[e]); ++ DUMPBITS(e) ++ Tracevv((stderr, "inflate: * distance %u\n", d)); ++ ++ /* do the copy */ ++ m -= c; ++ r = q - d; ++ if (r < s->window) /* wrap if needed */ ++ { ++ do { ++ r += s->end - s->window; /* force pointer in window */ ++ } while (r < s->window); /* covers invalid distances */ ++ e = s->end - r; ++ if (c > e) ++ { ++ c -= e; /* wrapped copy */ ++ do { ++ *q++ = *r++; ++ } while (--e); ++ r = s->window; ++ do { ++ *q++ = *r++; ++ } while (--c); ++ } ++ else /* normal copy */ ++ { ++ *q++ = *r++; c--; ++ *q++ = *r++; c--; ++ do { ++ *q++ = *r++; ++ } while (--c); ++ } ++ } ++ else /* normal copy */ ++ { ++ *q++ = *r++; c--; ++ *q++ = *r++; c--; ++ do { ++ *q++ = *r++; ++ } while (--c); ++ } ++ break; ++ } ++ else if ((e & 64) == 0) ++ { ++ t += t->base; ++ e = (t += ((uInt)b & inflate_mask[e]))->exop; ++ } ++ else ++ { ++ z->msg = (char*)"invalid distance code"; ++ UNGRAB ++ UPDATE ++ return Z_DATA_ERROR; ++ } ++ } while (1); ++ break; ++ } ++ if ((e & 64) == 0) ++ { ++ t += t->base; ++ if ((e = (t += ((uInt)b & inflate_mask[e]))->exop) == 0) ++ { ++ DUMPBITS(t->bits) ++ Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
++ "inflate: * literal '%c'\n" : ++ "inflate: * literal 0x%02x\n", t->base)); ++ *q++ = (Byte)t->base; ++ m--; ++ break; ++ } ++ } ++ else if (e & 32) ++ { ++ Tracevv((stderr, "inflate: * end of block\n")); ++ UNGRAB ++ UPDATE ++ return Z_STREAM_END; ++ } ++ else ++ { ++ z->msg = (char*)"invalid literal/length code"; ++ UNGRAB ++ UPDATE ++ return Z_DATA_ERROR; ++ } ++ } while (1); ++ } while (m >= 258 && n >= 10); ++ ++ /* not enough input or output--restore pointers and return */ ++ UNGRAB ++ UPDATE ++ return Z_OK; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/inffast.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,22 @@ ++/* inffast.h -- header to use inffast.c ++ * Copyright (C) 1995-2002 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++/* WARNING: this file should *not* be used by applications. It is ++ part of the implementation of the compression library and is ++ subject to change. Applications should only use zlib.h. ++ */ ++ ++#ifndef _INFFAST_H ++#define _INFFAST_H ++ ++extern int inflate_fast OF(( ++ uInt, ++ uInt, ++ inflate_huft *, ++ inflate_huft *, ++ inflate_blocks_statef *, ++ z_streamp )); ++ ++#endif /* _INFFAST_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/inffixed.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,151 @@ ++/* inffixed.h -- table for decoding fixed codes ++ * Generated automatically by the maketree.c program ++ */ ++ ++/* WARNING: this file should *not* be used by applications. It is ++ part of the implementation of the compression library and is ++ subject to change. Applications should only use zlib.h. 
++ */ ++ ++local uInt fixed_bl = 9; ++local uInt fixed_bd = 5; ++local inflate_huft fixed_tl[] = { ++ {{{96,7}},256}, {{{0,8}},80}, {{{0,8}},16}, {{{84,8}},115}, ++ {{{82,7}},31}, {{{0,8}},112}, {{{0,8}},48}, {{{0,9}},192}, ++ {{{80,7}},10}, {{{0,8}},96}, {{{0,8}},32}, {{{0,9}},160}, ++ {{{0,8}},0}, {{{0,8}},128}, {{{0,8}},64}, {{{0,9}},224}, ++ {{{80,7}},6}, {{{0,8}},88}, {{{0,8}},24}, {{{0,9}},144}, ++ {{{83,7}},59}, {{{0,8}},120}, {{{0,8}},56}, {{{0,9}},208}, ++ {{{81,7}},17}, {{{0,8}},104}, {{{0,8}},40}, {{{0,9}},176}, ++ {{{0,8}},8}, {{{0,8}},136}, {{{0,8}},72}, {{{0,9}},240}, ++ {{{80,7}},4}, {{{0,8}},84}, {{{0,8}},20}, {{{85,8}},227}, ++ {{{83,7}},43}, {{{0,8}},116}, {{{0,8}},52}, {{{0,9}},200}, ++ {{{81,7}},13}, {{{0,8}},100}, {{{0,8}},36}, {{{0,9}},168}, ++ {{{0,8}},4}, {{{0,8}},132}, {{{0,8}},68}, {{{0,9}},232}, ++ {{{80,7}},8}, {{{0,8}},92}, {{{0,8}},28}, {{{0,9}},152}, ++ {{{84,7}},83}, {{{0,8}},124}, {{{0,8}},60}, {{{0,9}},216}, ++ {{{82,7}},23}, {{{0,8}},108}, {{{0,8}},44}, {{{0,9}},184}, ++ {{{0,8}},12}, {{{0,8}},140}, {{{0,8}},76}, {{{0,9}},248}, ++ {{{80,7}},3}, {{{0,8}},82}, {{{0,8}},18}, {{{85,8}},163}, ++ {{{83,7}},35}, {{{0,8}},114}, {{{0,8}},50}, {{{0,9}},196}, ++ {{{81,7}},11}, {{{0,8}},98}, {{{0,8}},34}, {{{0,9}},164}, ++ {{{0,8}},2}, {{{0,8}},130}, {{{0,8}},66}, {{{0,9}},228}, ++ {{{80,7}},7}, {{{0,8}},90}, {{{0,8}},26}, {{{0,9}},148}, ++ {{{84,7}},67}, {{{0,8}},122}, {{{0,8}},58}, {{{0,9}},212}, ++ {{{82,7}},19}, {{{0,8}},106}, {{{0,8}},42}, {{{0,9}},180}, ++ {{{0,8}},10}, {{{0,8}},138}, {{{0,8}},74}, {{{0,9}},244}, ++ {{{80,7}},5}, {{{0,8}},86}, {{{0,8}},22}, {{{192,8}},0}, ++ {{{83,7}},51}, {{{0,8}},118}, {{{0,8}},54}, {{{0,9}},204}, ++ {{{81,7}},15}, {{{0,8}},102}, {{{0,8}},38}, {{{0,9}},172}, ++ {{{0,8}},6}, {{{0,8}},134}, {{{0,8}},70}, {{{0,9}},236}, ++ {{{80,7}},9}, {{{0,8}},94}, {{{0,8}},30}, {{{0,9}},156}, ++ {{{84,7}},99}, {{{0,8}},126}, {{{0,8}},62}, {{{0,9}},220}, ++ {{{82,7}},27}, {{{0,8}},110}, {{{0,8}},46}, {{{0,9}},188}, ++ 
{{{0,8}},14}, {{{0,8}},142}, {{{0,8}},78}, {{{0,9}},252}, ++ {{{96,7}},256}, {{{0,8}},81}, {{{0,8}},17}, {{{85,8}},131}, ++ {{{82,7}},31}, {{{0,8}},113}, {{{0,8}},49}, {{{0,9}},194}, ++ {{{80,7}},10}, {{{0,8}},97}, {{{0,8}},33}, {{{0,9}},162}, ++ {{{0,8}},1}, {{{0,8}},129}, {{{0,8}},65}, {{{0,9}},226}, ++ {{{80,7}},6}, {{{0,8}},89}, {{{0,8}},25}, {{{0,9}},146}, ++ {{{83,7}},59}, {{{0,8}},121}, {{{0,8}},57}, {{{0,9}},210}, ++ {{{81,7}},17}, {{{0,8}},105}, {{{0,8}},41}, {{{0,9}},178}, ++ {{{0,8}},9}, {{{0,8}},137}, {{{0,8}},73}, {{{0,9}},242}, ++ {{{80,7}},4}, {{{0,8}},85}, {{{0,8}},21}, {{{80,8}},258}, ++ {{{83,7}},43}, {{{0,8}},117}, {{{0,8}},53}, {{{0,9}},202}, ++ {{{81,7}},13}, {{{0,8}},101}, {{{0,8}},37}, {{{0,9}},170}, ++ {{{0,8}},5}, {{{0,8}},133}, {{{0,8}},69}, {{{0,9}},234}, ++ {{{80,7}},8}, {{{0,8}},93}, {{{0,8}},29}, {{{0,9}},154}, ++ {{{84,7}},83}, {{{0,8}},125}, {{{0,8}},61}, {{{0,9}},218}, ++ {{{82,7}},23}, {{{0,8}},109}, {{{0,8}},45}, {{{0,9}},186}, ++ {{{0,8}},13}, {{{0,8}},141}, {{{0,8}},77}, {{{0,9}},250}, ++ {{{80,7}},3}, {{{0,8}},83}, {{{0,8}},19}, {{{85,8}},195}, ++ {{{83,7}},35}, {{{0,8}},115}, {{{0,8}},51}, {{{0,9}},198}, ++ {{{81,7}},11}, {{{0,8}},99}, {{{0,8}},35}, {{{0,9}},166}, ++ {{{0,8}},3}, {{{0,8}},131}, {{{0,8}},67}, {{{0,9}},230}, ++ {{{80,7}},7}, {{{0,8}},91}, {{{0,8}},27}, {{{0,9}},150}, ++ {{{84,7}},67}, {{{0,8}},123}, {{{0,8}},59}, {{{0,9}},214}, ++ {{{82,7}},19}, {{{0,8}},107}, {{{0,8}},43}, {{{0,9}},182}, ++ {{{0,8}},11}, {{{0,8}},139}, {{{0,8}},75}, {{{0,9}},246}, ++ {{{80,7}},5}, {{{0,8}},87}, {{{0,8}},23}, {{{192,8}},0}, ++ {{{83,7}},51}, {{{0,8}},119}, {{{0,8}},55}, {{{0,9}},206}, ++ {{{81,7}},15}, {{{0,8}},103}, {{{0,8}},39}, {{{0,9}},174}, ++ {{{0,8}},7}, {{{0,8}},135}, {{{0,8}},71}, {{{0,9}},238}, ++ {{{80,7}},9}, {{{0,8}},95}, {{{0,8}},31}, {{{0,9}},158}, ++ {{{84,7}},99}, {{{0,8}},127}, {{{0,8}},63}, {{{0,9}},222}, ++ {{{82,7}},27}, {{{0,8}},111}, {{{0,8}},47}, {{{0,9}},190}, ++ {{{0,8}},15}, {{{0,8}},143}, 
{{{0,8}},79}, {{{0,9}},254}, ++ {{{96,7}},256}, {{{0,8}},80}, {{{0,8}},16}, {{{84,8}},115}, ++ {{{82,7}},31}, {{{0,8}},112}, {{{0,8}},48}, {{{0,9}},193}, ++ {{{80,7}},10}, {{{0,8}},96}, {{{0,8}},32}, {{{0,9}},161}, ++ {{{0,8}},0}, {{{0,8}},128}, {{{0,8}},64}, {{{0,9}},225}, ++ {{{80,7}},6}, {{{0,8}},88}, {{{0,8}},24}, {{{0,9}},145}, ++ {{{83,7}},59}, {{{0,8}},120}, {{{0,8}},56}, {{{0,9}},209}, ++ {{{81,7}},17}, {{{0,8}},104}, {{{0,8}},40}, {{{0,9}},177}, ++ {{{0,8}},8}, {{{0,8}},136}, {{{0,8}},72}, {{{0,9}},241}, ++ {{{80,7}},4}, {{{0,8}},84}, {{{0,8}},20}, {{{85,8}},227}, ++ {{{83,7}},43}, {{{0,8}},116}, {{{0,8}},52}, {{{0,9}},201}, ++ {{{81,7}},13}, {{{0,8}},100}, {{{0,8}},36}, {{{0,9}},169}, ++ {{{0,8}},4}, {{{0,8}},132}, {{{0,8}},68}, {{{0,9}},233}, ++ {{{80,7}},8}, {{{0,8}},92}, {{{0,8}},28}, {{{0,9}},153}, ++ {{{84,7}},83}, {{{0,8}},124}, {{{0,8}},60}, {{{0,9}},217}, ++ {{{82,7}},23}, {{{0,8}},108}, {{{0,8}},44}, {{{0,9}},185}, ++ {{{0,8}},12}, {{{0,8}},140}, {{{0,8}},76}, {{{0,9}},249}, ++ {{{80,7}},3}, {{{0,8}},82}, {{{0,8}},18}, {{{85,8}},163}, ++ {{{83,7}},35}, {{{0,8}},114}, {{{0,8}},50}, {{{0,9}},197}, ++ {{{81,7}},11}, {{{0,8}},98}, {{{0,8}},34}, {{{0,9}},165}, ++ {{{0,8}},2}, {{{0,8}},130}, {{{0,8}},66}, {{{0,9}},229}, ++ {{{80,7}},7}, {{{0,8}},90}, {{{0,8}},26}, {{{0,9}},149}, ++ {{{84,7}},67}, {{{0,8}},122}, {{{0,8}},58}, {{{0,9}},213}, ++ {{{82,7}},19}, {{{0,8}},106}, {{{0,8}},42}, {{{0,9}},181}, ++ {{{0,8}},10}, {{{0,8}},138}, {{{0,8}},74}, {{{0,9}},245}, ++ {{{80,7}},5}, {{{0,8}},86}, {{{0,8}},22}, {{{192,8}},0}, ++ {{{83,7}},51}, {{{0,8}},118}, {{{0,8}},54}, {{{0,9}},205}, ++ {{{81,7}},15}, {{{0,8}},102}, {{{0,8}},38}, {{{0,9}},173}, ++ {{{0,8}},6}, {{{0,8}},134}, {{{0,8}},70}, {{{0,9}},237}, ++ {{{80,7}},9}, {{{0,8}},94}, {{{0,8}},30}, {{{0,9}},157}, ++ {{{84,7}},99}, {{{0,8}},126}, {{{0,8}},62}, {{{0,9}},221}, ++ {{{82,7}},27}, {{{0,8}},110}, {{{0,8}},46}, {{{0,9}},189}, ++ {{{0,8}},14}, {{{0,8}},142}, {{{0,8}},78}, {{{0,9}},253}, ++ 
{{{96,7}},256}, {{{0,8}},81}, {{{0,8}},17}, {{{85,8}},131}, ++ {{{82,7}},31}, {{{0,8}},113}, {{{0,8}},49}, {{{0,9}},195}, ++ {{{80,7}},10}, {{{0,8}},97}, {{{0,8}},33}, {{{0,9}},163}, ++ {{{0,8}},1}, {{{0,8}},129}, {{{0,8}},65}, {{{0,9}},227}, ++ {{{80,7}},6}, {{{0,8}},89}, {{{0,8}},25}, {{{0,9}},147}, ++ {{{83,7}},59}, {{{0,8}},121}, {{{0,8}},57}, {{{0,9}},211}, ++ {{{81,7}},17}, {{{0,8}},105}, {{{0,8}},41}, {{{0,9}},179}, ++ {{{0,8}},9}, {{{0,8}},137}, {{{0,8}},73}, {{{0,9}},243}, ++ {{{80,7}},4}, {{{0,8}},85}, {{{0,8}},21}, {{{80,8}},258}, ++ {{{83,7}},43}, {{{0,8}},117}, {{{0,8}},53}, {{{0,9}},203}, ++ {{{81,7}},13}, {{{0,8}},101}, {{{0,8}},37}, {{{0,9}},171}, ++ {{{0,8}},5}, {{{0,8}},133}, {{{0,8}},69}, {{{0,9}},235}, ++ {{{80,7}},8}, {{{0,8}},93}, {{{0,8}},29}, {{{0,9}},155}, ++ {{{84,7}},83}, {{{0,8}},125}, {{{0,8}},61}, {{{0,9}},219}, ++ {{{82,7}},23}, {{{0,8}},109}, {{{0,8}},45}, {{{0,9}},187}, ++ {{{0,8}},13}, {{{0,8}},141}, {{{0,8}},77}, {{{0,9}},251}, ++ {{{80,7}},3}, {{{0,8}},83}, {{{0,8}},19}, {{{85,8}},195}, ++ {{{83,7}},35}, {{{0,8}},115}, {{{0,8}},51}, {{{0,9}},199}, ++ {{{81,7}},11}, {{{0,8}},99}, {{{0,8}},35}, {{{0,9}},167}, ++ {{{0,8}},3}, {{{0,8}},131}, {{{0,8}},67}, {{{0,9}},231}, ++ {{{80,7}},7}, {{{0,8}},91}, {{{0,8}},27}, {{{0,9}},151}, ++ {{{84,7}},67}, {{{0,8}},123}, {{{0,8}},59}, {{{0,9}},215}, ++ {{{82,7}},19}, {{{0,8}},107}, {{{0,8}},43}, {{{0,9}},183}, ++ {{{0,8}},11}, {{{0,8}},139}, {{{0,8}},75}, {{{0,9}},247}, ++ {{{80,7}},5}, {{{0,8}},87}, {{{0,8}},23}, {{{192,8}},0}, ++ {{{83,7}},51}, {{{0,8}},119}, {{{0,8}},55}, {{{0,9}},207}, ++ {{{81,7}},15}, {{{0,8}},103}, {{{0,8}},39}, {{{0,9}},175}, ++ {{{0,8}},7}, {{{0,8}},135}, {{{0,8}},71}, {{{0,9}},239}, ++ {{{80,7}},9}, {{{0,8}},95}, {{{0,8}},31}, {{{0,9}},159}, ++ {{{84,7}},99}, {{{0,8}},127}, {{{0,8}},63}, {{{0,9}},223}, ++ {{{82,7}},27}, {{{0,8}},111}, {{{0,8}},47}, {{{0,9}},191}, ++ {{{0,8}},15}, {{{0,8}},143}, {{{0,8}},79}, {{{0,9}},255} ++ }; ++local inflate_huft fixed_td[] = { ++ 
{{{80,5}},1}, {{{87,5}},257}, {{{83,5}},17}, {{{91,5}},4097}, ++ {{{81,5}},5}, {{{89,5}},1025}, {{{85,5}},65}, {{{93,5}},16385}, ++ {{{80,5}},3}, {{{88,5}},513}, {{{84,5}},33}, {{{92,5}},8193}, ++ {{{82,5}},9}, {{{90,5}},2049}, {{{86,5}},129}, {{{192,5}},24577}, ++ {{{80,5}},2}, {{{87,5}},385}, {{{83,5}},25}, {{{91,5}},6145}, ++ {{{81,5}},7}, {{{89,5}},1537}, {{{85,5}},97}, {{{93,5}},24577}, ++ {{{80,5}},4}, {{{88,5}},769}, {{{84,5}},49}, {{{92,5}},12289}, ++ {{{82,5}},13}, {{{90,5}},3073}, {{{86,5}},193}, {{{192,5}},24577} ++ }; +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/inflate.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,368 @@ ++/* inflate.c -- zlib interface to inflate modules ++ * Copyright (C) 1995-2002 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include ++#include "infblock.h" ++ ++struct inflate_blocks_state {int dummy;}; /* for buggy compilers */ ++ ++typedef enum { ++ METHOD, /* waiting for method byte */ ++ FLAG, /* waiting for flag byte */ ++ DICT4, /* four dictionary check bytes to go */ ++ DICT3, /* three dictionary check bytes to go */ ++ DICT2, /* two dictionary check bytes to go */ ++ DICT1, /* one dictionary check byte to go */ ++ DICT0, /* waiting for inflateSetDictionary */ ++ BLOCKS, /* decompressing blocks */ ++ CHECK4, /* four check bytes to go */ ++ CHECK3, /* three check bytes to go */ ++ CHECK2, /* two check bytes to go */ ++ CHECK1, /* one check byte to go */ ++ DONE, /* finished check, done */ ++ BAD} /* got an error--stay here */ ++inflate_mode; ++ ++/* inflate private state */ ++struct internal_state { ++ ++ /* mode */ ++ inflate_mode mode; /* current inflate mode */ ++ ++ /* mode dependent information */ ++ union { ++ uInt method; /* if FLAGS, method byte */ ++ struct { ++ uLong was; /* computed check value */ ++ uLong need; /* stream check value */ ++ } check; /* if CHECK, check values to compare */ ++ uInt marker; /* if BAD, inflateSync's marker bytes count */ ++ } 
sub; /* submode */ ++ ++ /* mode independent information */ ++ int nowrap; /* flag for no wrapper */ ++ uInt wbits; /* log2(window size) (8..15, defaults to 15) */ ++ inflate_blocks_statef ++ *blocks; /* current inflate_blocks state */ ++ ++}; ++ ++ ++int ZEXPORT inflateReset(z) ++z_streamp z; ++{ ++ if (z == Z_NULL || z->state == Z_NULL) ++ return Z_STREAM_ERROR; ++ z->total_in = z->total_out = 0; ++ z->msg = Z_NULL; ++ z->state->mode = z->state->nowrap ? BLOCKS : METHOD; ++ inflate_blocks_reset(z->state->blocks, z, Z_NULL); ++ Tracev((stderr, "inflate: reset\n")); ++ return Z_OK; ++} ++ ++ ++int ZEXPORT inflateEnd(z) ++z_streamp z; ++{ ++ if (z == Z_NULL || z->state == Z_NULL || z->zfree == Z_NULL) ++ return Z_STREAM_ERROR; ++ if (z->state->blocks != Z_NULL) ++ inflate_blocks_free(z->state->blocks, z); ++ ZFREE(z, z->state); ++ z->state = Z_NULL; ++ Tracev((stderr, "inflate: end\n")); ++ return Z_OK; ++} ++ ++ ++int ZEXPORT inflateInit2_(z, w, version, stream_size) ++z_streamp z; ++int w; ++const char *version; ++int stream_size; ++{ ++ if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || ++ stream_size != sizeof(z_stream)) ++ return Z_VERSION_ERROR; ++ ++ /* initialize state */ ++ if (z == Z_NULL) ++ return Z_STREAM_ERROR; ++ z->msg = Z_NULL; ++ if (z->zalloc == Z_NULL) ++ { ++ return Z_STREAM_ERROR; ++/* z->zalloc = zcalloc; ++ z->opaque = (voidpf)0; ++*/ ++ } ++ if (z->zfree == Z_NULL) return Z_STREAM_ERROR; /* z->zfree = zcfree; */ ++ if ((z->state = (struct internal_state FAR *) ++ ZALLOC(z,1,sizeof(struct internal_state))) == Z_NULL) ++ return Z_MEM_ERROR; ++ z->state->blocks = Z_NULL; ++ ++ /* handle undocumented nowrap option (no zlib header or check) */ ++ z->state->nowrap = 0; ++ if (w < 0) ++ { ++ w = - w; ++ z->state->nowrap = 1; ++ } ++ ++ /* set window size */ ++ if (w < 8 || w > 15) ++ { ++ inflateEnd(z); ++ return Z_STREAM_ERROR; ++ } ++ z->state->wbits = (uInt)w; ++ ++ /* create inflate_blocks state */ ++ if ((z->state->blocks = ++ 
inflate_blocks_new(z, z->state->nowrap ? Z_NULL : adler32, (uInt)1 << w)) ++ == Z_NULL) ++ { ++ inflateEnd(z); ++ return Z_MEM_ERROR; ++ } ++ Tracev((stderr, "inflate: allocated\n")); ++ ++ /* reset state */ ++ inflateReset(z); ++ return Z_OK; ++} ++ ++ ++int ZEXPORT inflateInit_(z, version, stream_size) ++z_streamp z; ++const char *version; ++int stream_size; ++{ ++ return inflateInit2_(z, DEF_WBITS, version, stream_size); ++} ++ ++ ++#define NEEDBYTE {if(z->avail_in==0)return r;r=f;} ++#define NEXTBYTE (z->avail_in--,z->total_in++,*z->next_in++) ++ ++int ZEXPORT inflate(z, f) ++z_streamp z; ++int f; ++{ ++ int r; ++ uInt b; ++ ++ if (z == Z_NULL || z->state == Z_NULL || z->next_in == Z_NULL) ++ return Z_STREAM_ERROR; ++ f = f == Z_FINISH ? Z_BUF_ERROR : Z_OK; ++ r = Z_BUF_ERROR; ++ while (1) switch (z->state->mode) ++ { ++ case METHOD: ++ NEEDBYTE ++ if (((z->state->sub.method = NEXTBYTE) & 0xf) != Z_DEFLATED) ++ { ++ z->state->mode = BAD; ++ z->msg = (char*)"unknown compression method"; ++ z->state->sub.marker = 5; /* can't try inflateSync */ ++ break; ++ } ++ if ((z->state->sub.method >> 4) + 8 > z->state->wbits) ++ { ++ z->state->mode = BAD; ++ z->msg = (char*)"invalid window size"; ++ z->state->sub.marker = 5; /* can't try inflateSync */ ++ break; ++ } ++ z->state->mode = FLAG; ++ case FLAG: ++ NEEDBYTE ++ b = NEXTBYTE; ++ if (((z->state->sub.method << 8) + b) % 31) ++ { ++ z->state->mode = BAD; ++ z->msg = (char*)"incorrect header check"; ++ z->state->sub.marker = 5; /* can't try inflateSync */ ++ break; ++ } ++ Tracev((stderr, "inflate: zlib header ok\n")); ++ if (!(b & PRESET_DICT)) ++ { ++ z->state->mode = BLOCKS; ++ break; ++ } ++ z->state->mode = DICT4; ++ case DICT4: ++ NEEDBYTE ++ z->state->sub.check.need = (uLong)NEXTBYTE << 24; ++ z->state->mode = DICT3; ++ case DICT3: ++ NEEDBYTE ++ z->state->sub.check.need += (uLong)NEXTBYTE << 16; ++ z->state->mode = DICT2; ++ case DICT2: ++ NEEDBYTE ++ z->state->sub.check.need += (uLong)NEXTBYTE << 8; ++ 
z->state->mode = DICT1; ++ case DICT1: ++ NEEDBYTE ++ z->state->sub.check.need += (uLong)NEXTBYTE; ++ z->adler = z->state->sub.check.need; ++ z->state->mode = DICT0; ++ return Z_NEED_DICT; ++ case DICT0: ++ z->state->mode = BAD; ++ z->msg = (char*)"need dictionary"; ++ z->state->sub.marker = 0; /* can try inflateSync */ ++ return Z_STREAM_ERROR; ++ case BLOCKS: ++ r = inflate_blocks(z->state->blocks, z, r); ++ if (r == Z_DATA_ERROR) ++ { ++ z->state->mode = BAD; ++ z->state->sub.marker = 0; /* can try inflateSync */ ++ break; ++ } ++ if (r == Z_OK) ++ r = f; ++ if (r != Z_STREAM_END) ++ return r; ++ r = f; ++ inflate_blocks_reset(z->state->blocks, z, &z->state->sub.check.was); ++ if (z->state->nowrap) ++ { ++ z->state->mode = DONE; ++ break; ++ } ++ z->state->mode = CHECK4; ++ case CHECK4: ++ NEEDBYTE ++ z->state->sub.check.need = (uLong)NEXTBYTE << 24; ++ z->state->mode = CHECK3; ++ case CHECK3: ++ NEEDBYTE ++ z->state->sub.check.need += (uLong)NEXTBYTE << 16; ++ z->state->mode = CHECK2; ++ case CHECK2: ++ NEEDBYTE ++ z->state->sub.check.need += (uLong)NEXTBYTE << 8; ++ z->state->mode = CHECK1; ++ case CHECK1: ++ NEEDBYTE ++ z->state->sub.check.need += (uLong)NEXTBYTE; ++ ++ if (z->state->sub.check.was != z->state->sub.check.need) ++ { ++ z->state->mode = BAD; ++ z->msg = (char*)"incorrect data check"; ++ z->state->sub.marker = 5; /* can't try inflateSync */ ++ break; ++ } ++ Tracev((stderr, "inflate: zlib check ok\n")); ++ z->state->mode = DONE; ++ case DONE: ++ return Z_STREAM_END; ++ case BAD: ++ return Z_DATA_ERROR; ++ default: ++ return Z_STREAM_ERROR; ++ } ++#ifdef NEED_DUMMY_RETURN ++ return Z_STREAM_ERROR; /* Some dumb compilers complain without this */ ++#endif ++} ++ ++ ++int ZEXPORT inflateSetDictionary(z, dictionary, dictLength) ++z_streamp z; ++const Bytef *dictionary; ++uInt dictLength; ++{ ++ uInt length = dictLength; ++ ++ if (z == Z_NULL || z->state == Z_NULL || z->state->mode != DICT0) ++ return Z_STREAM_ERROR; ++ ++ if (adler32(1L, dictionary, 
dictLength) != z->adler) return Z_DATA_ERROR; ++ z->adler = 1L; ++ ++ if (length >= ((uInt)1<state->wbits)) ++ { ++ length = (1<state->wbits)-1; ++ dictionary += dictLength - length; ++ } ++ inflate_set_dictionary(z->state->blocks, dictionary, length); ++ z->state->mode = BLOCKS; ++ return Z_OK; ++} ++ ++ ++int ZEXPORT inflateSync(z) ++z_streamp z; ++{ ++ uInt n; /* number of bytes to look at */ ++ Bytef *p; /* pointer to bytes */ ++ uInt m; /* number of marker bytes found in a row */ ++ uLong r, w; /* temporaries to save total_in and total_out */ ++ ++ /* set up */ ++ if (z == Z_NULL || z->state == Z_NULL) ++ return Z_STREAM_ERROR; ++ if (z->state->mode != BAD) ++ { ++ z->state->mode = BAD; ++ z->state->sub.marker = 0; ++ } ++ if ((n = z->avail_in) == 0) ++ return Z_BUF_ERROR; ++ p = z->next_in; ++ m = z->state->sub.marker; ++ ++ /* search */ ++ while (n && m < 4) ++ { ++ static const Byte mark[4] = {0, 0, 0xff, 0xff}; ++ if (*p == mark[m]) ++ m++; ++ else if (*p) ++ m = 0; ++ else ++ m = 4 - m; ++ p++, n--; ++ } ++ ++ /* restore */ ++ z->total_in += p - z->next_in; ++ z->next_in = p; ++ z->avail_in = n; ++ z->state->sub.marker = m; ++ ++ /* return no joy or set up to restart on a new block */ ++ if (m != 4) ++ return Z_DATA_ERROR; ++ r = z->total_in; w = z->total_out; ++ inflateReset(z); ++ z->total_in = r; z->total_out = w; ++ z->state->mode = BLOCKS; ++ return Z_OK; ++} ++ ++ ++/* Returns true if inflate is currently at the end of a block generated ++ * by Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP ++ * implementation to provide an additional safety check. PPP uses Z_SYNC_FLUSH ++ * but removes the length bytes of the resulting empty stored block. When ++ * decompressing, PPP checks that at the end of input packet, inflate is ++ * waiting for these length bytes. 
++ */ ++int ZEXPORT inflateSyncPoint(z) ++z_streamp z; ++{ ++ if (z == Z_NULL || z->state == Z_NULL || z->state->blocks == Z_NULL) ++ return Z_STREAM_ERROR; ++ return inflate_blocks_sync_point(z->state->blocks); ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/inftrees.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,454 @@ ++/* inftrees.c -- generate Huffman trees for efficient decoding ++ * Copyright (C) 1995-2002 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include ++#include "inftrees.h" ++ ++#if !defined(BUILDFIXED) && !defined(STDC) ++# define BUILDFIXED /* non ANSI compilers may not accept inffixed.h */ ++#endif ++ ++local const char inflate_copyright[] = ++ " inflate 1.1.4 Copyright 1995-2002 Mark Adler "; ++/* ++ If you use the zlib library in a product, an acknowledgment is welcome ++ in the documentation of your product. If for some reason you cannot ++ include such an acknowledgment, I would appreciate that you keep this ++ copyright string in the executable of your product. ++ */ ++struct internal_state {int dummy;}; /* for buggy compilers */ ++ ++/* simplify the use of the inflate_huft type with some defines */ ++#define exop word.what.Exop ++#define bits word.what.Bits ++ ++ ++local int huft_build OF(( ++ uIntf *, /* code lengths in bits */ ++ uInt, /* number of codes */ ++ uInt, /* number of "simple" codes */ ++ const uIntf *, /* list of base values for non-simple codes */ ++ const uIntf *, /* list of extra bits for non-simple codes */ ++ inflate_huft * FAR*,/* result: starting table */ ++ uIntf *, /* maximum lookup bits (returns actual) */ ++ inflate_huft *, /* space for trees */ ++ uInt *, /* hufts used in space */ ++ uIntf * )); /* space for values */ ++ ++/* Tables for deflate from PKZIP's appnote.txt. 
*/ ++local const uInt cplens[31] = { /* Copy lengths for literal codes 257..285 */ ++ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, ++ 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; ++ /* see note #13 above about 258 */ ++local const uInt cplext[31] = { /* Extra bits for literal codes 257..285 */ ++ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, ++ 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 112, 112}; /* 112==invalid */ ++local const uInt cpdist[30] = { /* Copy offsets for distance codes 0..29 */ ++ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, ++ 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, ++ 8193, 12289, 16385, 24577}; ++local const uInt cpdext[30] = { /* Extra bits for distance codes */ ++ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, ++ 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, ++ 12, 12, 13, 13}; ++ ++/* ++ Huffman code decoding is performed using a multi-level table lookup. ++ The fastest way to decode is to simply build a lookup table whose ++ size is determined by the longest code. However, the time it takes ++ to build this table can also be a factor if the data being decoded ++ is not very long. The most common codes are necessarily the ++ shortest codes, so those codes dominate the decoding time, and hence ++ the speed. The idea is you can have a shorter table that decodes the ++ shorter, more probable codes, and then point to subsidiary tables for ++ the longer codes. The time it costs to decode the longer codes is ++ then traded against the time it takes to make longer tables. ++ ++ This results of this trade are in the variables lbits and dbits ++ below. lbits is the number of bits the first level table for literal/ ++ length codes can decode in one step, and dbits is the same thing for ++ the distance codes. Subsequent tables are also less than or equal to ++ those sizes. 
These values may be adjusted either when all of the ++ codes are shorter than that, in which case the longest code length in ++ bits is used, or when the shortest code is *longer* than the requested ++ table size, in which case the length of the shortest code in bits is ++ used. ++ ++ There are two different values for the two tables, since they code a ++ different number of possibilities each. The literal/length table ++ codes 286 possible values, or in a flat code, a little over eight ++ bits. The distance table codes 30 possible values, or a little less ++ than five bits, flat. The optimum values for speed end up being ++ about one bit more than those, so lbits is 8+1 and dbits is 5+1. ++ The optimum values may differ though from machine to machine, and ++ possibly even between compilers. Your mileage may vary. ++ */ ++ ++ ++/* If BMAX needs to be larger than 16, then h and x[] should be uLong. */ ++#define BMAX 15 /* maximum bit length of any code */ ++ ++local int huft_build(b, n, s, d, e, t, m, hp, hn, v) ++uIntf *b; /* code lengths in bits (all assumed <= BMAX) */ ++uInt n; /* number of codes (assumed <= 288) */ ++uInt s; /* number of simple-valued codes (0..s-1) */ ++const uIntf *d; /* list of base values for non-simple codes */ ++const uIntf *e; /* list of extra bits for non-simple codes */ ++inflate_huft * FAR *t; /* result: starting table */ ++uIntf *m; /* maximum lookup bits, returns actual */ ++inflate_huft *hp; /* space for trees */ ++uInt *hn; /* hufts used in space */ ++uIntf *v; /* working area: values in order of bit length */ ++/* Given a list of code lengths and a maximum table size, make a set of ++ tables to decode that set of codes. Return Z_OK on success, Z_BUF_ERROR ++ if the given code set is incomplete (the tables are still built in this ++ case), or Z_DATA_ERROR if the input is invalid. 
*/ ++{ ++ ++ uInt a; /* counter for codes of length k */ ++ uInt c[BMAX+1]; /* bit length count table */ ++ uInt f; /* i repeats in table every f entries */ ++ int g; /* maximum code length */ ++ int h; /* table level */ ++ register uInt i; /* counter, current code */ ++ register uInt j; /* counter */ ++ register int k; /* number of bits in current code */ ++ int l; /* bits per table (returned in m) */ ++ uInt mask; /* (1 << w) - 1, to avoid cc -O bug on HP */ ++ register uIntf *p; /* pointer into c[], b[], or v[] */ ++ inflate_huft *q; /* points to current table */ ++ struct inflate_huft_s r; /* table entry for structure assignment */ ++ inflate_huft *u[BMAX]; /* table stack */ ++ register int w; /* bits before this table == (l * h) */ ++ uInt x[BMAX+1]; /* bit offsets, then code stack */ ++ uIntf *xp; /* pointer into x */ ++ int y; /* number of dummy codes added */ ++ uInt z; /* number of entries in current table */ ++ ++ ++ /* Generate counts for each bit length */ ++ p = c; ++#define C0 *p++ = 0; ++#define C2 C0 C0 C0 C0 ++#define C4 C2 C2 C2 C2 ++ C4 /* clear c[]--assume BMAX+1 is 16 */ ++ p = b; i = n; ++ do { ++ c[*p++]++; /* assume all entries <= BMAX */ ++ } while (--i); ++ if (c[0] == n) /* null input--all zero length codes */ ++ { ++ *t = (inflate_huft *)Z_NULL; ++ *m = 0; ++ return Z_OK; ++ } ++ ++ ++ /* Find minimum and maximum length, bound *m by those */ ++ l = *m; ++ for (j = 1; j <= BMAX; j++) ++ if (c[j]) ++ break; ++ k = j; /* minimum code length */ ++ if ((uInt)l < j) ++ l = j; ++ for (i = BMAX; i; i--) ++ if (c[i]) ++ break; ++ g = i; /* maximum code length */ ++ if ((uInt)l > i) ++ l = i; ++ *m = l; ++ ++ ++ /* Adjust last length count to fill out codes, if needed */ ++ for (y = 1 << j; j < i; j++, y <<= 1) ++ if ((y -= c[j]) < 0) ++ return Z_DATA_ERROR; ++ if ((y -= c[i]) < 0) ++ return Z_DATA_ERROR; ++ c[i] += y; ++ ++ ++ /* Generate starting offsets into the value table for each length */ ++ x[1] = j = 0; ++ p = c + 1; xp = x + 2; ++ while 
(--i) { /* note that i == g from above */ ++ *xp++ = (j += *p++); ++ } ++ ++ ++ /* Make a table of values in order of bit lengths */ ++ p = b; i = 0; ++ do { ++ if ((j = *p++) != 0) ++ v[x[j]++] = i; ++ } while (++i < n); ++ n = x[g]; /* set n to length of v */ ++ ++ ++ /* Generate the Huffman codes and for each, make the table entries */ ++ x[0] = i = 0; /* first Huffman code is zero */ ++ p = v; /* grab values in bit order */ ++ h = -1; /* no tables yet--level -1 */ ++ w = -l; /* bits decoded == (l * h) */ ++ u[0] = (inflate_huft *)Z_NULL; /* just to keep compilers happy */ ++ q = (inflate_huft *)Z_NULL; /* ditto */ ++ z = 0; /* ditto */ ++ ++ /* go through the bit lengths (k already is bits in shortest code) */ ++ for (; k <= g; k++) ++ { ++ a = c[k]; ++ while (a--) ++ { ++ /* here i is the Huffman code of length k bits for value *p */ ++ /* make tables up to required level */ ++ while (k > w + l) ++ { ++ h++; ++ w += l; /* previous table always l bits */ ++ ++ /* compute minimum size table less than or equal to l bits */ ++ z = g - w; ++ z = z > (uInt)l ? 
l : z; /* table size upper limit */ ++ if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ ++ { /* too few codes for k-w bit table */ ++ f -= a + 1; /* deduct codes from patterns left */ ++ xp = c + k; ++ if (j < z) ++ while (++j < z) /* try smaller tables up to z bits */ ++ { ++ if ((f <<= 1) <= *++xp) ++ break; /* enough codes to use up j bits */ ++ f -= *xp; /* else deduct codes from patterns */ ++ } ++ } ++ z = 1 << j; /* table entries for j-bit table */ ++ ++ /* allocate new table */ ++ if (*hn + z > MANY) /* (note: doesn't matter for fixed) */ ++ return Z_DATA_ERROR; /* overflow of MANY */ ++ u[h] = q = hp + *hn; ++ *hn += z; ++ ++ /* connect to last table, if there is one */ ++ if (h) ++ { ++ x[h] = i; /* save pattern for backing up */ ++ r.bits = (Byte)l; /* bits to dump before this table */ ++ r.exop = (Byte)j; /* bits in this table */ ++ j = i >> (w - l); ++ r.base = (uInt)(q - u[h-1] - j); /* offset to this table */ ++ u[h-1][j] = r; /* connect to last table */ ++ } ++ else ++ *t = q; /* first table is returned result */ ++ } ++ ++ /* set up table entry in r */ ++ r.bits = (Byte)(k - w); ++ if (p >= v + n) ++ r.exop = 128 + 64; /* out of values--invalid code */ ++ else if (*p < s) ++ { ++ r.exop = (Byte)(*p < 256 ? 
0 : 32 + 64); /* 256 is end-of-block */ ++ r.base = *p++; /* simple code is just the value */ ++ } ++ else ++ { ++ r.exop = (Byte)(e[*p - s] + 16 + 64);/* non-simple--look up in lists */ ++ r.base = d[*p++ - s]; ++ } ++ ++ /* fill code-like entries with r */ ++ f = 1 << (k - w); ++ for (j = i >> w; j < z; j += f) ++ q[j] = r; ++ ++ /* backwards increment the k-bit code i */ ++ for (j = 1 << (k - 1); i & j; j >>= 1) ++ i ^= j; ++ i ^= j; ++ ++ /* backup over finished tables */ ++ mask = (1 << w) - 1; /* needed on HP, cc -O bug */ ++ while ((i & mask) != x[h]) ++ { ++ h--; /* don't need to update q */ ++ w -= l; ++ mask = (1 << w) - 1; ++ } ++ } ++ } ++ ++ ++ /* Return Z_BUF_ERROR if we were given an incomplete table */ ++ return y != 0 && g != 1 ? Z_BUF_ERROR : Z_OK; ++} ++ ++ ++int inflate_trees_bits(c, bb, tb, hp, z) ++uIntf *c; /* 19 code lengths */ ++uIntf *bb; /* bits tree desired/actual depth */ ++inflate_huft * FAR *tb; /* bits tree result */ ++inflate_huft *hp; /* space for trees */ ++z_streamp z; /* for messages */ ++{ ++ int r; ++ uInt hn = 0; /* hufts used in space */ ++ uIntf *v; /* work area for huft_build */ ++ ++ if ((v = (uIntf*)ZALLOC(z, 19, sizeof(uInt))) == Z_NULL) ++ return Z_MEM_ERROR; ++ r = huft_build(c, 19, 19, (uIntf*)Z_NULL, (uIntf*)Z_NULL, ++ tb, bb, hp, &hn, v); ++ if (r == Z_DATA_ERROR) ++ z->msg = (char*)"oversubscribed dynamic bit lengths tree"; ++ else if (r == Z_BUF_ERROR || *bb == 0) ++ { ++ z->msg = (char*)"incomplete dynamic bit lengths tree"; ++ r = Z_DATA_ERROR; ++ } ++ ZFREE(z, v); ++ return r; ++} ++ ++ ++int inflate_trees_dynamic(nl, nd, c, bl, bd, tl, td, hp, z) ++uInt nl; /* number of literal/length codes */ ++uInt nd; /* number of distance codes */ ++uIntf *c; /* that many (total) code lengths */ ++uIntf *bl; /* literal desired/actual bit depth */ ++uIntf *bd; /* distance desired/actual bit depth */ ++inflate_huft * FAR *tl; /* literal/length tree result */ ++inflate_huft * FAR *td; /* distance tree result */ 
++inflate_huft *hp; /* space for trees */ ++z_streamp z; /* for messages */ ++{ ++ int r; ++ uInt hn = 0; /* hufts used in space */ ++ uIntf *v; /* work area for huft_build */ ++ ++ /* allocate work area */ ++ if ((v = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL) ++ return Z_MEM_ERROR; ++ ++ /* build literal/length tree */ ++ r = huft_build(c, nl, 257, cplens, cplext, tl, bl, hp, &hn, v); ++ if (r != Z_OK || *bl == 0) ++ { ++ if (r == Z_DATA_ERROR) ++ z->msg = (char*)"oversubscribed literal/length tree"; ++ else if (r != Z_MEM_ERROR) ++ { ++ z->msg = (char*)"incomplete literal/length tree"; ++ r = Z_DATA_ERROR; ++ } ++ ZFREE(z, v); ++ return r; ++ } ++ ++ /* build distance tree */ ++ r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, hp, &hn, v); ++ if (r != Z_OK || (*bd == 0 && nl > 257)) ++ { ++ if (r == Z_DATA_ERROR) ++ z->msg = (char*)"oversubscribed distance tree"; ++ else if (r == Z_BUF_ERROR) { ++#ifdef PKZIP_BUG_WORKAROUND ++ r = Z_OK; ++ } ++#else ++ z->msg = (char*)"incomplete distance tree"; ++ r = Z_DATA_ERROR; ++ } ++ else if (r != Z_MEM_ERROR) ++ { ++ z->msg = (char*)"empty distance tree with lengths"; ++ r = Z_DATA_ERROR; ++ } ++ ZFREE(z, v); ++ return r; ++#endif ++ } ++ ++ /* done */ ++ ZFREE(z, v); ++ return Z_OK; ++} ++ ++ ++/* build fixed tables only once--keep them here */ ++#ifdef BUILDFIXED ++local int fixed_built = 0; ++#define FIXEDH 544 /* number of hufts used by fixed tables */ ++local inflate_huft fixed_mem[FIXEDH]; ++local uInt fixed_bl; ++local uInt fixed_bd; ++local inflate_huft *fixed_tl; ++local inflate_huft *fixed_td; ++#else ++#include "inffixed.h" ++#endif ++ ++ ++int inflate_trees_fixed(bl, bd, tl, td, z) ++uIntf *bl; /* literal desired/actual bit depth */ ++uIntf *bd; /* distance desired/actual bit depth */ ++inflate_huft * FAR *tl; /* literal/length tree result */ ++inflate_huft * FAR *td; /* distance tree result */ ++z_streamp z; /* for memory allocation */ ++{ ++#ifdef BUILDFIXED ++ /* build fixed tables if not 
already */ ++ if (!fixed_built) ++ { ++ int k; /* temporary variable */ ++ uInt f = 0; /* number of hufts used in fixed_mem */ ++ uIntf *c; /* length list for huft_build */ ++ uIntf *v; /* work area for huft_build */ ++ ++ /* allocate memory */ ++ if ((c = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL) ++ return Z_MEM_ERROR; ++ if ((v = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL) ++ { ++ ZFREE(z, c); ++ return Z_MEM_ERROR; ++ } ++ ++ /* literal table */ ++ for (k = 0; k < 144; k++) ++ c[k] = 8; ++ for (; k < 256; k++) ++ c[k] = 9; ++ for (; k < 280; k++) ++ c[k] = 7; ++ for (; k < 288; k++) ++ c[k] = 8; ++ fixed_bl = 9; ++ huft_build(c, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl, ++ fixed_mem, &f, v); ++ ++ /* distance table */ ++ for (k = 0; k < 30; k++) ++ c[k] = 5; ++ fixed_bd = 5; ++ huft_build(c, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd, ++ fixed_mem, &f, v); ++ ++ /* done */ ++ ZFREE(z, v); ++ ZFREE(z, c); ++ fixed_built = 1; ++ } ++#endif ++ *bl = fixed_bl; ++ *bd = fixed_bd; ++ *tl = fixed_tl; ++ *td = fixed_td; ++ return Z_OK; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/inftrees.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,63 @@ ++/* inftrees.h -- header to use inftrees.c ++ * Copyright (C) 1995-2002 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++/* WARNING: this file should *not* be used by applications. It is ++ part of the implementation of the compression library and is ++ subject to change. Applications should only use zlib.h. ++ */ ++ ++/* Huffman code lookup table entry--this entry is four bytes for machines ++ that have 16-bit pointers (e.g. PC's in the small or medium model). 
*/ ++ ++#ifndef _INFTREES_H ++#define _INFTREES_H ++ ++typedef struct inflate_huft_s FAR inflate_huft; ++ ++struct inflate_huft_s { ++ union { ++ struct { ++ Byte Exop; /* number of extra bits or operation */ ++ Byte Bits; /* number of bits in this code or subcode */ ++ } what; ++ uInt pad; /* pad structure to a power of 2 (4 bytes for */ ++ } word; /* 16-bit, 8 bytes for 32-bit int's) */ ++ uInt base; /* literal, length base, distance base, ++ or table offset */ ++}; ++ ++/* Maximum size of dynamic tree. The maximum found in a long but non- ++ exhaustive search was 1004 huft structures (850 for length/literals ++ and 154 for distances, the latter actually the result of an ++ exhaustive search). The actual maximum is not known, but the ++ value below is more than safe. */ ++#define MANY 1440 ++ ++extern int inflate_trees_bits OF(( ++ uIntf *, /* 19 code lengths */ ++ uIntf *, /* bits tree desired/actual depth */ ++ inflate_huft * FAR *, /* bits tree result */ ++ inflate_huft *, /* space for trees */ ++ z_streamp)); /* for messages */ ++ ++extern int inflate_trees_dynamic OF(( ++ uInt, /* number of literal/length codes */ ++ uInt, /* number of distance codes */ ++ uIntf *, /* that many (total) code lengths */ ++ uIntf *, /* literal desired/actual bit depth */ ++ uIntf *, /* distance desired/actual bit depth */ ++ inflate_huft * FAR *, /* literal/length tree result */ ++ inflate_huft * FAR *, /* distance tree result */ ++ inflate_huft *, /* space for trees */ ++ z_streamp)); /* for messages */ ++ ++extern int inflate_trees_fixed OF(( ++ uIntf *, /* literal desired/actual bit depth */ ++ uIntf *, /* distance desired/actual bit depth */ ++ inflate_huft * FAR *, /* literal/length tree result */ ++ inflate_huft * FAR *, /* distance tree result */ ++ z_streamp)); /* for memory allocation */ ++ ++#endif /* _INFTREES_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/infutil.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,87 @@ ++/* inflate_util.c -- data and routines 
common to blocks and codes ++ * Copyright (C) 1995-2002 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include ++#include "infblock.h" ++#include "inftrees.h" ++#include "infcodes.h" ++#include "infutil.h" ++ ++struct inflate_codes_state {int dummy;}; /* for buggy compilers */ ++ ++/* And'ing with mask[n] masks the lower n bits */ ++uInt inflate_mask[17] = { ++ 0x0000, ++ 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, ++ 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff ++}; ++ ++ ++/* copy as much as possible from the sliding window to the output area */ ++int inflate_flush(s, z, r) ++inflate_blocks_statef *s; ++z_streamp z; ++int r; ++{ ++ uInt n; ++ Bytef *p; ++ Bytef *q; ++ ++ /* local copies of source and destination pointers */ ++ p = z->next_out; ++ q = s->read; ++ ++ /* compute number of bytes to copy as far as end of window */ ++ n = (uInt)((q <= s->write ? s->write : s->end) - q); ++ if (n > z->avail_out) n = z->avail_out; ++ if (n && r == Z_BUF_ERROR) r = Z_OK; ++ ++ /* update counters */ ++ z->avail_out -= n; ++ z->total_out += n; ++ ++ /* update check information */ ++ if (s->checkfn != Z_NULL) ++ z->adler = s->check = (*s->checkfn)(s->check, q, n); ++ ++ /* copy as far as end of window */ ++ zmemcpy(p, q, n); ++ p += n; ++ q += n; ++ ++ /* see if more to copy at beginning of window */ ++ if (q == s->end) ++ { ++ /* wrap pointers */ ++ q = s->window; ++ if (s->write == s->end) ++ s->write = s->window; ++ ++ /* compute bytes to copy */ ++ n = (uInt)(s->write - q); ++ if (n > z->avail_out) n = z->avail_out; ++ if (n && r == Z_BUF_ERROR) r = Z_OK; ++ ++ /* update counters */ ++ z->avail_out -= n; ++ z->total_out += n; ++ ++ /* update check information */ ++ if (s->checkfn != Z_NULL) ++ z->adler = s->check = (*s->checkfn)(s->check, q, n); ++ ++ /* copy */ ++ zmemcpy(p, q, n); ++ p += n; ++ q += n; ++ } ++ ++ /* update pointers */ ++ z->next_out = p; ++ s->read = q; ++ ++ /* 
done */ ++ return r; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/infutil.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,98 @@ ++/* infutil.h -- types and macros common to blocks and codes ++ * Copyright (C) 1995-2002 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++/* WARNING: this file should *not* be used by applications. It is ++ part of the implementation of the compression library and is ++ subject to change. Applications should only use zlib.h. ++ */ ++ ++#ifndef _INFUTIL_H ++#define _INFUTIL_H ++ ++typedef enum { ++ TYPE, /* get type bits (3, including end bit) */ ++ LENS, /* get lengths for stored */ ++ STORED, /* processing stored block */ ++ TABLE, /* get table lengths */ ++ BTREE, /* get bit lengths tree for a dynamic block */ ++ DTREE, /* get length, distance trees for a dynamic block */ ++ CODES, /* processing fixed or dynamic block */ ++ DRY, /* output remaining window bytes */ ++ DONE, /* finished last block, done */ ++ BAD} /* got a data error--stuck here */ ++inflate_block_mode; ++ ++/* inflate blocks semi-private state */ ++struct inflate_blocks_state { ++ ++ /* mode */ ++ inflate_block_mode mode; /* current inflate_block mode */ ++ ++ /* mode dependent information */ ++ union { ++ uInt left; /* if STORED, bytes left to copy */ ++ struct { ++ uInt table; /* table lengths (14 bits) */ ++ uInt index; /* index into blens (or border) */ ++ uIntf *blens; /* bit lengths of codes */ ++ uInt bb; /* bit length tree depth */ ++ inflate_huft *tb; /* bit length decoding tree */ ++ } trees; /* if DTREE, decoding info for trees */ ++ struct { ++ inflate_codes_statef ++ *codes; ++ } decode; /* if CODES, current state */ ++ } sub; /* submode */ ++ uInt last; /* true if this block is the last block */ ++ ++ /* mode independent information */ ++ uInt bitk; /* bits in bit buffer */ ++ uLong bitb; /* bit buffer */ ++ inflate_huft *hufts; /* single malloc for tree space */ ++ Bytef *window; /* sliding window 
*/ ++ Bytef *end; /* one byte after sliding window */ ++ Bytef *read; /* window read pointer */ ++ Bytef *write; /* window write pointer */ ++ check_func checkfn; /* check function */ ++ uLong check; /* check on output */ ++ ++}; ++ ++ ++/* defines for inflate input/output */ ++/* update pointers and return */ ++#define UPDBITS {s->bitb=b;s->bitk=k;} ++#define UPDIN {z->avail_in=n;z->total_in+=p-z->next_in;z->next_in=p;} ++#define UPDOUT {s->write=q;} ++#define UPDATE {UPDBITS UPDIN UPDOUT} ++#define LEAVE {UPDATE return inflate_flush(s,z,r);} ++/* get bytes and bits */ ++#define LOADIN {p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;} ++#define NEEDBYTE {if(n)r=Z_OK;else LEAVE} ++#define NEXTBYTE (n--,*p++) ++#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<>=(j);k-=(j);} ++/* output bytes */ ++#define WAVAIL (uInt)(qread?s->read-q-1:s->end-q) ++#define LOADOUT {q=s->write;m=(uInt)WAVAIL;} ++#define WRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=(uInt)WAVAIL;}} ++#define FLUSH {UPDOUT r=inflate_flush(s,z,r); LOADOUT} ++#define NEEDOUT {if(m==0){WRAP if(m==0){FLUSH WRAP if(m==0) LEAVE}}r=Z_OK;} ++#define OUTBYTE(a) {*q++=(Byte)(a);m--;} ++/* load local pointers */ ++#define LOAD {LOADIN LOADOUT} ++ ++/* masks for lower bits (size given to avoid silly warnings with Visual C++) */ ++extern uInt inflate_mask[17]; ++ ++/* copy as much as possible from the sliding window to the output area */ ++extern int inflate_flush OF(( ++ inflate_blocks_statef *, ++ z_streamp , ++ int)); ++ ++struct internal_state {int dummy;}; /* for buggy compilers */ ++ ++#endif /* _INFUTIL_H */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/initaddr.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,50 @@ ++/* ++ * initialize address structure ++ * Copyright (C) 2000 Henry Spencer. 
++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: initaddr.c,v 1.6 2004-07-10 07:43:47 mcr Exp $ ++ */ ++#include "openswan.h" ++ ++/* ++ - initaddr - initialize ip_address from bytes ++ */ ++err_t /* NULL for success, else string literal */ ++initaddr(src, srclen, af, dst) ++const unsigned char *src; ++size_t srclen; ++int af; /* address family */ ++ip_address *dst; ++{ ++ switch (af) { ++ case AF_INET: ++ if (srclen != 4) ++ return "IPv4 address must be exactly 4 bytes"; ++ dst->u.v4.sin_family = af; ++ dst->u.v4.sin_port = 0; /* unused */ ++ memcpy((char *)&dst->u.v4.sin_addr.s_addr, src, srclen); ++ break; ++ case AF_INET6: ++ if (srclen != 16) ++ return "IPv6 address must be exactly 16 bytes"; ++ dst->u.v6.sin6_family = af; ++ dst->u.v6.sin6_flowinfo = 0; /* unused */ ++ dst->u.v6.sin6_port = 0; /* unused */ ++ memcpy((char *)&dst->u.v6.sin6_addr, src, srclen); ++ break; ++ default: ++ return "unknown address family in initaddr"; ++ break; ++ } ++ return NULL; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipcomp.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,697 @@ ++/* ++ * IPCOMP zlib interface code. 
++ * Copyright (C) 2000 Svenning Soerensen ++ * Copyright (C) 2000, 2001 Richard Guy Briggs ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ */ ++ ++char ipcomp_c_version[] = "RCSID $Id: ipcomp.c,v 1.41.2.8 2007-10-30 21:33:40 paul Exp $"; ++ ++/* SSS */ ++ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++ ++#define __NO_VERSION__ ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include ++#include ++#include ++#include ++ ++#include /* struct device, and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include ++#include ++#include ++ ++#include ++ ++#include ++ ++#include "openswan/radij.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_sa.h" ++ ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_tunnel.h" ++#include "openswan/ipsec_rcv.h" /* sysctl_ipsec_inbound_policy_check */ ++#include "openswan/ipsec_proto.h" ++#include "openswan/ipcomp.h" ++#include "zlib/zlib.h" ++#include "zlib/zutil.h" ++ ++#include /* SADB_X_CALG_DEFLATE */ ++ ++#ifdef CONFIG_KLIPS_DEBUG ++int sysctl_ipsec_debug_ipcomp = 0; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++static ++struct sk_buff *skb_copy_ipcomp(struct sk_buff *skb, int data_growth, int gfp_mask); ++ ++static ++voidpf my_zcalloc(voidpf opaque, uInt items, uInt size) ++{ ++ return (voidpf) 
kmalloc(items*size, GFP_ATOMIC); ++} ++ ++static ++void my_zfree(voidpf opaque, voidpf address) ++{ ++ kfree(address); ++} ++ ++/* ++ * We use this function because sometimes we want to pass a negative offset ++ * into skb_put(), this does not work on 64bit platforms because long to ++ * unsigned int casting. ++ */ ++static inline unsigned char * ++safe_skb_put(struct sk_buff *skb, int extend) ++{ ++ unsigned char *ptr; ++ ++ if (extend>0) { ++ // increase the size of the packet ++ ptr = skb_put(skb, extend); ++ } else { ++ // shrink the size of the packet ++ ptr = skb_tail_pointer(skb); ++ skb_trim (skb, skb->len + extend); ++ } ++ ++ return ptr; ++} ++ ++struct sk_buff *skb_compress(struct sk_buff *skb, struct ipsec_sa *ips, unsigned int *flags) ++{ ++ struct iphdr *iph; ++ unsigned int iphlen, pyldsz, cpyldsz; ++ unsigned char *buffer; ++ z_stream zs; ++ int zresult; ++ ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_compress: .\n"); ++ ++ if(skb == NULL) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_compress: " ++ "passed in NULL skb, returning ERROR.\n"); ++ if(flags != NULL) { ++ *flags |= IPCOMP_PARMERROR; ++ } ++ return skb; ++ } ++ ++ if(ips == NULL) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_compress: " ++ "passed in NULL ipsec_sa needed for cpi, returning ERROR.\n"); ++ if(flags) { ++ *flags |= IPCOMP_PARMERROR; ++ } ++ return skb; ++ } ++ ++ if (flags == NULL) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_compress: " ++ "passed in NULL flags, returning ERROR.\n"); ++ ipsec_kfree_skb(skb); ++ return NULL; ++ } ++ ++#ifdef NET_21 ++ iph = ip_hdr(skb); ++#else /* NET_21 */ ++ iph = skb->ip_hdr; ++#endif /* NET_21 */ ++ ++ switch (iph->protocol) { ++ case IPPROTO_COMP: ++ case IPPROTO_AH: ++ case IPPROTO_ESP: ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_compress: " ++ "skipping compression of packet with ip protocol %d.\n", ++ iph->protocol); ++ *flags |= 
IPCOMP_UNCOMPRESSABLE; ++ return skb; ++ } ++ ++ /* Don't compress packets already fragmented */ ++ if (iph->frag_off & __constant_htons(IP_MF | IP_OFFSET)) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_compress: " ++ "skipping compression of fragmented packet.\n"); ++ *flags |= IPCOMP_UNCOMPRESSABLE; ++ return skb; ++ } ++ ++ iphlen = iph->ihl << 2; ++ pyldsz = ntohs(iph->tot_len) - iphlen; ++ ++ /* Don't compress less than 90 bytes (rfc 2394) */ ++ if (pyldsz < 90) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_compress: " ++ "skipping compression of tiny packet, len=%d.\n", ++ pyldsz); ++ *flags |= IPCOMP_UNCOMPRESSABLE; ++ return skb; ++ } ++ ++ /* Adaptive decision */ ++ if (ips->ips_comp_adapt_skip) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_compress: " ++ "skipping compression: ips_comp_adapt_skip=%d.\n", ++ ips->ips_comp_adapt_skip); ++ ips->ips_comp_adapt_skip--; ++ *flags |= IPCOMP_UNCOMPRESSABLE; ++ return skb; ++ } ++ ++ zs.zalloc = my_zcalloc; ++ zs.zfree = my_zfree; ++ zs.opaque = 0; ++ ++ /* We want to use deflateInit2 because we don't want the adler ++ header. */ ++ zresult = deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -11, ++ DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY); ++ if (zresult != Z_OK) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_error:skb_compress: " ++ "deflateInit2() returned error %d (%s), " ++ "skipping compression.\n", ++ zresult, ++ zs.msg ? zs.msg : zError(zresult)); ++ *flags |= IPCOMP_COMPRESSIONERROR; ++ return skb; ++ } ++ ++ ++ /* Max output size. Result should be max this size. ++ * Implementation specific tweak: ++ * If it's not at least 32 bytes and 6.25% smaller than ++ * the original packet, it's probably not worth wasting ++ * the receiver's CPU cycles decompressing it. ++ * Your mileage may vary. ++ */ ++ cpyldsz = pyldsz - sizeof(struct ipcomphdr) - (pyldsz <= 512 ? 
32 : pyldsz >> 4); ++ ++ buffer = kmalloc(cpyldsz, GFP_ATOMIC); ++ if (!buffer) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_error:skb_compress: " ++ "unable to kmalloc(%d, GFP_ATOMIC), " ++ "skipping compression.\n", ++ cpyldsz); ++ *flags |= IPCOMP_COMPRESSIONERROR; ++ deflateEnd(&zs); ++ return skb; ++ } ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if(sysctl_ipsec_debug_ipcomp && sysctl_ipsec_debug_verbose) { ++ __u8 *c; ++ ++ c = (__u8*)iph + iphlen; ++ ipsec_dmp_block("compress before", c, pyldsz); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ zs.next_in = (char *) iph + iphlen; /* start of payload */ ++ zs.avail_in = pyldsz; ++ zs.next_out = buffer; /* start of compressed payload */ ++ zs.avail_out = cpyldsz; ++ ++ /* Finish compression in one step */ ++ zresult = deflate(&zs, Z_FINISH); ++ ++ /* Free all dynamically allocated buffers */ ++ deflateEnd(&zs); ++ if (zresult != Z_STREAM_END) { ++ *flags |= IPCOMP_UNCOMPRESSABLE; ++ kfree(buffer); ++ ++ /* Adjust adaptive counters */ ++ if (++(ips->ips_comp_adapt_tries) == IPCOMP_ADAPT_INITIAL_TRIES) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_compress: " ++ "first %d packets didn't compress, " ++ "skipping next %d\n", ++ IPCOMP_ADAPT_INITIAL_TRIES, ++ IPCOMP_ADAPT_INITIAL_SKIP); ++ ips->ips_comp_adapt_skip = IPCOMP_ADAPT_INITIAL_SKIP; ++ } ++ else if (ips->ips_comp_adapt_tries == IPCOMP_ADAPT_INITIAL_TRIES + IPCOMP_ADAPT_SUBSEQ_TRIES) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_compress: " ++ "next %d packets didn't compress, " ++ "skipping next %d\n", ++ IPCOMP_ADAPT_SUBSEQ_TRIES, ++ IPCOMP_ADAPT_SUBSEQ_SKIP); ++ ips->ips_comp_adapt_skip = IPCOMP_ADAPT_SUBSEQ_SKIP; ++ ips->ips_comp_adapt_tries = IPCOMP_ADAPT_INITIAL_TRIES; ++ } ++ ++ return skb; ++ } ++ ++ /* resulting compressed size */ ++ cpyldsz -= zs.avail_out; ++ ++ /* Insert IPCOMP header */ ++ ((struct ipcomphdr*) ((char*) iph + iphlen))->ipcomp_nh = iph->protocol; ++ ((struct ipcomphdr*) ((char*) iph + 
iphlen))->ipcomp_flags = 0; ++ /* use the bottom 16 bits of the spi for the cpi. The top 16 bits are ++ for internal reference only. */ ++ ((struct ipcomphdr*) (((char*)iph) + iphlen))->ipcomp_cpi = htons((__u16)(ntohl(ips->ips_said.spi) & 0x0000ffff)); ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_compress: " ++ "spi=%08x, spi&0xffff=%04x, cpi=%04x, payload size: raw=%d, comp=%d.\n", ++ ntohl(ips->ips_said.spi), ++ ntohl(ips->ips_said.spi) & 0x0000ffff, ++ ntohs(((struct ipcomphdr*)(((char*)iph)+iphlen))->ipcomp_cpi), ++ pyldsz, ++ cpyldsz); ++ ++ /* Update IP header */ ++ iph->protocol = IPPROTO_COMP; ++ iph->tot_len = htons(iphlen + sizeof(struct ipcomphdr) + cpyldsz); ++#if 1 /* XXX checksum is done by ipsec_tunnel ? */ ++ iph->check = 0; ++ iph->check = ip_fast_csum((char *) iph, iph->ihl); ++#endif ++ ++ /* Copy compressed payload */ ++ memcpy((char *) iph + iphlen + sizeof(struct ipcomphdr), ++ buffer, ++ cpyldsz); ++ kfree(buffer); ++ ++ /* Update skb length/tail by "unputting" the shrinkage */ ++ safe_skb_put (skb, cpyldsz + sizeof(struct ipcomphdr) - pyldsz); ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if(sysctl_ipsec_debug_ipcomp && sysctl_ipsec_debug_verbose) { ++ __u8 *c; ++ ++ c = (__u8*)iph + iphlen + sizeof(struct ipcomphdr); ++ ipsec_dmp_block("compress result", c, cpyldsz); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ ips->ips_comp_adapt_skip = 0; ++ ips->ips_comp_adapt_tries = 0; ++ ++ return skb; ++} ++ ++struct sk_buff *skb_decompress(struct sk_buff *skb, struct ipsec_sa *ips, unsigned int *flags) ++{ ++ struct sk_buff *nskb = NULL; ++ ++ /* original ip header */ ++ struct iphdr *oiph, *iph; ++ unsigned int iphlen, pyldsz, cpyldsz; ++ z_stream zs; ++ int zresult; ++ ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_decompress: .\n"); ++ ++ if(!skb) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_error:skb_decompress: " ++ "passed in NULL skb, returning ERROR.\n"); ++ if (flags) *flags |= IPCOMP_PARMERROR; ++ return skb; 
++ } ++ ++ if(!ips && sysctl_ipsec_inbound_policy_check) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_error:skb_decompress: " ++ "passed in NULL ipsec_sa needed for comp alg, returning ERROR.\n"); ++ if (flags) *flags |= IPCOMP_PARMERROR; ++ return skb; ++ } ++ ++ if (!flags) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_error:skb_decompress: " ++ "passed in NULL flags, returning ERROR.\n"); ++ ipsec_kfree_skb(skb); ++ return NULL; ++ } ++ ++#ifdef NET_21 ++ oiph = ip_hdr(skb); ++#else /* NET_21 */ ++ oiph = skb->ip_hdr; ++#endif /* NET_21 */ ++ ++ iphlen = oiph->ihl << 2; ++ ++ if (oiph->protocol != IPPROTO_COMP) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_error:skb_decompress: " ++ "called with non-IPCOMP packet (protocol=%d)," ++ "skipping decompression.\n", ++ oiph->protocol); ++ *flags |= IPCOMP_PARMERROR; ++ return skb; ++ } ++ ++ if ( (((struct ipcomphdr*)((char*) oiph + iphlen))->ipcomp_flags != 0) ++ || ((((struct ipcomphdr*) ((char*) oiph + iphlen))->ipcomp_cpi ++ != htons(SADB_X_CALG_DEFLATE)) ++ && sysctl_ipsec_inbound_policy_check ++ && (!ips || (ips && (ips->ips_encalg != SADB_X_CALG_DEFLATE)))) ) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_error:skb_decompress: " ++ "called with incompatible IPCOMP packet (flags=%d, " ++ "cpi=%d), ips-compalg=%d, skipping decompression.\n", ++ ntohs(((struct ipcomphdr*) ((char*) oiph + iphlen))->ipcomp_flags), ++ ntohs(((struct ipcomphdr*) ((char*) oiph + iphlen))->ipcomp_cpi), ++ ips ? 
ips->ips_encalg : 0); ++ *flags |= IPCOMP_PARMERROR; ++ ++ return skb; ++ } ++ ++ if (ntohs(oiph->frag_off) & ~0x4000) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_error:skb_decompress: " ++ "called with fragmented IPCOMP packet, " ++ "skipping decompression.\n"); ++ *flags |= IPCOMP_PARMERROR; ++ return skb; ++ } ++ ++ /* original compressed payload size */ ++ cpyldsz = ntohs(oiph->tot_len) - iphlen - sizeof(struct ipcomphdr); ++ ++ zs.zalloc = my_zcalloc; ++ zs.zfree = my_zfree; ++ zs.opaque = 0; ++ ++ zs.next_in = (char *) oiph + iphlen + sizeof(struct ipcomphdr); ++ zs.avail_in = cpyldsz; ++ ++ /* Maybe we should be a bit conservative about memory ++ requirements and use inflateInit2 */ ++ /* Beware, that this might make us unable to decompress packets ++ from other implementations - HINT: check PGPnet source code */ ++ /* We want to use inflateInit2 because we don't want the adler ++ header. */ ++ zresult = inflateInit2(&zs, -15); ++ if (zresult != Z_OK) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_error:skb_decompress: " ++ "inflateInit2() returned error %d (%s), " ++ "skipping decompression.\n", ++ zresult, ++ zs.msg ? zs.msg : zError(zresult)); ++ *flags |= IPCOMP_DECOMPRESSIONERROR; ++ ++ return skb; ++ } ++ ++ /* We have no way of knowing the exact length of the resulting ++ decompressed output before we have actually done the decompression. ++ For now, we guess that the packet will not be bigger than the ++ attached ipsec device's mtu or 16260, whichever is biggest. ++ This may be wrong, since the sender's mtu may be bigger yet. ++ XXX This must be dealt with later XXX ++ */ ++ ++ /* max payload size */ ++ pyldsz = skb->dev ? (skb->dev->mtu < 16260 ? 
16260 : skb->dev->mtu) ++ : (65520 - iphlen); ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_decompress: " ++ "max payload size: %d\n", pyldsz); ++ ++ while (pyldsz > (cpyldsz + sizeof(struct ipcomphdr)) && ++ (nskb = skb_copy_ipcomp(skb, ++ pyldsz - cpyldsz - sizeof(struct ipcomphdr), ++ GFP_ATOMIC)) == NULL) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_error:skb_decompress: " ++ "unable to skb_copy_ipcomp(skb, %d, GFP_ATOMIC), " ++ "trying with less payload size.\n", ++ (int)(pyldsz - cpyldsz - sizeof(struct ipcomphdr))); ++ pyldsz >>=1; ++ } ++ ++ if (!nskb) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_error:skb_decompress: " ++ "unable to allocate memory, dropping packet.\n"); ++ *flags |= IPCOMP_DECOMPRESSIONERROR; ++ inflateEnd(&zs); ++ ++ return skb; ++ } ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if(sysctl_ipsec_debug_ipcomp && sysctl_ipsec_debug_verbose) { ++ __u8 *c; ++ ++ c = (__u8*)oiph + iphlen + sizeof(struct ipcomphdr); ++ ipsec_dmp_block("decompress before", c, cpyldsz); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++#ifdef NET_21 ++ iph = ip_hdr(nskb); ++#else /* NET_21 */ ++ iph = nskb->ip_hdr; ++#endif /* NET_21 */ ++ zs.next_out = (char *)iph + iphlen; ++ zs.avail_out = pyldsz; ++ ++ zresult = inflate(&zs, Z_SYNC_FLUSH); ++ ++ /* work around a bug in zlib, which sometimes wants to taste an extra ++ * byte when being used in the (undocumented) raw deflate mode. ++ */ ++ if (zresult == Z_OK && !zs.avail_in && zs.avail_out) { ++ __u8 zerostuff = 0; ++ ++ zs.next_in = &zerostuff; ++ zs.avail_in = 1; ++ zresult = inflate(&zs, Z_FINISH); ++ } ++ ++ inflateEnd(&zs); ++ if (zresult != Z_STREAM_END) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_error:skb_decompress: " ++ "inflate() returned error %d (%s), " ++ "skipping decompression.\n", ++ zresult, ++ zs.msg ? 
zs.msg : zError(zresult)); ++ *flags |= IPCOMP_DECOMPRESSIONERROR; ++ ipsec_kfree_skb(nskb); ++ ++ return skb; ++ } ++ ++ /* Update IP header */ ++ /* resulting decompressed size */ ++ pyldsz -= zs.avail_out; ++ iph->tot_len = htons(iphlen + pyldsz); ++ iph->protocol = ((struct ipcomphdr*) ((char*) oiph + iphlen))->ipcomp_nh; ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_decompress: " ++ "spi=%08x, spi&0xffff=%04x, cpi=%04x, payload size: comp=%d, raw=%d, nh=%d.\n", ++ ips ? ntohl(ips->ips_said.spi) : 0, ++ ips ? ntohl(ips->ips_said.spi) & 0x0000ffff : 0, ++ ntohs(((struct ipcomphdr*)(((char*)oiph)+iphlen))->ipcomp_cpi), ++ cpyldsz, ++ pyldsz, ++ iph->protocol); ++ ++#if 1 /* XXX checksum is done by ipsec_rcv ? */ ++ iph->check = 0; ++ iph->check = ip_fast_csum((char*) iph, iph->ihl); ++#endif ++ ++ /* Update skb length/tail by "unputting" the unused data area */ ++ safe_skb_put(nskb, -zs.avail_out); ++ ++ ipsec_kfree_skb(skb); ++ ++ if (iph->protocol == IPPROTO_COMP) ++ { ++#ifdef CONFIG_KLIPS_DEBUG ++ if(sysctl_ipsec_debug_ipcomp) ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_decompress: " ++ "Eh? inner packet is also compressed, dropping.\n"); ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ ipsec_kfree_skb(nskb); ++ return NULL; ++ } ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if(sysctl_ipsec_debug_ipcomp && sysctl_ipsec_debug_verbose) { ++ __u8 *c; ++ ++ c = (__u8*)iph + iphlen; ++ ipsec_dmp_block("decompress result", c, pyldsz); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ return nskb; ++} ++ ++ ++/* this is derived from skb_copy() in linux 2.2.14 */ ++/* May be incompatible with other kernel versions!! 
*/ ++static ++struct sk_buff *skb_copy_ipcomp(struct sk_buff *skb, int data_growth, int gfp_mask) ++{ ++ struct sk_buff *n; ++ struct iphdr *iph; ++ unsigned long offset; ++ unsigned int iphlen; ++ ++ if(!skb) { ++ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp, ++ "klips_debug:skb_copy_ipcomp: " ++ "passed in NULL skb, returning NULL.\n"); ++ return NULL; ++ } ++ ++ /* ++ * Allocate the copy buffer ++ */ ++ ++#ifdef NET_21 ++ iph = ip_hdr(skb); ++#else /* NET_21 */ ++ iph = skb->ip_hdr; ++#endif /* NET_21 */ ++ if (!iph) return NULL; ++ iphlen = iph->ihl << 2; ++ ++ n=alloc_skb(skb_end_pointer(skb) - skb->head + data_growth, gfp_mask); ++ if(n==NULL) ++ return NULL; ++ ++ /* ++ * Shift between the two data areas in bytes ++ */ ++ ++ offset=n->head-skb->head; ++ ++ /* Set the data pointer */ ++ skb_reserve(n,skb->data-skb->head); ++ /* Set the tail pointer and length */ ++ safe_skb_put(n,skb->len+data_growth); ++ /* Copy the bytes up to and including the ip header */ ++ memcpy(n->head, ++ skb->head, ++ ((char *)iph - (char *)skb->head) + iphlen); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) ++ n->list=NULL; ++#endif ++ n->next=NULL; ++ n->prev=NULL; ++ n->sk=NULL; ++ n->dev=skb->dev; ++ if (skb_transport_header(skb)) ++ skb_set_transport_header(n, offset); ++ n->protocol=skb->protocol; ++#ifdef NET_21 ++ n->csum = 0; ++ n->priority=skb->priority; ++ n->dst=dst_clone(skb->dst); ++ skb_set_network_header(n, offset); ++#ifndef NETDEV_23 ++ n->is_clone=0; ++#endif /* NETDEV_23 */ ++ atomic_set(&n->users, 1); ++ n->destructor = NULL; ++#ifdef HAVE_SOCK_SECURITY ++ n->security=skb->security; ++#endif ++ memcpy(n->cb, skb->cb, sizeof(skb->cb)); ++#ifdef CONFIG_IP_FIREWALL ++ n->fwmark = skb->fwmark; ++#endif ++#else /* NET_21 */ ++ n->link3=NULL; ++ n->when=skb->when; ++ n->ip_hdr=(struct iphdr *)(((char *)skb->ip_hdr)+offset); ++ n->saddr=skb->saddr; ++ n->daddr=skb->daddr; ++ n->raddr=skb->raddr; ++ n->seq=skb->seq; ++ n->end_seq=skb->end_seq; ++ n->ack_seq=skb->ack_seq; ++ 
n->acked=skb->acked; ++ n->free=1; ++ n->arp=skb->arp; ++ n->tries=0; ++ n->lock=0; ++ n->users=0; ++ memcpy(n->proto_priv, skb->proto_priv, sizeof(skb->proto_priv)); ++#endif /* NET_21 */ ++ if (skb_mac_header(skb)) ++ skb_set_mac_header(n, offset); ++#ifndef NETDEV_23 ++ n->used=skb->used; ++#endif /* !NETDEV_23 */ ++ n->pkt_type=skb->pkt_type; ++#ifndef NETDEV_23 ++ n->pkt_bridged=skb->pkt_bridged; ++#endif /* NETDEV_23 */ ++ n->ip_summed=0; ++#ifdef HAVE_TSTAMP ++ n->tstamp = skb->tstamp; ++#else ++ n->stamp=skb->stamp; ++#endif ++#ifndef NETDEV_23 /* this seems to have been removed in 2.4 */ ++#if defined(CONFIG_SHAPER) || defined(CONFIG_SHAPER_MODULE) ++ n->shapelatency=skb->shapelatency; /* Latency on frame */ ++ n->shapeclock=skb->shapeclock; /* Time it should go out */ ++ n->shapelen=skb->shapelen; /* Frame length in clocks */ ++ n->shapestamp=skb->shapestamp; /* Stamp for shaper */ ++ n->shapepend=skb->shapepend; /* Pending */ ++#endif /* defined(CONFIG_SHAPER) || defined(CONFIG_SHAPER_MODULE) */ ++#endif /* NETDEV_23 */ ++ ++ return n; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_ah.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,411 @@ ++/* ++ * processing code for AH ++ * Copyright (C) 2003-2004 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ */ ++ ++char ipsec_ah_c_version[] = "RCSID $Id: ipsec_ah.c,v 1.12.2.3 2007-09-05 02:56:09 paul Exp $"; ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++ ++#define __NO_VERSION__ ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include ++#include ++#ifdef SPINLOCK ++# ifdef SPINLOCK_23 ++# include /* *lock* */ ++# else /* SPINLOCK_23 */ ++# include /* *lock* */ ++# endif /* SPINLOCK_23 */ ++#endif /* SPINLOCK */ ++ ++#include ++#include ++ ++#include "openswan/radij.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_sa.h" ++ ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_tunnel.h" ++#include "openswan/ipsec_rcv.h" ++#include "openswan/ipsec_xmit.h" ++ ++#include "openswan/ipsec_auth.h" ++#include "openswan/ipsec_ah.h" ++#include "openswan/ipsec_proto.h" ++ ++__u32 zeroes[AH_AMAX]; ++ ++enum ipsec_rcv_value ++ipsec_rcv_ah_checks(struct ipsec_rcv_state *irs, ++ struct sk_buff *skb) ++{ ++ int ahminlen; ++ ++ ahminlen = irs->hard_header_len + sizeof(struct iphdr); ++ ++ /* take care not to deref this pointer until we check the minlen though */ ++ irs->protostuff.ahstuff.ahp = (struct ahhdr *)skb_transport_header(skb); ++ ++ if((skb->len < ahminlen+sizeof(struct ahhdr)) || ++ (skb->len < ahminlen+(irs->protostuff.ahstuff.ahp->ah_hl << 2))) { ++ KLIPS_PRINT(debug_rcv & DB_RX_INAU, ++ "klips_debug:ipsec_rcv: " ++ "runt ah packet of skb->len=%d received from %s, dropped.\n", ++ skb->len, ++ irs->ipsaddr_txt); ++ if(irs->stats) { ++ irs->stats->rx_errors++; ++ } ++ return IPSEC_RCV_BADLEN; ++ } ++ ++ irs->said.spi = 
irs->protostuff.ahstuff.ahp->ah_spi; ++ ++ /* XXX we only support the one 12-byte authenticator for now */ ++ if(irs->protostuff.ahstuff.ahp->ah_hl != ((AHHMAC_HASHLEN+AHHMAC_RPLLEN) >> 2)) { ++ KLIPS_PRINT(debug_rcv & DB_RX_INAU, ++ "klips_debug:ipsec_rcv: " ++ "bad authenticator length %ld, expected %lu from %s.\n", ++ (long)(irs->protostuff.ahstuff.ahp->ah_hl << 2), ++ (unsigned long) sizeof(struct ahhdr), ++ irs->ipsaddr_txt); ++ if(irs->stats) { ++ irs->stats->rx_errors++; ++ } ++ return IPSEC_RCV_BADLEN; ++ } ++ ++ return IPSEC_RCV_OK; ++} ++ ++ ++enum ipsec_rcv_value ++ipsec_rcv_ah_setup_auth(struct ipsec_rcv_state *irs, ++ struct sk_buff *skb, ++ __u32 *replay, ++ unsigned char **authenticator) ++{ ++ struct ahhdr *ahp = irs->protostuff.ahstuff.ahp; ++ ++ *replay = ntohl(ahp->ah_rpl); ++ *authenticator = ahp->ah_data; ++ ++ return IPSEC_RCV_OK; ++} ++ ++enum ipsec_rcv_value ++ipsec_rcv_ah_authcalc(struct ipsec_rcv_state *irs, ++ struct sk_buff *skb) ++{ ++ struct auth_alg *aa; ++ struct ahhdr *ahp = irs->protostuff.ahstuff.ahp; ++ union { ++ MD5_CTX md5; ++ SHA1_CTX sha1; ++ } tctx; ++ struct iphdr ipo; ++ int ahhlen; ++ ++ aa = irs->authfuncs; ++ ++ /* copy the initialized keying material */ ++ memcpy(&tctx, irs->ictx, irs->ictx_len); ++ ++ ipo = *irs->ipp; ++ ipo.tos = 0; /* mutable RFC 2402 3.3.3.1.1.1 */ ++ ipo.frag_off = 0; ++ ipo.ttl = 0; ++ ipo.check = 0; ++ ++ ++ /* do the sanitized header */ ++ (*aa->update)((void*)&tctx, (caddr_t)&ipo, sizeof(struct iphdr)); ++ ++ /* XXX we didn't do the options here! 
*/ ++ ++ /* now do the AH header itself */ ++ ahhlen = AH_BASIC_LEN + (ahp->ah_hl << 2); ++ (*aa->update)((void*)&tctx, (caddr_t)ahp, ahhlen - AHHMAC_HASHLEN); ++ ++ /* now, do some zeroes */ ++ (*aa->update)((void*)&tctx, (caddr_t)zeroes, AHHMAC_HASHLEN); ++ ++ /* finally, do the packet contents themselves */ ++ (*aa->update)((void*)&tctx, ++ (caddr_t)skb_transport_header(skb) + ahhlen, ++ skb->len - ahhlen); ++ ++ (*aa->final)(irs->hash, (void *)&tctx); ++ ++ memcpy(&tctx, irs->octx, irs->octx_len); ++ ++ (*aa->update)((void *)&tctx, irs->hash, aa->hashlen); ++ (*aa->final)(irs->hash, (void *)&tctx); ++ ++ return IPSEC_RCV_OK; ++} ++ ++enum ipsec_rcv_value ++ipsec_rcv_ah_decap(struct ipsec_rcv_state *irs) ++{ ++ struct ahhdr *ahp = irs->protostuff.ahstuff.ahp; ++ struct sk_buff *skb; ++ int ahhlen; ++ ++ skb=irs->skb; ++ ++ ahhlen = AH_BASIC_LEN + (ahp->ah_hl << 2); ++ ++ irs->ipp->tot_len = htons(ntohs(irs->ipp->tot_len) - ahhlen); ++ irs->next_header = ahp->ah_nh; ++ ++ /* ++ * move the IP header forward by the size of the AH header, which ++ * will remove the the AH header from the packet. ++ */ ++ memmove((void *)(skb_network_header(skb) + ahhlen), ++ (void *)(skb_network_header(skb)), irs->iphlen); ++ ++ ipsec_rcv_dmp("ah postmove", skb->data, skb->len); ++ ++ /* skb_pull below, will move up by ahhlen */ ++ ++ /* XXX not clear how this can happen, as the message indicates */ ++ if(skb->len < ahhlen) { ++ printk(KERN_WARNING ++ "klips_error:ipsec_rcv: " ++ "tried to skb_pull ahhlen=%d, %d available. 
This should never happen, please report.\n", ++ ahhlen, ++ (int)(skb->len)); ++ return IPSEC_RCV_DECAPFAIL; ++ } ++ skb_pull(skb, ahhlen); ++ ++ skb_set_network_header(skb, ahhlen); ++ irs->ipp = ip_hdr(skb); ++ ++ ipsec_rcv_dmp("ah postpull", (void *)ip_hdr(skb), skb->len); ++ ++ return IPSEC_RCV_OK; ++} ++ ++enum ipsec_xmit_value ++ipsec_xmit_ah_setup(struct ipsec_xmit_state *ixs) ++{ ++ struct iphdr ipo; ++ struct ahhdr *ahp; ++ __u8 hash[AH_AMAX]; ++ union { ++#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 ++ MD5_CTX md5; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++ SHA1_CTX sha1; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ ++ } tctx; ++ unsigned char *dat = (unsigned char *)ixs->iph; ++ ++ ahp = (struct ahhdr *)(dat + ixs->iphlen); ++ ahp->ah_spi = ixs->ipsp->ips_said.spi; ++ ahp->ah_rpl = htonl(++(ixs->ipsp->ips_replaywin_lastseq)); ++ ahp->ah_rv = 0; ++ ahp->ah_nh = ixs->iph->protocol; ++ ahp->ah_hl = (sizeof(struct ahhdr) >> 2) - sizeof(__u64)/sizeof(__u32); ++ ixs->iph->protocol = IPPROTO_AH; ++ ipsec_xmit_dmp("ahp", (char*)ahp, sizeof(*ahp)); ++ ++ ipo = *ixs->iph; ++ ipo.tos = 0; ++ ipo.frag_off = 0; ++ ipo.ttl = 0; ++ ipo.check = 0; ++ ipsec_xmit_dmp("ipo", (char*)&ipo, sizeof(ipo)); ++ ++ switch(ixs->ipsp->ips_authalg) { ++#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 ++ case AH_MD5: ++ tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->ictx; ++ ipsec_xmit_dmp("ictx", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Update(&tctx.md5, (unsigned char *)&ipo, sizeof (struct iphdr)); ++ ipsec_xmit_dmp("ictx+ipo", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Update(&tctx.md5, (unsigned char *)ahp, ++ sizeof(struct ahhdr) - sizeof(ahp->ah_data)); ++ ipsec_xmit_dmp("ictx+ahp", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Update(&tctx.md5, (unsigned char *)zeroes, AHHMAC_HASHLEN); ++ ipsec_xmit_dmp("ictx+zeroes", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Update(&tctx.md5, dat + ixs->iphlen + sizeof(struct ahhdr), ++ ixs->skb->len - ixs->iphlen - 
sizeof(struct ahhdr)); ++ ipsec_xmit_dmp("ictx+dat", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Final(hash, &tctx.md5); ++ ipsec_xmit_dmp("ictx hash", (char*)&hash, sizeof(hash)); ++ tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->octx; ++ ipsec_xmit_dmp("octx", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Update(&tctx.md5, hash, AHMD596_ALEN); ++ ipsec_xmit_dmp("octx+hash", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Final(hash, &tctx.md5); ++ ipsec_xmit_dmp("octx hash", (char*)&hash, sizeof(hash)); ++ ++ memcpy(ahp->ah_data, hash, AHHMAC_HASHLEN); ++ ++ /* paranoid */ ++ memset((caddr_t)&tctx.md5, 0, sizeof(tctx.md5)); ++ memset((caddr_t)hash, 0, sizeof(*hash)); ++ break; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++ case AH_SHA: ++ tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->ictx; ++ SHA1Update(&tctx.sha1, (unsigned char *)&ipo, sizeof (struct iphdr)); ++ SHA1Update(&tctx.sha1, (unsigned char *)ahp, sizeof(struct ahhdr) - sizeof(ahp->ah_data)); ++ SHA1Update(&tctx.sha1, (unsigned char *)zeroes, AHHMAC_HASHLEN); ++ SHA1Update(&tctx.sha1, dat + ixs->iphlen + sizeof(struct ahhdr), ++ ixs->skb->len - ixs->iphlen - sizeof(struct ahhdr)); ++ SHA1Final(hash, &tctx.sha1); ++ tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->octx; ++ SHA1Update(&tctx.sha1, hash, AHSHA196_ALEN); ++ SHA1Final(hash, &tctx.sha1); ++ ++ memcpy(ahp->ah_data, hash, AHHMAC_HASHLEN); ++ ++ /* paranoid */ ++ memset((caddr_t)&tctx.sha1, 0, sizeof(tctx.sha1)); ++ memset((caddr_t)hash, 0, sizeof(*hash)); ++ break; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ ++ default: ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_AH_BADALG; ++ } ++#ifdef NET_21 ++ skb_set_transport_header(ixs->skb, ipsec_skb_offset(ixs->skb, ahp)); ++#endif /* NET_21 */ ++ ++ return IPSEC_XMIT_OK; ++} ++ ++struct xform_functions ah_xform_funcs[]={ ++ { rcv_checks: ipsec_rcv_ah_checks, ++ rcv_setup_auth: ipsec_rcv_ah_setup_auth, ++ rcv_calc_auth: ipsec_rcv_ah_authcalc, 
++ rcv_decrypt: ipsec_rcv_ah_decap, ++ ++ xmit_setup: ipsec_xmit_ah_setup, ++ xmit_headroom: sizeof(struct ahhdr), ++ xmit_needtailroom: 0, ++ }, ++}; ++ ++ ++#ifdef NET_26 ++struct inet_protocol ah_protocol = { ++ .handler = ipsec_rcv, ++ .no_policy = 1, ++}; ++#else ++struct inet_protocol ah_protocol = ++{ ++ ipsec_rcv, /* AH handler */ ++ NULL, /* TUNNEL error control */ ++#ifdef NETDEV_25 ++ 1, /* no policy */ ++#else ++ 0, /* next */ ++ IPPROTO_AH, /* protocol ID */ ++ 0, /* copy */ ++ NULL, /* data */ ++ "AH" /* name */ ++#endif ++}; ++#endif /* NET_26 */ ++ ++/* ++ * $Log: ipsec_ah.c,v $ ++ * Revision 1.12.2.3 2007-09-05 02:56:09 paul ++ * Use the new ipsec_kversion macros by David to deal with 2.6.22 kernels. ++ * Fixes based on David McCullough patch. ++ * ++ * Revision 1.12.2.2 2006/10/06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.12.2.1 2006/02/15 05:35:14 paul ++ * Patch by David McCullough ++ * If you setup a tunnel without ESP it doesn't work. It used to work in ++ * an older openswan version but stopped when klips was modified to deal ++ * with the pulled IP header on the received SKB's. ++ * ++ * The code in ipsec_ah.c still thinks the IP header is there and runs the ++ * hash on the incorrect data. ++ * ++ * Revision 1.12 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. ++ * ++ * Revision 1.11 2005/04/15 19:50:55 mcr ++ * adjustments to use proper skb fields for data. ++ * ++ * Revision 1.10 2004/09/14 00:22:57 mcr ++ * adjustment of MD5* functions. ++ * ++ * Revision 1.9 2004/09/13 02:22:47 mcr ++ * #define inet_protocol if necessary. ++ * ++ * Revision 1.8 2004/09/06 18:35:48 mcr ++ * 2.6.8.1 gets rid of inet_protocol->net_protocol compatibility, ++ * so adjust for that. 
++ * ++ * Revision 1.7 2004/08/22 05:00:48 mcr ++ * if we choose to compile the file, we want the contents, ++ * so don't pull any punches. ++ * ++ * Revision 1.6 2004/08/17 03:27:23 mcr ++ * klips 2.6 edits. ++ * ++ * Revision 1.5 2004/08/14 03:28:24 mcr ++ * fixed log comment to remove warning about embedded comment. ++ * ++ * Revision 1.4 2004/08/04 15:57:07 mcr ++ * moved des .h files to include/des/ * ++ * included 2.6 protocol specific things ++ * started at NAT-T support, but it will require a kernel patch. ++ * ++ * Revision 1.3 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.2 2004/04/06 02:49:25 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_alg.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,1057 @@ ++/* ++ * Modular extensions service and registration functions ++ * ++ * Author: JuanJo Ciarlante ++ * ++ * Version: 0.8.1 ++ * ++ * ipsec_alg.c,v 1.1.2.1 2003/11/21 18:12:23 jjo Exp ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ */ ++#define __NO_VERSION__ ++ ++#if defined (MODULE) ++#include ++#endif ++ ++#include /* printk() */ ++ ++#include /* struct device, and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include ++#include ++#include ++#include ++#include /* memcmp() */ ++#include /* get_random_bytes() */ ++#include /* error codes */ ++#ifdef SPINLOCK ++# ifdef SPINLOCK_23 ++# include /* *lock* */ ++# else /* SPINLOCK_23 */ ++# include /* *lock* */ ++# endif /* SPINLOCK_23 */ ++#endif /* SPINLOCK */ ++ ++#include "openswan/ipsec_param.h" ++#include ++#include "openswan/ipsec_sa.h" ++#include "openswan/radij.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_tunnel.h" ++#include "openswan/ipsec_rcv.h" ++#if defined(CONFIG_KLIPS_ESP) || defined(CONFIG_KLIPS_AH) ++# include "openswan/ipsec_ah.h" ++#endif /* defined(CONFIG_KLIPS_ESP) || defined(CONFIG_KLIPS_AH) */ ++#ifdef CONFIG_KLIPS_ESP ++# include "openswan/ipsec_esp.h" ++#endif /* !CONFIG_KLIPS_ESP */ ++#ifdef CONFIG_KLIPS_IPCOMP ++# include "openswan/ipcomp.h" ++#endif /* CONFIG_KLIPS_COMP */ ++ ++#include ++#include ++ ++#include "openswan/ipsec_alg.h" ++#include "openswan/ipsec_proto.h" ++ ++#if SADB_EALG_MAX < 255 ++#warning Compiling with limited ESP support ( SADB_EALG_MAX < 256 ) ++#endif ++ ++static rwlock_t ipsec_alg_lock = RW_LOCK_UNLOCKED; ++#define IPSEC_ALG_HASHSZ 16 /* must be power of 2, even 2^0=1 */ ++static struct list_head ipsec_alg_hash_table[IPSEC_ALG_HASHSZ]; ++ ++/* Old gcc's will fail here */ ++#define barf_out(fmt, args...) 
do { struct ipsec_alg *ixtc = (struct ipsec_alg *)ixt; printk(KERN_ERR "%s: (%s) " fmt, __FUNCTION__, ixtc->ixt_name , ## args) \ ++ ; goto out; } while(0) ++ ++#ifdef NET_26 ++/* ++ * Must be already protected by lock ++ */ ++static void __ipsec_alg_usage_inc(struct ipsec_alg *ixt) ++{ ++#ifdef MODULE ++ if (ixt->ixt_module) ++ try_module_get(ixt->ixt_module); ++#endif ++ atomic_inc(&ixt->ixt_refcnt); ++} ++static void __ipsec_alg_usage_dec(struct ipsec_alg *ixt) { ++ atomic_dec(&ixt->ixt_refcnt); ++#ifdef MODULE ++ if (ixt->ixt_module) ++ module_put(ixt->ixt_module); ++#endif ++} ++ ++#else ++ ++/* ++ * Must be already protected by lock ++ */ ++static void __ipsec_alg_usage_inc(struct ipsec_alg *ixt) { ++#ifdef MODULE ++ if (ixt->ixt_module) { ++ __MOD_INC_USE_COUNT(ixt->ixt_module); ++ } ++#endif ++ atomic_inc(&ixt->ixt_refcnt); ++} ++static void __ipsec_alg_usage_dec(struct ipsec_alg *ixt) { ++ atomic_dec(&ixt->ixt_refcnt); ++#ifdef MODULE ++ if (ixt->ixt_module) ++ __MOD_DEC_USE_COUNT(ixt->ixt_module); ++#endif ++} ++#endif ++ ++/* ++ * simple hash function, optimized for 0-hash (1 list) special ++ * case ++ */ ++#if IPSEC_ALG_HASHSZ > 1 ++static inline unsigned ipsec_alg_hashfn(int alg_type, int alg_id) { ++ return ((alg_type^alg_id)&(IPSEC_ALG_HASHSZ-1)); ++} ++#else ++#define ipsec_alg_hashfn(x,y) (0) ++#endif ++ ++/***************************************************************** ++ * ++ * INTERNAL table handling: insert, delete, find ++ * ++ *****************************************************************/ ++ ++/* ++ * hash table initialization, called from ipsec_alg_init() ++ */ ++static void ipsec_alg_hash_init(void) { ++ struct list_head *head = ipsec_alg_hash_table; ++ int i = IPSEC_ALG_HASHSZ; ++ do { ++ INIT_LIST_HEAD(head); ++ head++; ++ i--; ++ } while (i); ++} ++/* ++ * hash list lookup by {alg_type, alg_id} and table head, ++ * must be already protected by lock ++ */ ++static struct ipsec_alg *__ipsec_alg_find(unsigned alg_type, unsigned 
alg_id, struct list_head * head) { ++ struct list_head *p; ++ struct ipsec_alg *ixt=NULL; ++ for (p=head->next; p!=head; p=p->next) { ++ ixt = list_entry(p, struct ipsec_alg, ixt_list); ++ if (ixt->ixt_alg_type == alg_type && ixt->ixt_alg_id==alg_id) { ++ goto out; ++ } ++ } ++ ixt=NULL; ++out: ++ return ixt; ++} ++/* ++ * inserts (in front) a new entry in hash table, ++ * called from ipsec_alg_register() when new algorithm is registered. ++ */ ++static int ipsec_alg_insert(struct ipsec_alg *ixt) { ++ int ret=-EINVAL; ++ unsigned hashval=ipsec_alg_hashfn(ixt->ixt_alg_type, ixt->ixt_alg_id); ++ struct list_head *head= ipsec_alg_hash_table + hashval; ++ struct ipsec_alg *ixt_cur; ++ ++ /* new element must be virgin ... */ ++ if (ixt->ixt_list.next != &ixt->ixt_list || ++ ixt->ixt_list.prev != &ixt->ixt_list) { ++ printk(KERN_ERR "ipsec_alg_insert: ixt object \"%s\" " ++ "list head not initialized\n", ++ ixt->ixt_name); ++ return ret; ++ } ++ write_lock_bh(&ipsec_alg_lock); ++ ++ ixt_cur = __ipsec_alg_find(ixt->ixt_alg_type, ixt->ixt_alg_id, head); ++ ++ /* if previous (current) ipsec_alg found check excl flag of _anyone_ */ ++ if (ixt_cur ++ && ((ixt->ixt_state|ixt_cur->ixt_state) & IPSEC_ALG_ST_EXCL)) { ++ barf_out("ipsec_alg for alg_type=%d, alg_id=%d already exist. " ++ "Not loaded (ret=%d).\n", ++ ixt->ixt_alg_type, ++ ixt->ixt_alg_id, ret=-EEXIST); ++ } ++ list_add(&ixt->ixt_list, head); ++ ixt->ixt_state |= IPSEC_ALG_ST_REGISTERED; ++ ret=0; ++out: ++ write_unlock_bh(&ipsec_alg_lock); ++ return ret; ++} ++ ++/* ++ * deletes an existing entry in hash table, ++ * called from ipsec_alg_unregister() when algorithm is unregistered. 
++ */ ++static int ipsec_alg_delete(struct ipsec_alg *ixt) { ++ write_lock_bh(&ipsec_alg_lock); ++ list_del(&ixt->ixt_list); ++ write_unlock_bh(&ipsec_alg_lock); ++ return 0; ++} ++ ++/* ++ * here @user context (read-only when @kernel bh context) ++ * -> no bh disabling ++ * ++ * called from ipsec_sa_init() -> ipsec_alg_sa_init() ++ */ ++static struct ipsec_alg *ipsec_alg_get(int alg_type, int alg_id) ++{ ++ unsigned hashval=ipsec_alg_hashfn(alg_type, alg_id); ++ struct list_head *head= ipsec_alg_hash_table + hashval; ++ struct ipsec_alg *ixt; ++ ++ read_lock(&ipsec_alg_lock); ++ ixt=__ipsec_alg_find(alg_type, alg_id, head); ++ if (ixt) __ipsec_alg_usage_inc(ixt); ++ read_unlock(&ipsec_alg_lock); ++ ++ return ixt; ++} ++ ++static void ipsec_alg_put(struct ipsec_alg *ixt) { ++ __ipsec_alg_usage_dec((struct ipsec_alg *)ixt); ++} ++ ++/***************************************************************** ++ * ++ * INTERFACE for ENC services: key creation, encrypt function ++ * ++ *****************************************************************/ ++ ++/* ++ * main encrypt service entry point ++ * called from ipsec_rcv() with encrypt=IPSEC_ALG_DECRYPT and ++ * ipsec_tunnel_start_xmit with encrypt=IPSEC_ALG_ENCRYPT ++ */ ++int ipsec_alg_esp_encrypt(struct ipsec_sa *sa_p, __u8 * idat, ++ int ilen, const __u8 * iv, int encrypt) ++{ ++ int ret; ++ struct ipsec_alg_enc *ixt_e=sa_p->ips_alg_enc; ++#ifdef CONFIG_KLIPS_DEBUG ++ int debug_flag = (encrypt==IPSEC_ALG_ENCRYPT ? 
++ debug_tunnel : debug_rcv); ++#endif ++ ++ KLIPS_PRINT(debug_flag, ++ "klips_debug:ipsec_alg_esp_encrypt: " ++ "entering with encalg=%d, ixt_e=%p\n", ++ sa_p->ips_encalg, ixt_e); ++ if (ixt_e == NULL) { ++#ifdef CONFIG_KLIPS_DEBUG ++ KLIPS_ERROR(debug_flag, ++ "klips_debug:ipsec_alg_esp_encrypt: " ++ "NULL ipsec_alg_enc object\n"); ++#endif ++ return -1; ++ } ++ KLIPS_PRINT(debug_flag, ++ "klips_debug:ipsec_alg_esp_encrypt: " ++ "calling cbc_encrypt encalg=%d " ++ "ips_key_e=%p idat=%p ilen=%d iv=%p, encrypt=%d\n", ++ sa_p->ips_encalg, ++ sa_p->ips_key_e, idat, ilen, iv, encrypt); ++ ret=ixt_e->ixt_e_cbc_encrypt(ixt_e, sa_p->ips_key_e, idat, ++ ilen, iv, encrypt); ++ KLIPS_PRINT(debug_flag, ++ "klips_debug:ipsec_alg_esp_encrypt: " ++ "returned ret=%d\n", ++ ret); ++ return ret; ++} ++ ++/* ++ * encryption key context creation function ++ * called from pfkey_v2_parser.c:pfkey_ips_init() ++ */ ++int ipsec_alg_enc_key_create(struct ipsec_sa *sa_p) { ++ int ret=-EINVAL; ++ int keyminbits, keymaxbits; ++ caddr_t ekp; ++ struct ipsec_alg_enc *ixt_e=sa_p->ips_alg_enc; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:ipsec_alg_enc_key_create: " ++ "entering with encalg=%d ixt_e=%p\n", ++ sa_p->ips_encalg, ixt_e); ++ if (!ixt_e) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:ipsec_alg_enc_key_create: " ++ "NULL ipsec_alg_enc object\n"); ++ return -EPROTO; ++ } ++ ++ /* ++ * grRRR... DES 7bits jurassic stuff ... 
f*ckk --jjo ++ */ ++ switch(ixt_e->ixt_common.ixt_support.ias_id) { ++ case ESP_3DES: ++ keyminbits=keymaxbits=192;break; ++ case ESP_DES: ++ keyminbits=keymaxbits=64;break; ++ default: ++ keyminbits=ixt_e->ixt_common.ixt_support.ias_keyminbits; ++ keymaxbits=ixt_e->ixt_common.ixt_support.ias_keymaxbits; ++ } ++ if(sa_p->ips_key_bits_eips_key_bits_e>keymaxbits) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:ipsec_alg_enc_key_create: " ++ "incorrect encryption key size for id=%d: %d bits -- " ++ "must be between %d,%d bits\n" /*octets (bytes)\n"*/, ++ ixt_e->ixt_common.ixt_support.ias_id, ++ sa_p->ips_key_bits_e, keyminbits, keymaxbits); ++ ret=-EINVAL; ++ goto ixt_out; ++ } ++ /* save encryption key pointer */ ++ ekp = sa_p->ips_key_e; ++ ++ ++ if (ixt_e->ixt_e_new_key) { ++ sa_p->ips_key_e = ixt_e->ixt_e_new_key(ixt_e, ++ ekp, sa_p->ips_key_bits_e/8); ++ ret = (sa_p->ips_key_e)? 0 : -EINVAL; ++ } else { ++ if((sa_p->ips_key_e = (caddr_t) ++ kmalloc((sa_p->ips_key_e_size = ixt_e->ixt_e_ctx_size), ++ GFP_ATOMIC)) == NULL) { ++ ret=-ENOMEM; ++ goto ixt_out; ++ } ++ /* zero-out key_e */ ++ memset(sa_p->ips_key_e, 0, sa_p->ips_key_e_size); ++ ++ /* I cast here to allow more decoupling in alg module */ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:ipsec_alg_enc_key_create: about to call:" ++ "set_key(key_e=%p, ekp=%p, key_size=%d)\n", ++ (caddr_t)sa_p->ips_key_e, ekp, sa_p->ips_key_bits_e/8); ++ ret = ixt_e->ixt_e_set_key(ixt_e, (caddr_t)sa_p->ips_key_e, ekp, sa_p->ips_key_bits_e/8); ++ } ++ /* paranoid */ ++ memset(ekp, 0, sa_p->ips_key_bits_e/8); ++ kfree(ekp); ++ixt_out: ++ return ret; ++} ++ ++/*************************************************************** ++ * ++ * INTERFACE for AUTH services: key creation, hash functions ++ * ++ ***************************************************************/ ++ ++/* ++ * auth key context creation function ++ * called from pfkey_v2_parser.c:pfkey_ips_init() ++ */ ++int ipsec_alg_auth_key_create(struct ipsec_sa *sa_p) { ++ int 
ret=-EINVAL; ++ struct ipsec_alg_auth *ixt_a=sa_p->ips_alg_auth; ++ int keyminbits, keymaxbits; ++ unsigned char *akp; ++ unsigned int aks; ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:ipsec_alg_auth_key_create: " ++ "entering with authalg=%d ixt_a=%p\n", ++ sa_p->ips_authalg, ixt_a); ++ if (!ixt_a) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:ipsec_alg_auth_key_create: " ++ "NULL ipsec_alg_auth object\n"); ++ return -EPROTO; ++ } ++ keyminbits=ixt_a->ixt_common.ixt_support.ias_keyminbits; ++ keymaxbits=ixt_a->ixt_common.ixt_support.ias_keymaxbits; ++ if(sa_p->ips_key_bits_aips_key_bits_a>keymaxbits) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:ipsec_alg_auth_key_create: incorrect auth" ++ "key size: %d bits -- must be between %d,%d bits\n"/*octets (bytes)\n"*/, ++ sa_p->ips_key_bits_a, keyminbits, keymaxbits); ++ ret=-EINVAL; ++ goto ixt_out; ++ } ++ /* save auth key pointer */ ++ sa_p->ips_auth_bits = ixt_a->ixt_a_keylen * 8; /* XXX XXX */ ++ akp = sa_p->ips_key_a; ++ aks = sa_p->ips_key_a_size; ++ ++ /* will hold: 2 ctx and a blocksize buffer: kb */ ++ sa_p->ips_key_a_size = ixt_a->ixt_a_ctx_size; ++ if((sa_p->ips_key_a = ++ (caddr_t) kmalloc(sa_p->ips_key_a_size, GFP_ATOMIC)) == NULL) { ++ ret=-ENOMEM; ++ goto ixt_out; ++ } ++ ixt_a->ixt_a_hmac_set_key(ixt_a, sa_p->ips_key_a, akp, sa_p->ips_key_bits_a/8); /* XXX XXX */ ++ ret=0; ++ memset(akp, 0, aks); ++ kfree(akp); ++ ++ixt_out: ++ return ret; ++} ++ ++ ++int ipsec_alg_sa_esp_hash(const struct ipsec_sa *sa_p, const __u8 *espp, ++ int len, __u8 *hash, int hashlen) ++{ ++ struct ipsec_alg_auth *ixt_a=sa_p->ips_alg_auth; ++ if (!ixt_a) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:ipsec_sa_esp_hash: " ++ "NULL ipsec_alg_auth object\n"); ++ return -EPROTO; ++ } ++ KLIPS_PRINT(debug_tunnel|debug_rcv, ++ "klips_debug:ipsec_sa_esp_hash: " ++ "hashing %p (%d bytes) to %p (%d bytes)\n", ++ espp, len, ++ hash, hashlen); ++ ixt_a->ixt_a_hmac_hash(ixt_a, ++ sa_p->ips_key_a, ++ espp, len, ++ hash, hashlen); ++ return 0; 
++} ++ ++/*************************************************************** ++ * ++ * INTERFACE for module loading,testing, and unloading ++ * ++ ***************************************************************/ ++ ++/* validation for registering (enc) module */ ++static int check_enc(struct ipsec_alg_enc *ixt) ++{ ++ int ret=-EINVAL; ++ if (ixt->ixt_common.ixt_blocksize==0) /* || ixt->ixt_common.ixt_blocksize%2) need for ESP_NULL */ ++ barf_out(KERN_ERR "invalid blocksize=%d\n", ixt->ixt_common.ixt_blocksize); ++ if (ixt->ixt_common.ixt_support.ias_keyminbits==0 ++ && ixt->ixt_common.ixt_support.ias_keymaxbits==0 ++ && ixt->ixt_e_keylen==0) ++ goto zero_key_ok; ++ ++ if (ixt->ixt_common.ixt_support.ias_keyminbits==0) ++ barf_out(KERN_ERR "invalid keyminbits=%d\n", ixt->ixt_common.ixt_support.ias_keyminbits); ++ ++ if (ixt->ixt_common.ixt_support.ias_keymaxbits==0) ++ barf_out(KERN_ERR "invalid keymaxbits=%d\n", ixt->ixt_common.ixt_support.ias_keymaxbits); ++ ++ if (ixt->ixt_e_keylen==0) ++ barf_out(KERN_ERR "invalid keysize=%d\n", ixt->ixt_e_keylen); ++ ++zero_key_ok: ++ if (ixt->ixt_e_ctx_size==0 && ixt->ixt_e_new_key == NULL) ++ barf_out(KERN_ERR "invalid key_e_size=%d and ixt_e_new_key=NULL\n", ixt->ixt_e_ctx_size); ++ if (ixt->ixt_e_cbc_encrypt==NULL) ++ barf_out(KERN_ERR "e_cbc_encrypt() must be not NULL\n"); ++ ret=0; ++out: ++ return ret; ++} ++ ++/* validation for registering (auth) module */ ++static int check_auth(struct ipsec_alg_auth *ixt) ++{ ++ int ret=-EINVAL; ++ if (ixt->ixt_common.ixt_support.ias_id==0 || ixt->ixt_common.ixt_support.ias_id > SADB_AALG_MAX) ++ barf_out("invalid alg_id=%d > %d (SADB_AALG_MAX)\n", ++ ixt->ixt_common.ixt_support.ias_id, SADB_AALG_MAX); ++ ++ if (ixt->ixt_common.ixt_blocksize==0 ++ || ixt->ixt_common.ixt_blocksize%2) ++ barf_out(KERN_ERR "invalid blocksize=%d\n", ++ ixt->ixt_common.ixt_blocksize); ++ ++ if (ixt->ixt_common.ixt_blocksize>AH_BLKLEN_MAX) ++ barf_out(KERN_ERR "sorry blocksize=%d > %d. 
" ++ "Please increase AH_BLKLEN_MAX and recompile\n", ++ ixt->ixt_common.ixt_blocksize, ++ AH_BLKLEN_MAX); ++ if (ixt->ixt_common.ixt_support.ias_keyminbits==0 && ixt->ixt_common.ixt_support.ias_keymaxbits==0 && ixt->ixt_a_keylen==0) ++ goto zero_key_ok; ++ if (ixt->ixt_common.ixt_support.ias_keyminbits==0) ++ barf_out(KERN_ERR "invalid keyminbits=%d\n", ixt->ixt_common.ixt_support.ias_keyminbits); ++ if (ixt->ixt_common.ixt_support.ias_keymaxbits==0) ++ barf_out(KERN_ERR "invalid keymaxbits=%d\n", ixt->ixt_common.ixt_support.ias_keymaxbits); ++ if (ixt->ixt_common.ixt_support.ias_keymaxbits!=ixt->ixt_common.ixt_support.ias_keyminbits) ++ barf_out(KERN_ERR "keymaxbits must equal keyminbits (not sure).\n"); ++ if (ixt->ixt_a_keylen==0) ++ barf_out(KERN_ERR "invalid keysize=%d\n", ixt->ixt_a_keylen); ++zero_key_ok: ++ if (ixt->ixt_a_ctx_size==0) ++ barf_out(KERN_ERR "invalid a_ctx_size=%d\n", ixt->ixt_a_ctx_size); ++ if (ixt->ixt_a_hmac_set_key==NULL) ++ barf_out(KERN_ERR "a_hmac_set_key() must be not NULL\n"); ++ if (ixt->ixt_a_hmac_hash==NULL) ++ barf_out(KERN_ERR "a_hmac_hash() must be not NULL\n"); ++ ret=0; ++out: ++ return ret; ++} ++ ++/* ++ * Generic (enc, auth) registration entry point ++ */ ++int register_ipsec_alg(struct ipsec_alg *ixt) ++{ ++ int ret=-EINVAL; ++ /* Validation */ ++ if (ixt==NULL) ++ barf_out("NULL ipsec_alg object passed\n"); ++ if ((ixt->ixt_version&0xffffff00) != (IPSEC_ALG_VERSION&0xffffff00)) ++ barf_out("incorrect version: %d.%d.%d-%d, " ++ "must be %d.%d.%d[-%d]\n", ++ IPSEC_ALG_VERSION_QUAD(ixt->ixt_version), ++ IPSEC_ALG_VERSION_QUAD(IPSEC_ALG_VERSION)); ++ ++ switch(ixt->ixt_alg_type) { ++ case IPSEC_ALG_TYPE_AUTH: ++ if ((ret=check_auth((struct ipsec_alg_auth *)ixt)<0)) ++ goto out; ++ break; ++ case IPSEC_ALG_TYPE_ENCRYPT: ++ if ((ret=check_enc((struct ipsec_alg_enc *)ixt)<0)) ++ goto out; ++ /* ++ * Adapted two lines below: ++ * ivlen == 0 is possible (NULL enc has blocksize==1) ++ * ++ * fixed NULL support by David De Reu ++ 
*/ ++ if (ixt->ixt_support.ias_ivlen == 0 ++ && ixt->ixt_blocksize > 1) { ++ ixt->ixt_support.ias_ivlen = ixt->ixt_blocksize*8; ++ } ++ break; ++ default: ++ barf_out("alg_type=%d not supported\n", ixt->ixt_alg_type); ++ } ++ INIT_LIST_HEAD(&ixt->ixt_list); ++ ret = ipsec_alg_insert(ixt); ++ if (ret<0) ++ barf_out(KERN_WARNING "ipsec_alg for alg_id=%d failed." ++ "Not loaded (ret=%d).\n", ++ ixt->ixt_support.ias_id, ret); ++ ++ ++ ret = pfkey_list_insert_supported((struct ipsec_alg_supported *)&ixt->ixt_support ++ , &(pfkey_supported_list[SADB_SATYPE_ESP])); ++ ++ if (ret==0) { ++ ixt->ixt_state |= IPSEC_ALG_ST_SUPP; ++ /* send register event to userspace */ ++ pfkey_register_reply(SADB_SATYPE_ESP, NULL); ++ } else ++ printk(KERN_ERR "pfkey_list_insert_supported returned %d. " ++ "Loading anyway.\n", ret); ++ ret=0; ++out: ++ return ret; ++} ++ ++/* ++ * unregister ipsec_alg object from own tables, if ++ * success => calls pfkey_list_remove_supported() ++ */ ++int unregister_ipsec_alg(struct ipsec_alg *ixt) { ++ int ret= -EINVAL; ++ switch(ixt->ixt_alg_type) { ++ case IPSEC_ALG_TYPE_AUTH: ++ case IPSEC_ALG_TYPE_ENCRYPT: ++ break; ++ default: ++ /* this is not a typo :) */ ++ barf_out("frog found in list (\"%s\"): ixt_p=NULL\n", ++ ixt->ixt_name); ++ } ++ ++ ret=ipsec_alg_delete(ixt); ++ if (ixt->ixt_state&IPSEC_ALG_ST_SUPP) { ++ ixt->ixt_state &= ~IPSEC_ALG_ST_SUPP; ++ pfkey_list_remove_supported((struct ipsec_alg_supported *)&ixt->ixt_support ++ , &(pfkey_supported_list[SADB_SATYPE_ESP])); ++ ++ /* send register event to userspace */ ++ pfkey_register_reply(SADB_SATYPE_ESP, NULL); ++ } ++ ++out: ++ return ret; ++} ++ ++/* ++ * Must be called from user context ++ * used at module load type for testing algo implementation ++ */ ++static int ipsec_alg_test_encrypt(int enc_alg, int test) { ++ int ret; ++ caddr_t buf = NULL; ++ int iv_size, keysize, key_e_size; ++ struct ipsec_alg_enc *ixt_e; ++ void *tmp_key_e = NULL; ++ #define BUFSZ 1024 ++ #define MARGIN 0 ++ 
#define test_enc (buf+MARGIN) ++ #define test_dec (test_enc+BUFSZ+MARGIN) ++ #define test_tmp (test_dec+BUFSZ+MARGIN) ++ #define test_key_e (test_tmp+BUFSZ+MARGIN) ++ #define test_iv (test_key_e+key_e_size+MARGIN) ++ #define test_key (test_iv+iv_size+MARGIN) ++ #define test_size (BUFSZ*3+key_e_size+iv_size+keysize+MARGIN*7) ++ ixt_e=(struct ipsec_alg_enc *)ipsec_alg_get(IPSEC_ALG_TYPE_ENCRYPT, enc_alg); ++ if (ixt_e==NULL) { ++ KLIPS_PRINT(1, ++ "klips_debug: ipsec_alg_test_encrypt: " ++ "encalg=%d object not found\n", ++ enc_alg); ++ ret=-EINVAL; ++ goto out; ++ } ++ iv_size=ixt_e->ixt_common.ixt_support.ias_ivlen / 8; ++ key_e_size=ixt_e->ixt_e_ctx_size; ++ keysize=ixt_e->ixt_e_keylen; ++ KLIPS_PRINT(1, ++ "klips_debug: ipsec_alg_test_encrypt: " ++ "enc_alg=%d blocksize=%d key_e_size=%d keysize=%d\n", ++ enc_alg, iv_size, key_e_size, keysize); ++ if ((buf=kmalloc (test_size, GFP_KERNEL)) == NULL) { ++ ret= -ENOMEM; ++ goto out; ++ } ++ get_random_bytes(test_key, keysize); ++ get_random_bytes(test_iv, iv_size); ++ if (ixt_e->ixt_e_new_key) { ++ tmp_key_e = ixt_e->ixt_e_new_key(ixt_e, test_key, keysize); ++ ret = tmp_key_e ? 0 : -EINVAL; ++ } else { ++ tmp_key_e = test_key_e; ++ ret = ixt_e->ixt_e_set_key(ixt_e, test_key_e, test_key, keysize); ++ } ++ if (ret < 0) ++ goto out; ++ get_random_bytes(test_enc, BUFSZ); ++ memcpy(test_tmp, test_enc, BUFSZ); ++ ret=ixt_e->ixt_e_cbc_encrypt(ixt_e, tmp_key_e, test_enc, BUFSZ, test_iv, 1); ++ printk(KERN_INFO ++ "klips_info: ipsec_alg_test_encrypt: " ++ "cbc_encrypt=1 ret=%d\n", ++ ret); ++ ret=memcmp(test_enc, test_tmp, BUFSZ); ++ printk(KERN_INFO ++ "klips_info: ipsec_alg_test_encrypt: " ++ "memcmp(enc, tmp) ret=%d: %s\n", ret, ++ ret!=0? "OK. (encr->DIFFers)" : "FAIL! 
(encr->SAME)" ); ++ memcpy(test_dec, test_enc, BUFSZ); ++ ret=ixt_e->ixt_e_cbc_encrypt(ixt_e, tmp_key_e, test_dec, BUFSZ, test_iv, 0); ++ printk(KERN_INFO ++ "klips_info: ipsec_alg_test_encrypt: " ++ "cbc_encrypt=0 ret=%d\n", ret); ++ ret=memcmp(test_dec, test_tmp, BUFSZ); ++ printk(KERN_INFO ++ "klips_info: ipsec_alg_test_encrypt: " ++ "memcmp(dec,tmp) ret=%d: %s\n", ret, ++ ret==0? "OK. (encr->decr->SAME)" : "FAIL! (encr->decr->DIFFers)" ); ++ { ++ /* Shamelessly taken from drivers/md sources O:) */ ++ unsigned long now; ++ int i, count, max=0; ++ int encrypt, speed; ++ for (encrypt=0; encrypt <2;encrypt ++) { ++ for (i = 0; i < 5; i++) { ++ now = jiffies; ++ count = 0; ++ while (jiffies == now) { ++ mb(); ++ ixt_e->ixt_e_cbc_encrypt(ixt_e, ++ tmp_key_e, test_tmp, ++ BUFSZ, test_iv, encrypt); ++ mb(); ++ count++; ++ mb(); ++ } ++ if (count > max) ++ max = count; ++ } ++ speed = max * (HZ * BUFSZ / 1024); ++ printk(KERN_INFO ++ "klips_info: ipsec_alg_test_encrypt: " ++ "%s %s speed=%d KB/s\n", ++ ixt_e->ixt_common.ixt_name, ++ encrypt? 
"encrypt": "decrypt", speed); ++ } ++ } ++out: ++ if (tmp_key_e && ixt_e->ixt_e_destroy_key) ixt_e->ixt_e_destroy_key(ixt_e, tmp_key_e); ++ if (buf) kfree(buf); ++ if (ixt_e) ipsec_alg_put((struct ipsec_alg *)ixt_e); ++ return ret; ++ #undef test_enc ++ #undef test_dec ++ #undef test_tmp ++ #undef test_key_e ++ #undef test_iv ++ #undef test_key ++ #undef test_size ++} ++ ++/* ++ * Must be called from user context ++ * used at module load type for testing algo implementation ++ */ ++static int ipsec_alg_test_auth(int auth_alg, int test) { ++ int ret; ++ caddr_t buf = NULL; ++ int blocksize, keysize, key_a_size; ++ struct ipsec_alg_auth *ixt_a; ++ #define BUFSZ 1024 ++ #define MARGIN 0 ++ #define test_auth (buf+MARGIN) ++ #define test_key_a (test_auth+BUFSZ+MARGIN) ++ #define test_key (test_key_a+key_a_size+MARGIN) ++ #define test_hash (test_key+keysize+MARGIN) ++ #define test_size (BUFSZ+key_a_size+keysize+AHHMAC_HASHLEN+MARGIN*4) ++ ixt_a=(struct ipsec_alg_auth *)ipsec_alg_get(IPSEC_ALG_TYPE_AUTH, auth_alg); ++ if (ixt_a==NULL) { ++ KLIPS_PRINT(1, ++ "klips_debug: ipsec_alg_test_auth: " ++ "encalg=%d object not found\n", ++ auth_alg); ++ ret=-EINVAL; ++ goto out; ++ } ++ blocksize=ixt_a->ixt_common.ixt_blocksize; ++ key_a_size=ixt_a->ixt_a_ctx_size; ++ keysize=ixt_a->ixt_a_keylen; ++ KLIPS_PRINT(1, ++ "klips_debug: ipsec_alg_test_auth: " ++ "auth_alg=%d blocksize=%d key_a_size=%d keysize=%d\n", ++ auth_alg, blocksize, key_a_size, keysize); ++ if ((buf=kmalloc (test_size, GFP_KERNEL)) == NULL) { ++ ret= -ENOMEM; ++ goto out; ++ } ++ get_random_bytes(test_key, keysize); ++ ret = ixt_a->ixt_a_hmac_set_key(ixt_a, test_key_a, test_key, keysize); ++ if (ret < 0 ) ++ goto out; ++ get_random_bytes(test_auth, BUFSZ); ++ ret=ixt_a->ixt_a_hmac_hash(ixt_a, test_key_a, test_auth, BUFSZ, test_hash, AHHMAC_HASHLEN); ++ printk(KERN_INFO ++ "klips_info: ipsec_alg_test_auth: " ++ "ret=%d\n", ret); ++ { ++ /* Shamelessly taken from drivers/md sources O:) */ ++ unsigned long now; ++ 
int i, count, max=0; ++ int speed; ++ for (i = 0; i < 5; i++) { ++ now = jiffies; ++ count = 0; ++ while (jiffies == now) { ++ mb(); ++ ixt_a->ixt_a_hmac_hash(ixt_a, test_key_a, test_auth, BUFSZ, test_hash, AHHMAC_HASHLEN); ++ mb(); ++ count++; ++ mb(); ++ } ++ if (count > max) ++ max = count; ++ } ++ speed = max * (HZ * BUFSZ / 1024); ++ printk(KERN_INFO ++ "klips_info: ipsec_alg_test_auth: " ++ "%s hash speed=%d KB/s\n", ++ ixt_a->ixt_common.ixt_name, ++ speed); ++ } ++out: ++ if (buf) kfree(buf); ++ if (ixt_a) ipsec_alg_put((struct ipsec_alg *)ixt_a); ++ return ret; ++ #undef test_auth ++ #undef test_key_a ++ #undef test_key ++ #undef test_hash ++ #undef test_size ++} ++ ++int ipsec_alg_test(unsigned alg_type, unsigned alg_id, int test) { ++ switch(alg_type) { ++ case IPSEC_ALG_TYPE_ENCRYPT: ++ return ipsec_alg_test_encrypt(alg_id, test); ++ break; ++ case IPSEC_ALG_TYPE_AUTH: ++ return ipsec_alg_test_auth(alg_id, test); ++ break; ++ } ++ printk(KERN_ERR "klips_info: ipsec_alg_test() called incorrectly: " ++ "alg_type=%d alg_id=%d\n", ++ alg_type, alg_id); ++ return -EINVAL; ++} ++ ++int ipsec_alg_init(void) { ++ KLIPS_PRINT(1, "klips_info:ipsec_alg_init: " ++ "KLIPS alg v=%d.%d.%d-%d (EALG_MAX=%d, AALG_MAX=%d)\n", ++ IPSEC_ALG_VERSION_QUAD(IPSEC_ALG_VERSION), ++ SADB_EALG_MAX, SADB_AALG_MAX); ++ /* Initialize tables */ ++ write_lock_bh(&ipsec_alg_lock); ++ ipsec_alg_hash_init(); ++ write_unlock_bh(&ipsec_alg_lock); ++ ++ /* Initialize static algos */ ++ KLIPS_PRINT(1, "klips_info:ipsec_alg_init: " ++ "calling ipsec_alg_static_init()\n"); ++ ++ /* If we are suppose to use our AES, and don't have ++ * CryptoAPI enabled... 
++ */ ++#if defined(CONFIG_KLIPS_ENC_AES) && CONFIG_KLIPS_ENC_AES && !defined(CONFIG_KLIPS_ENC_AES_MODULE) ++#if defined(CONFIG_KLIPS_ENC_CRYPTOAPI) && CONFIG_KLIPS_ENC_CRYPTOAPI ++#warning "Using built-in AES rather than CryptoAPI AES" ++#endif ++ { ++ extern int ipsec_aes_init(void); ++ ipsec_aes_init(); ++ } ++#endif ++ ++#if defined(CONFIG_KLIPS_ENC_3DES) && CONFIG_KLIPS_ENC_3DES && !defined(CONFIG_KLIPS_ENC_3DES_MODULE) ++#if defined(CONFIG_KLIPS_ENC_CRYPTOAPI) && CONFIG_KLIPS_ENC_CRYPTOAPI ++#warning "Using built-in 3des rather than CryptoAPI 3des" ++#endif ++ { ++ extern int ipsec_3des_init(void); ++ ipsec_3des_init(); ++ } ++#endif ++#if defined(CONFIG_KLIPS_ENC_NULL) && CONFIG_KLIPS_ENC_NULL && !defined(CONFIG_KLIPS_ENC_NULL_MODULE) ++#if defined(CONFIG_KLIPS_ENC_CRYPTOAPI) && CONFIG_KLIPS_ENC_CRYPTOAPI ++#warning "Using built-in null cipher rather than CryptoAPI null cipher" ++#endif ++#warning "Building with null cipher (ESP_NULL), blame on you :-)" ++ { ++ extern int ipsec_null_init(void); ++ ipsec_null_init(); ++ } ++#endif ++ ++ ++ /* If we are doing CryptoAPI, then init */ ++#if defined(CONFIG_KLIPS_ENC_CRYPTOAPI) && CONFIG_KLIPS_ENC_CRYPTOAPI && !defined(CONFIG_KLIPS_ENC_CRYPTOAPI_MODULE) ++ { ++ extern int ipsec_cryptoapi_init(void); ++ ipsec_cryptoapi_init(); ++ } ++#endif ++ ++ ++ return 0; ++} ++ ++/********************************************** ++ * ++ * INTERFACE for ipsec_sa init and wipe ++ * ++ **********************************************/ ++ ++/* ++ * Called from pluto -> pfkey_v2_parser.c:pfkey_ipsec_sa_init() ++ */ ++int ipsec_alg_sa_init(struct ipsec_sa *sa_p) { ++ struct ipsec_alg_enc *ixt_e; ++ struct ipsec_alg_auth *ixt_a; ++ ++ /* Only ESP for now ... 
*/ ++ if (sa_p->ips_said.proto != IPPROTO_ESP) ++ return -EPROTONOSUPPORT; ++ ++ KLIPS_PRINT(debug_pfkey, "klips_debug: ipsec_alg_sa_init() :" ++ "entering for encalg=%d, authalg=%d\n", ++ sa_p->ips_encalg, sa_p->ips_authalg); ++ ++ if ((ixt_e=(struct ipsec_alg_enc *) ++ ipsec_alg_get(IPSEC_ALG_TYPE_ENCRYPT, sa_p->ips_encalg))) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug: ipsec_alg_sa_init() :" ++ "found ipsec_alg (ixt_e=%p) for encalg=%d\n", ++ ixt_e, sa_p->ips_encalg); ++ sa_p->ips_alg_enc=ixt_e; ++ } ++ ++ if ((ixt_a=(struct ipsec_alg_auth *) ++ ipsec_alg_get(IPSEC_ALG_TYPE_AUTH, sa_p->ips_authalg))) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug: ipsec_alg_sa_init() :" ++ "found ipsec_alg (ixt_a=%p) for auth=%d\n", ++ ixt_a, sa_p->ips_authalg); ++ sa_p->ips_alg_auth=ixt_a; ++ } ++ return 0; ++} ++ ++/* ++ * Called from pluto -> ipsec_sa.c:ipsec_sa_delchain() ++ */ ++int ipsec_alg_sa_wipe(struct ipsec_sa *sa_p) { ++ struct ipsec_alg *ixt; ++ if ((ixt=(struct ipsec_alg *)sa_p->ips_alg_enc)) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug: ipsec_alg_sa_wipe() :" ++ "unlinking for encalg=%d\n", ++ ixt->ixt_support.ias_id); ++ ipsec_alg_put(ixt); ++ } ++ if ((ixt=(struct ipsec_alg *)sa_p->ips_alg_auth)) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug: ipsec_alg_sa_wipe() :" ++ "unlinking for authalg=%d\n", ++ ixt->ixt_support.ias_id); ++ ipsec_alg_put(ixt); ++ } ++ return 0; ++} ++ ++IPSEC_PROCFS_DEBUG_NO_STATIC ++int ++ipsec_xform_get_info(char *buffer, ++ char **start, ++ off_t offset, ++ int length IPSEC_PROC_LAST_ARG) ++{ ++ int len = 0; ++ off_t begin = 0; ++ int i; ++ struct list_head *head; ++ struct ipsec_alg *ixt; ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS, ++ "klips_debug:ipsec_tncfg_get_info: " ++ "buffer=0p%p, *start=0p%p, offset=%d, length=%d\n", ++ buffer, ++ *start, ++ (int)offset, ++ length); ++ ++ for(i = 0, head = ipsec_alg_hash_table; ++ inext; p!=head; p=p->next) ++ { ++ ixt = list_entry(p, struct ipsec_alg, ixt_list); ++ len += 
ipsec_snprintf(buffer+len, length-len, ++ "VERSION=%d TYPE=%d ID=%d NAME=%s REFCNT=%d ", ++ ixt->ixt_version, ixt->ixt_alg_type, ixt->ixt_support.ias_id, ++ ixt->ixt_name, ixt->ixt_refcnt); ++ ++ len += ipsec_snprintf(buffer+len, length-len, ++ "STATE=%08x BLOCKSIZE=%d IVLEN=%d KEYMINBITS=%d KEYMAXBITS=%d ", ++ ixt->ixt_state, ixt->ixt_blocksize, ++ ixt->ixt_support.ias_ivlen, ixt->ixt_support.ias_keyminbits, ixt->ixt_support.ias_keymaxbits); ++ ++ len += ipsec_snprintf(buffer+len, length-len, ++ "IVLEN=%d KEYMINBITS=%d KEYMAXBITS=%d ", ++ ixt->ixt_support.ias_ivlen, ixt->ixt_support.ias_keyminbits, ixt->ixt_support.ias_keymaxbits); ++ ++ switch(ixt->ixt_alg_type) ++ { ++ case IPSEC_ALG_TYPE_AUTH: ++ { ++ struct ipsec_alg_auth *auth = (struct ipsec_alg_auth *)ixt; ++ ++ len += ipsec_snprintf(buffer+len, length-len, ++ "KEYLEN=%d CTXSIZE=%d AUTHLEN=%d ", ++ auth->ixt_a_keylen, auth->ixt_a_ctx_size, ++ auth->ixt_a_authlen); ++ break; ++ } ++ case IPSEC_ALG_TYPE_ENCRYPT: ++ { ++ struct ipsec_alg_enc *enc = (struct ipsec_alg_enc *)ixt; ++ len += ipsec_snprintf(buffer+len, length-len, ++ "KEYLEN=%d CTXSIZE=%d ", ++ enc->ixt_e_keylen, enc->ixt_e_ctx_size); ++ ++ break; ++ } ++ } ++ ++ len += ipsec_snprintf(buffer+len, length-len, "\n"); ++ } ++ } ++ ++ *start = buffer + (offset - begin); /* Start of wanted data */ ++ len -= (offset - begin); /* Start slop */ ++ if (len > length) ++ len = length; ++ return len; ++} ++ ++ ++/* ++ * As the author of this module, I ONLY ALLOW using it from ++ * GPL (or same LICENSE TERMS as kernel source) modules. ++ * ++ * In respect to hardware crypto engines this means: ++ * * Closed-source device drivers ARE NOT ALLOWED to use ++ * this interface. ++ * * Closed-source VHDL/Verilog firmware running on ++ * the crypto hardware device IS ALLOWED to use this interface ++ * via a GPL (or same LICENSE TERMS as kernel source) device driver. 
++ * --Juan Jose Ciarlante 20/03/2002 (thanks RGB for the correct wording) ++ */ ++ ++/* ++ * These symbols can only be used from GPL modules ++ * for now, I'm disabling this because it creates false ++ * symbol problems for old modutils. ++ */ ++ ++#ifdef CONFIG_MODULES ++#ifndef NET_26 ++#if 0 ++#ifndef EXPORT_SYMBOL_GPL ++#undef EXPORT_SYMBOL_GPL ++#define EXPORT_SYMBOL_GPL EXPORT_SYMBOL ++#endif ++#endif ++EXPORT_SYMBOL(register_ipsec_alg); ++EXPORT_SYMBOL(unregister_ipsec_alg); ++EXPORT_SYMBOL(ipsec_alg_test); ++#endif ++#endif +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_alg_cryptoapi.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,455 @@ ++/* ++ * ipsec_alg to linux cryptoapi GLUE ++ * ++ * Authors: CODE.ar TEAM ++ * Harpo MAxx ++ * JuanJo Ciarlante ++ * Luciano Ruete ++ * ++ * ipsec_alg_cryptoapi.c,v 1.1.2.1 2003/11/21 18:12:23 jjo Exp ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * Example usage: ++ * modinfo -p ipsec_cryptoapi (quite useful info, including supported algos) ++ * modprobe ipsec_cryptoapi ++ * modprobe ipsec_cryptoapi test=1 ++ * modprobe ipsec_cryptoapi excl=1 (exclusive cipher/algo) ++ * modprobe ipsec_cryptoapi noauto=1 aes=1 twofish=1 (only these ciphers) ++ * modprobe ipsec_cryptoapi aes=128,128 (force these keylens) ++ * modprobe ipsec_cryptoapi des_ede3=0 (everything but 3DES) ++ */ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++ ++/* ++ * special case: ipsec core modular with this static algo inside: ++ * must avoid MODULE magic for this file ++ */ ++#if defined(CONFIG_KLIPS_MODULE) && defined(CONFIG_KLIPS_ENC_CRYPTOAPI) ++#undef MODULE ++#endif ++ ++#include ++#include ++ ++#include /* printk() */ ++#include /* error codes */ ++#include /* size_t */ ++#include ++ ++/* Check if __exit is defined, if not null it */ ++#ifndef __exit ++#define __exit ++#endif ++ ++/* warn the innocent */ ++#if !defined (CONFIG_CRYPTO) && !defined (CONFIG_CRYPTO_MODULE) ++#warning "No linux CryptoAPI found, install 2.4.22+ or 2.6.x" ++#define NO_CRYPTOAPI_SUPPORT ++#endif ++ ++#include "openswan.h" ++#include "openswan/ipsec_alg.h" ++#include "openswan/ipsec_policy.h" ++ ++#include ++#ifdef CRYPTO_API_VERSION_CODE ++#warning "Old CryptoAPI is not supported. 
Only linux-2.4.22+ or linux-2.6.x are supported" ++#define NO_CRYPTOAPI_SUPPORT ++#endif ++ ++#ifdef NO_CRYPTOAPI_SUPPORT ++#warning "Building an unusable module :P" ++/* Catch old CryptoAPI by not allowing module to load */ ++IPSEC_ALG_MODULE_INIT_STATIC( ipsec_cryptoapi_init ) ++{ ++ printk(KERN_WARNING "ipsec_cryptoapi.o was not built on stock Linux CryptoAPI (2.4.22+ or 2.6.x), not loading.\n"); ++ return -EINVAL; ++} ++#else ++#include ++#include ++#include ++ ++#define CIPHERNAME_AES "aes" ++#define CIPHERNAME_1DES "des" ++#define CIPHERNAME_3DES "des3_ede" ++#define CIPHERNAME_BLOWFISH "blowfish" ++#define CIPHERNAME_CAST "cast5" ++#define CIPHERNAME_SERPENT "serpent" ++#define CIPHERNAME_TWOFISH "twofish" ++ ++#define ESP_SERPENT 252 /* from ipsec drafts */ ++#define ESP_TWOFISH 253 /* from ipsec drafts */ ++ ++#define DIGESTNAME_MD5 "md5" ++#define DIGESTNAME_SHA1 "sha1" ++ ++MODULE_AUTHOR("Juanjo Ciarlante, Harpo MAxx, Luciano Ruete"); ++static int debug_crypto=0; ++static int test_crypto=0; ++static int excl_crypto=0; ++ ++static int noauto = 0; ++ ++#ifdef module_param ++module_param(debug_crypto,int,0600) ++module_param(test_crypto,int,0600) ++module_param(excl_crypto,int,0600) ++ ++module_param(noauto,int,0600) ++#else ++MODULE_PARM(debug_crypto, "i"); ++MODULE_PARM(test_crypto, "i"); ++MODULE_PARM(excl_crypto, "i"); ++ ++MODULE_PARM(noauto,"i"); ++#endif ++MODULE_PARM_DESC(noauto, "Dont try all known algos, just setup enabled ones"); ++ ++#ifdef CONFIG_KLIPS_ENC_1DES ++static int des_ede1[] = {-1, -1}; ++#endif ++static int des_ede3[] = {-1, -1}; ++static int aes[] = {-1, -1}; ++static int blowfish[] = {-1, -1}; ++static int cast[] = {-1, -1}; ++static int serpent[] = {-1, -1}; ++static int twofish[] = {-1, -1}; ++ ++#ifdef CONFIG_KLIPS_ENC_1DES ++#ifdef module_param ++module_param_array(des_ede1,int,NULL,0) ++#else ++MODULE_PARM(des_ede1,"1-2i"); ++#endif ++#endif ++#ifdef module_param ++module_param_array(des_ede3,int,NULL,0) 
++module_param_array(aes,int,NULL,0) ++module_param_array(blowfish,int,NULL,0) ++module_param_array(cast,int,NULL,0) ++module_param_array(serpent,int,NULL,0) ++module_param_array(twofish,int,NULL,0) ++#else ++MODULE_PARM(des_ede3,"1-2i"); ++MODULE_PARM(aes,"1-2i"); ++MODULE_PARM(blowfish,"1-2i"); ++MODULE_PARM(cast,"1-2i"); ++MODULE_PARM(serpent,"1-2i"); ++MODULE_PARM(twofish,"1-2i"); ++#endif ++MODULE_PARM_DESC(des_ede1, "0: disable | 1: force_enable | min,max: dontuse"); ++MODULE_PARM_DESC(des_ede3, "0: disable | 1: force_enable | min,max: dontuse"); ++MODULE_PARM_DESC(aes, "0: disable | 1: force_enable | min,max: keybitlens"); ++MODULE_PARM_DESC(blowfish, "0: disable | 1: force_enable | min,max: keybitlens"); ++MODULE_PARM_DESC(cast, "0: disable | 1: force_enable | min,max: keybitlens"); ++MODULE_PARM_DESC(serpent, "0: disable | 1: force_enable | min,max: keybitlens"); ++MODULE_PARM_DESC(twofish, "0: disable | 1: force_enable | min,max: keybitlens"); ++ ++struct ipsec_alg_capi_cipher { ++ const char *ciphername; /* cryptoapi's ciphername */ ++ unsigned blocksize; ++ unsigned short minbits; ++ unsigned short maxbits; ++ int *parm; /* lkm param for this cipher */ ++ struct ipsec_alg_enc alg; /* note it's not a pointer */ ++}; ++ ++static struct ipsec_alg_capi_cipher alg_capi_carray[] = { ++ { CIPHERNAME_AES, 16, 128, 256, aes, { ixt_common:{ ixt_support:{ ias_id: ESP_AES}}}}, ++ { CIPHERNAME_TWOFISH, 16, 128, 256, twofish, { ixt_common:{ ixt_support:{ ias_id: ESP_TWOFISH,}}}}, ++ { CIPHERNAME_SERPENT, 16, 128, 256, serpent, { ixt_common:{ ixt_support:{ ias_id: ESP_SERPENT,}}}}, ++ { CIPHERNAME_CAST, 8, 128, 128, cast , { ixt_common:{ ixt_support:{ ias_id: ESP_CAST,}}}}, ++ { CIPHERNAME_BLOWFISH, 8, 96, 448, blowfish, { ixt_common:{ ixt_support:{ ias_id: ESP_BLOWFISH,}}}}, ++ { CIPHERNAME_3DES, 8, 192, 192, des_ede3, { ixt_common:{ ixt_support:{ ias_id: ESP_3DES,}}}}, ++#ifdef CONFIG_KLIPS_ENC_1DES ++ { CIPHERNAME_1DES, 8, 64, 64, des_ede1, { ixt_common:{ 
ixt_support:{ ias_id: ESP_DES,}}}}, ++#endif ++ { NULL, 0, 0, 0, NULL, {} } ++}; ++ ++#ifdef NOT_YET ++struct ipsec_alg_capi_digest { ++ const char *digestname; /* cryptoapi's digestname */ ++ struct digest_implementation *di; ++ struct ipsec_alg_auth alg; /* note it's not a pointer */ ++}; ++static struct ipsec_alg_capi_cipher alg_capi_darray[] = { ++ { DIGESTNAME_MD5, NULL, { ixt_alg_id: AH_MD5, }}, ++ { DIGESTNAME_SHA1, NULL, { ixt_alg_id: AH_SHA, }}, ++ { NULL, NULL, {} } ++}; ++#endif ++/* ++ * "generic" linux cryptoapi setup_cipher() function ++ */ ++int setup_cipher(const char *ciphername) ++{ ++ return crypto_alg_available(ciphername, 0); ++} ++ ++/* ++ * setups ipsec_alg_capi_cipher "hyper" struct components, calling ++ * register_ipsec_alg for cointaned ipsec_alg object ++ */ ++static void _capi_destroy_key (struct ipsec_alg_enc *alg, __u8 *key_e); ++static __u8 * _capi_new_key (struct ipsec_alg_enc *alg, const __u8 *key, size_t keylen); ++static int _capi_cbc_encrypt(struct ipsec_alg_enc *alg, __u8 * key_e, __u8 * in, int ilen, const __u8 * iv, int encrypt); ++ ++static int ++setup_ipsec_alg_capi_cipher(struct ipsec_alg_capi_cipher *cptr) ++{ ++ int ret; ++ cptr->alg.ixt_common.ixt_version = IPSEC_ALG_VERSION; ++ cptr->alg.ixt_common.ixt_module = THIS_MODULE; ++ atomic_set (& cptr->alg.ixt_common.ixt_refcnt, 0); ++ strncpy (cptr->alg.ixt_common.ixt_name , cptr->ciphername, sizeof (cptr->alg.ixt_common.ixt_name)); ++ ++ cptr->alg.ixt_common.ixt_blocksize=cptr->blocksize; ++ cptr->alg.ixt_common.ixt_support.ias_keyminbits=cptr->minbits; ++ cptr->alg.ixt_common.ixt_support.ias_keymaxbits=cptr->maxbits; ++ cptr->alg.ixt_common.ixt_state = 0; ++ if (excl_crypto) cptr->alg.ixt_common.ixt_state |= IPSEC_ALG_ST_EXCL; ++ cptr->alg.ixt_e_keylen=cptr->alg.ixt_common.ixt_support.ias_keymaxbits/8; ++ cptr->alg.ixt_e_ctx_size = 0; ++ cptr->alg.ixt_common.ixt_support.ias_exttype = IPSEC_ALG_TYPE_ENCRYPT; ++ cptr->alg.ixt_e_new_key = _capi_new_key; ++ 
cptr->alg.ixt_e_destroy_key = _capi_destroy_key; ++ cptr->alg.ixt_e_cbc_encrypt = _capi_cbc_encrypt; ++ cptr->alg.ixt_common.ixt_data = cptr; ++ ++ ret=register_ipsec_alg_enc(&cptr->alg); ++ printk(KERN_INFO "KLIPS cryptoapi interface: " ++ "alg_type=%d alg_id=%d name=%s " ++ "keyminbits=%d keymaxbits=%d, %s(%d)\n", ++ cptr->alg.ixt_common.ixt_support.ias_exttype, ++ cptr->alg.ixt_common.ixt_support.ias_id, ++ cptr->alg.ixt_common.ixt_name, ++ cptr->alg.ixt_common.ixt_support.ias_keyminbits, ++ cptr->alg.ixt_common.ixt_support.ias_keymaxbits, ++ ret ? "not found" : "found", ret); ++ return ret; ++} ++/* ++ * called in ipsec_sa_wipe() time, will destroy key contexts ++ * and do 1 unbind() ++ */ ++static void ++_capi_destroy_key (struct ipsec_alg_enc *alg, __u8 *key_e) ++{ ++ struct crypto_tfm *tfm=(struct crypto_tfm*)key_e; ++ ++ if (debug_crypto > 0) ++ printk(KERN_DEBUG "klips_debug: _capi_destroy_key:" ++ "name=%s key_e=%p \n", ++ alg->ixt_common.ixt_name, key_e); ++ if (!key_e) { ++ printk(KERN_ERR "klips_debug: _capi_destroy_key:" ++ "name=%s NULL key_e!\n", ++ alg->ixt_common.ixt_name); ++ return; ++ } ++ crypto_free_tfm(tfm); ++} ++ ++/* ++ * create new key context, need alg->ixt_data to know which ++ * (of many) cipher inside this module is the target ++ */ ++static __u8 * ++_capi_new_key (struct ipsec_alg_enc *alg, const __u8 *key, size_t keylen) ++{ ++ struct ipsec_alg_capi_cipher *cptr; ++ struct crypto_tfm *tfm=NULL; ++ ++ cptr = alg->ixt_common.ixt_data; ++ if (!cptr) { ++ printk(KERN_ERR "_capi_new_key(): " ++ "NULL ixt_data (?!) 
for \"%s\" algo\n" ++ , alg->ixt_common.ixt_name); ++ goto err; ++ } ++ if (debug_crypto > 0) ++ printk(KERN_DEBUG "klips_debug:_capi_new_key:" ++ "name=%s cptr=%p key=%p keysize=%d\n", ++ alg->ixt_common.ixt_name, cptr, key, keylen); ++ ++ /* ++ * alloc tfm ++ */ ++ tfm = crypto_alloc_tfm(cptr->ciphername, CRYPTO_TFM_MODE_CBC); ++ if (!tfm) { ++ printk(KERN_ERR "_capi_new_key(): " ++ "NULL tfm for \"%s\" cryptoapi (\"%s\") algo\n" ++ , alg->ixt_common.ixt_name, cptr->ciphername); ++ goto err; ++ } ++ if (crypto_cipher_setkey(tfm, key, keylen) < 0) { ++ printk(KERN_ERR "_capi_new_key(): " ++ "failed new_key() for \"%s\" cryptoapi algo (keylen=%d)\n" ++ , alg->ixt_common.ixt_name, keylen); ++ crypto_free_tfm(tfm); ++ tfm=NULL; ++ } ++err: ++ if (debug_crypto > 0) ++ printk(KERN_DEBUG "klips_debug:_capi_new_key:" ++ "name=%s key=%p keylen=%d tfm=%p\n", ++ alg->ixt_common.ixt_name, key, keylen, tfm); ++ return (__u8 *) tfm; ++} ++/* ++ * core encryption function: will use cx->ci to call actual cipher's ++ * cbc function ++ */ ++static int ++_capi_cbc_encrypt(struct ipsec_alg_enc *alg, __u8 * key_e, __u8 * in, int ilen, const __u8 * iv, int encrypt) { ++ int error =0; ++ struct crypto_tfm *tfm=(struct crypto_tfm *)key_e; ++ struct scatterlist sg = { ++ .page = virt_to_page(in), ++ .offset = (unsigned long)(in) % PAGE_SIZE, ++ .length=ilen, ++ }; ++ if (debug_crypto > 1) ++ printk(KERN_DEBUG "klips_debug:_capi_cbc_encrypt:" ++ "key_e=%p " ++ "in=%p out=%p ilen=%d iv=%p encrypt=%d\n" ++ , key_e ++ , in, in, ilen, iv, encrypt); ++ crypto_cipher_set_iv(tfm, iv, crypto_tfm_alg_ivsize(tfm)); ++ if (encrypt) ++ error = crypto_cipher_encrypt (tfm, &sg, &sg, ilen); ++ else ++ error = crypto_cipher_decrypt (tfm, &sg, &sg, ilen); ++ if (debug_crypto > 1) ++ printk(KERN_DEBUG "klips_debug:_capi_cbc_encrypt:" ++ "error=%d\n" ++ , error); ++ return (error<0)? 
error : ilen; ++} ++/* ++ * main initialization loop: for each cipher in list, do ++ * 1) setup cryptoapi cipher else continue ++ * 2) register ipsec_alg object ++ */ ++static int ++setup_cipher_list (struct ipsec_alg_capi_cipher* clist) ++{ ++ struct ipsec_alg_capi_cipher *cptr; ++ /* foreach cipher in list ... */ ++ for (cptr=clist;cptr->ciphername;cptr++) { ++ /* ++ * see if cipher has been disabled (0) or ++ * if noauto set and not enabled (1) ++ */ ++ if (cptr->parm[0] == 0 || (noauto && cptr->parm[0] < 0)) { ++ if (debug_crypto>0) ++ printk(KERN_INFO "setup_cipher_list(): " ++ "ciphername=%s skipped at user request: " ++ "noauto=%d parm[0]=%d parm[1]=%d\n" ++ , cptr->ciphername ++ , noauto ++ , cptr->parm[0] ++ , cptr->parm[1]); ++ continue; ++ } ++ /* ++ * use a local ci to avoid touching cptr->ci, ++ * if register ipsec_alg success then bind cipher ++ */ ++ if(cptr->alg.ixt_common.ixt_support.ias_name == NULL) { ++ cptr->alg.ixt_common.ixt_support.ias_name = cptr->ciphername; ++ } ++ ++ if( setup_cipher(cptr->ciphername) ) { ++ if (debug_crypto > 0) ++ printk(KERN_DEBUG "klips_debug:" ++ "setup_cipher_list():" ++ "ciphername=%s found\n" ++ , cptr->ciphername); ++ ++ if (setup_ipsec_alg_capi_cipher(cptr) != 0) { ++ printk(KERN_ERR "klips_debug:" ++ "setup_cipher_list():" ++ "ciphername=%s failed ipsec_alg_register\n" ++ , cptr->ciphername); ++ } ++ } else { ++ printk(KERN_INFO "KLIPS: lookup for ciphername=%s: not found \n", ++ cptr->ciphername); ++ } ++ } ++ return 0; ++} ++/* ++ * deregister ipsec_alg objects and unbind ciphers ++ */ ++static int ++unsetup_cipher_list (struct ipsec_alg_capi_cipher* clist) ++{ ++ struct ipsec_alg_capi_cipher *cptr; ++ /* foreach cipher in list ... 
*/ ++ for (cptr=clist;cptr->ciphername;cptr++) { ++ if (cptr->alg.ixt_common.ixt_state & IPSEC_ALG_ST_REGISTERED) { ++ unregister_ipsec_alg_enc(&cptr->alg); ++ } ++ } ++ return 0; ++} ++/* ++ * test loop for registered algos ++ */ ++static int ++test_cipher_list (struct ipsec_alg_capi_cipher* clist) ++{ ++ int test_ret; ++ struct ipsec_alg_capi_cipher *cptr; ++ /* foreach cipher in list ... */ ++ for (cptr=clist;cptr->ciphername;cptr++) { ++ if (cptr->alg.ixt_common.ixt_state & IPSEC_ALG_ST_REGISTERED) { ++ test_ret=ipsec_alg_test( ++ cptr->alg.ixt_common.ixt_support.ias_exttype, ++ cptr->alg.ixt_common.ixt_support.ias_id, ++ test_crypto); ++ printk("test_cipher_list(alg_type=%d alg_id=%d): test_ret=%d\n", ++ cptr->alg.ixt_common.ixt_support.ias_exttype, ++ cptr->alg.ixt_common.ixt_support.ias_id, ++ test_ret); ++ } ++ } ++ return 0; ++} ++ ++IPSEC_ALG_MODULE_INIT_STATIC( ipsec_cryptoapi_init ) ++{ ++ int ret, test_ret; ++ if ((ret=setup_cipher_list(alg_capi_carray)) < 0) ++ return -EPROTONOSUPPORT; ++ if (ret==0 && test_crypto) { ++ test_ret=test_cipher_list(alg_capi_carray); ++ } ++ return ret; ++} ++IPSEC_ALG_MODULE_EXIT_STATIC( ipsec_cryptoapi_fini ) ++{ ++ unsetup_cipher_list(alg_capi_carray); ++ return; ++} ++#ifdef MODULE_LICENSE ++MODULE_LICENSE("GPL"); ++#endif ++ ++#endif /* NO_CRYPTOAPI_SUPPORT */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_esp.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,615 @@ ++/* ++ * processing code for ESP ++ * Copyright (C) 2003 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * for more details. ++ */ ++ ++char ipsec_esp_c_version[] = "RCSID $Id: ipsec_esp.c,v 1.13.2.7 2007-09-05 02:56:09 paul Exp $"; ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++ ++#define __NO_VERSION__ ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include ++#include ++#ifdef SPINLOCK ++# ifdef SPINLOCK_23 ++# include /* *lock* */ ++# else /* SPINLOCK_23 */ ++# include /* *lock* */ ++# endif /* SPINLOCK_23 */ ++#endif /* SPINLOCK */ ++ ++#include ++#include ++ ++#include "openswan/radij.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_sa.h" ++ ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_tunnel.h" ++#include "openswan/ipsec_rcv.h" ++#include "openswan/ipsec_xmit.h" ++ ++#include "openswan/ipsec_auth.h" ++ ++#ifdef CONFIG_KLIPS_ESP ++#include "openswan/ipsec_esp.h" ++#endif /* CONFIG_KLIPS_ESP */ ++ ++#include "openswan/ipsec_proto.h" ++#include "openswan/ipsec_alg.h" ++ ++#ifdef CONFIG_KLIPS_DEBUG ++#define ESP_DMP(_x,_y,_z) if(debug_rcv && sysctl_ipsec_debug_verbose) ipsec_dmp_block(_x,_y,_z) ++#else ++#define ESP_DMP(_x,_y,_z) ++#endif ++ ++#ifdef CONFIG_KLIPS_ESP ++enum ipsec_rcv_value ++ipsec_rcv_esp_checks(struct ipsec_rcv_state *irs, ++ struct sk_buff *skb) ++{ ++ __u8 proto; ++ int len; /* packet length */ ++ ++ len = skb->len; ++ proto = irs->ipp->protocol; ++ ++ /* XXX this will need to be 8 for IPv6 */ ++ if ((proto == IPPROTO_ESP) && ((len - irs->iphlen) % 4)) { ++ printk("klips_error:ipsec_rcv: " ++ "got packet with content length = %d from %s -- should be on 4 octet boundary, 
packet dropped\n", ++ len - irs->iphlen, ++ irs->ipsaddr_txt); ++ if(irs->stats) { ++ irs->stats->rx_errors++; ++ } ++ return IPSEC_RCV_BADLEN; ++ } ++ ++ if(skb->len < (irs->hard_header_len + sizeof(struct iphdr) + sizeof(struct esphdr))) { ++ KLIPS_PRINT(debug_rcv & DB_RX_INAU, ++ "klips_debug:ipsec_rcv: " ++ "runt esp packet of skb->len=%d received from %s, dropped.\n", ++ skb->len, ++ irs->ipsaddr_txt); ++ if(irs->stats) { ++ irs->stats->rx_errors++; ++ } ++ return IPSEC_RCV_BADLEN; ++ } ++ ++ irs->protostuff.espstuff.espp = (struct esphdr *)skb_transport_header(skb); ++ irs->said.spi = irs->protostuff.espstuff.espp->esp_spi; ++ ++ return IPSEC_RCV_OK; ++} ++ ++enum ipsec_rcv_value ++ipsec_rcv_esp_decrypt_setup(struct ipsec_rcv_state *irs, ++ struct sk_buff *skb, ++ __u32 *replay, ++ unsigned char **authenticator) ++{ ++ struct esphdr *espp = irs->protostuff.espstuff.espp; ++ //unsigned char *idat = (unsigned char *)espp; ++ ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "packet from %s received with seq=%d (iv)=0x%08x%08x iplen=%d esplen=%d sa=%s\n", ++ irs->ipsaddr_txt, ++ (__u32)ntohl(espp->esp_rpl), ++ (__u32)ntohl(*((__u32 *)(espp->esp_iv) )), ++ (__u32)ntohl(*((__u32 *)(espp->esp_iv) + 1)), ++ irs->len, ++ irs->ilen, ++ irs->sa_len ? irs->sa : " (error)"); ++ ++ *replay = ntohl(espp->esp_rpl); ++ *authenticator = &(skb_transport_header(skb)[irs->ilen]); ++ ++ return IPSEC_RCV_OK; ++} ++ ++enum ipsec_rcv_value ++ipsec_rcv_esp_authcalc(struct ipsec_rcv_state *irs, ++ struct sk_buff *skb) ++{ ++ struct auth_alg *aa; ++ struct esphdr *espp = irs->protostuff.espstuff.espp; ++ union { ++ MD5_CTX md5; ++ SHA1_CTX sha1; ++ } tctx; ++ ++#ifdef CONFIG_KLIPS_ALG ++ if (irs->ipsp->ips_alg_auth) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "ipsec_alg hashing proto=%d... 
", ++ irs->said.proto); ++ if(irs->said.proto == IPPROTO_ESP) { ++ ipsec_alg_sa_esp_hash(irs->ipsp, ++ (caddr_t)espp, irs->ilen, ++ irs->hash, AHHMAC_HASHLEN); ++ return IPSEC_RCV_OK; ++ } ++ return IPSEC_RCV_BADPROTO; ++ } ++#endif ++ aa = irs->authfuncs; ++ ++ /* copy the initialized keying material */ ++ memcpy(&tctx, irs->ictx, irs->ictx_len); ++ ++#ifdef HASH_DEBUG ++ ESP_DMP("ictx", irs->ictx, irs->ictx_len); ++ ++ ESP_DMP("mac_esp", (caddr_t)espp, irs->ilen); ++#endif ++ (*aa->update)((void *)&tctx, (caddr_t)espp, irs->ilen); ++ ++ (*aa->final)(irs->hash, (void *)&tctx); ++ ++#ifdef HASH_DEBUG ++ ESP_DMP("hash1", irs->hash, aa->hashlen); ++#endif ++ ++ memcpy(&tctx, irs->octx, irs->octx_len); ++ ++#ifdef HASH_DEBUG ++ ESP_DMP("octx", irs->octx, irs->octx_len); ++#endif ++ ++ (*aa->update)((void *)&tctx, irs->hash, aa->hashlen); ++ (*aa->final)(irs->hash, (void *)&tctx); ++ ++ return IPSEC_RCV_OK; ++} ++ ++ ++enum ipsec_rcv_value ++ipsec_rcv_esp_decrypt(struct ipsec_rcv_state *irs) ++{ ++ struct ipsec_sa *ipsp = irs->ipsp; ++ struct esphdr *espp = irs->protostuff.espstuff.espp; ++ int i; ++ int pad = 0, padlen; ++ int badpad = 0; ++ int esphlen = 0; ++ __u8 *idat; /* pointer to content to be decrypted/authenticated */ ++ int encaplen = 0; ++ struct sk_buff *skb; ++ struct ipsec_alg_enc *ixt_e=NULL; ++ ++#ifdef CONFIG_KLIPS_ALG ++ skb=irs->skb; ++ ++ idat = skb_transport_header(skb); ++ ++ /* encaplen is the distance between the end of the IP ++ * header and the beginning of the ESP header. ++ * on ESP headers it is zero, but on UDP-encap ESP ++ * it includes the space for the UDP header. ++ * ++ * Note: UDP-encap code has already moved the ++ * skb->data forward to accomodate this. 
++ */ ++ encaplen = skb_transport_header(skb) - (skb_network_header(skb) + irs->iphlen); ++ ++ ixt_e=ipsp->ips_alg_enc; ++ esphlen = ESP_HEADER_LEN + ixt_e->ixt_common.ixt_support.ias_ivlen/8; ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "encalg=%d esphlen=%d\n", ++ ipsp->ips_encalg, esphlen); ++ ++ idat += esphlen; ++ irs->ilen -= esphlen; ++ ++ if (ipsec_alg_esp_encrypt(ipsp, ++ idat, irs->ilen, espp->esp_iv, ++ IPSEC_ALG_DECRYPT) <= 0) { ++#ifdef CONFIG_KLIPS_DEBUG ++ KLIPS_ERROR(debug_rcv, "klips_error:ipsec_rcv: " ++ "got packet with esplen = %d " ++ "from %s -- should be on " ++ "ENC(%d) octet boundary, " ++ "packet dropped\n", ++ irs->ilen, ++ irs->ipsaddr_txt, ++ ipsp->ips_encalg); ++#endif ++ if(irs->stats) { ++ irs->stats->rx_errors++; ++ } ++ return IPSEC_RCV_BAD_DECRYPT; ++#endif /* CONFIG_KLIPS_ALG */ ++ } ++ ++ ESP_DMP("postdecrypt", idat, irs->ilen); ++ ++ irs->next_header = idat[irs->ilen - 1]; ++ padlen = idat[irs->ilen - 2]; ++ pad = padlen + 2 + irs->authlen; ++ ++ KLIPS_PRINT(debug_rcv & DB_RX_IPAD, ++ "klips_debug:ipsec_rcv: " ++ "padlen=%d, contents: 0x: 0x 0x ...\n", ++ padlen); ++ ++ for (i = 1; i <= padlen; i++) { ++ if((i % 16) == 1) { ++ KLIPS_PRINT(debug_rcv & DB_RX_IPAD, ++ "klips_debug: %02x:", ++ i - 1); ++ } ++ KLIPS_PRINTMORE(debug_rcv & DB_RX_IPAD, ++ " %02x", ++ idat[irs->ilen - 2 - padlen + i - 1]); ++ if(i != idat[irs->ilen - 2 - padlen + i - 1]) { ++ badpad = 1; ++ } ++ if((i % 16) == 0) { ++ KLIPS_PRINTMORE(debug_rcv & DB_RX_IPAD, ++ "\n"); ++ } ++ } ++ if((i % 16) != 1) { ++ KLIPS_PRINTMORE(debug_rcv & DB_RX_IPAD, ++ "\n"); ++ } ++ if(badpad) { ++ KLIPS_PRINT(debug_rcv & DB_RX_IPAD, ++ "klips_debug:ipsec_rcv: " ++ "warning, decrypted packet from %s has bad padding\n", ++ irs->ipsaddr_txt); ++ KLIPS_PRINT(debug_rcv & DB_RX_IPAD, ++ "klips_debug:ipsec_rcv: " ++ "...may be bad decryption -- not dropped\n"); ++ ipsp->ips_errs.ips_encpad_errs += 1; ++ } ++ ++ KLIPS_PRINT(debug_rcv & DB_RX_IPAD, ++ 
"klips_debug:ipsec_rcv: " ++ "packet decrypted from %s: next_header = %d, padding = %d\n", ++ irs->ipsaddr_txt, ++ irs->next_header, ++ pad - 2 - irs->authlen); ++ ++ irs->ipp->tot_len = htons(ntohs(irs->ipp->tot_len) - (esphlen + pad)); ++ ++ /* ++ * move the IP header forward by the size of the ESP header, which ++ * will remove the the ESP header from the packet. ++ * ++ * XXX this is really unnecessary, since odds we are in tunnel ++ * mode, and we will be *removing* this IP header. ++ * ++ */ ++ memmove((void *)(idat - irs->iphlen), ++ (void *)(skb_network_header(skb)), irs->iphlen); ++ ++ ESP_DMP("esp postmove", (idat - irs->iphlen), ++ irs->iphlen + irs->ilen); ++ ++ /* skb_pull below, will move up by esphlen */ ++ ++ /* XXX not clear how this can happen, as the message indicates */ ++ if(skb->len < esphlen) { ++ printk(KERN_WARNING ++ "klips_error:ipsec_rcv: " ++ "tried to skb_pull esphlen=%d, %d available. This should never happen, please report.\n", ++ esphlen, (int)(skb->len)); ++ return IPSEC_RCV_ESP_DECAPFAIL; ++ } ++ skb_pull(skb, esphlen); ++ skb_set_network_header(skb, ipsec_skb_offset(skb, idat - irs->iphlen)); ++ irs->ipp = ip_hdr(skb); ++ ++ ESP_DMP("esp postpull", skb->data, skb->len); ++ ++ /* now, trip off the padding from the end */ ++ KLIPS_PRINT(debug_rcv & DB_RX_PKTRX, ++ "klips_debug:ipsec_rcv: " ++ "trimming to %d.\n", ++ irs->len - esphlen - pad); ++ if(pad + esphlen <= irs->len) { ++ skb_trim(skb, irs->len - esphlen - pad); ++ } else { ++ KLIPS_PRINT(debug_rcv & DB_RX_PKTRX, ++ "klips_debug:ipsec_rcv: " ++ "bogus packet, size is zero or negative, dropping.\n"); ++ return IPSEC_RCV_DECAPFAIL; ++ } ++ ++ return IPSEC_RCV_OK; ++} ++ ++/* ++ * ++ */ ++enum ipsec_xmit_value ++ipsec_xmit_esp_setup(struct ipsec_xmit_state *ixs) ++{ ++#ifdef CONFIG_KLIPS_ENC_3DES ++ __u32 iv[2]; ++#endif ++ struct esphdr *espp; ++ int ilen = 0; ++ int padlen = 0, i; ++ unsigned char *dat; ++ unsigned char *idat, *pad; ++ __u8 hash[AH_AMAX]; ++ union { ++#ifdef 
CONFIG_KLIPS_AUTH_HMAC_MD5 ++ MD5_CTX md5; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++ SHA1_CTX sha1; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ ++ } tctx; ++ ++ dat = (unsigned char *)ixs->iph; ++ ++ espp = (struct esphdr *)(dat + ixs->iphlen); ++ espp->esp_spi = ixs->ipsp->ips_said.spi; ++ espp->esp_rpl = htonl(++(ixs->ipsp->ips_replaywin_lastseq)); ++ ++ switch(ixs->ipsp->ips_encalg) { ++#if defined(CONFIG_KLIPS_ENC_3DES) ++#ifdef CONFIG_KLIPS_ENC_3DES ++ case ESP_3DES: ++#endif /* CONFIG_KLIPS_ENC_3DES */ ++ iv[0] = *((__u32*)&(espp->esp_iv) ) = ++ ((__u32*)(ixs->ipsp->ips_iv))[0]; ++ iv[1] = *((__u32*)&(espp->esp_iv) + 1) = ++ ((__u32*)(ixs->ipsp->ips_iv))[1]; ++ break; ++#endif /* defined(CONFIG_KLIPS_ENC_3DES) */ ++ default: ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_ESP_BADALG; ++ } ++ ++ idat = dat + ixs->iphlen + sizeof(struct esphdr); ++ ilen = ixs->skb->len - (ixs->iphlen + sizeof(struct esphdr) + ixs->authlen); ++ ++ /* Self-describing padding */ ++ pad = &dat[ixs->skb->len - ixs->tailroom]; ++ padlen = ixs->tailroom - 2 - ixs->authlen; ++ for (i = 0; i < padlen; i++) { ++ pad[i] = i + 1; ++ } ++ dat[ixs->skb->len - ixs->authlen - 2] = padlen; ++ ++ dat[ixs->skb->len - ixs->authlen - 1] = ixs->iph->protocol; ++ ixs->iph->protocol = IPPROTO_ESP; ++ ++ switch(ixs->ipsp->ips_encalg) { ++#ifdef CONFIG_KLIPS_ENC_3DES ++ case ESP_3DES: ++ des_ede3_cbc_encrypt((des_cblock *)idat, ++ (des_cblock *)idat, ++ ilen, ++ ((struct des_eks *)(ixs->ipsp->ips_key_e))[0].ks, ++ ((struct des_eks *)(ixs->ipsp->ips_key_e))[1].ks, ++ ((struct des_eks *)(ixs->ipsp->ips_key_e))[2].ks, ++ (des_cblock *)iv, 1); ++ break; ++#endif /* CONFIG_KLIPS_ENC_3DES */ ++ default: ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_ESP_BADALG; ++ } ++ ++ switch(ixs->ipsp->ips_encalg) { ++#if defined(CONFIG_KLIPS_ENC_3DES) ++#ifdef CONFIG_KLIPS_ENC_3DES ++ case ESP_3DES: ++#endif /* CONFIG_KLIPS_ENC_3DES */ ++ /* XXX update IV with the last 8 octets 
of the encryption */ ++#if KLIPS_IMPAIRMENT_ESPIV_CBC_ATTACK ++ ((__u32*)(ixs->ipsp->ips_iv))[0] = ++ ((__u32 *)(idat))[(ilen >> 2) - 2]; ++ ((__u32*)(ixs->ipsp->ips_iv))[1] = ++ ((__u32 *)(idat))[(ilen >> 2) - 1]; ++#else /* KLIPS_IMPAIRMENT_ESPIV_CBC_ATTACK */ ++ prng_bytes(&ipsec_prng, (char *)ixs->ipsp->ips_iv, EMT_ESPDES_IV_SZ); ++#endif /* KLIPS_IMPAIRMENT_ESPIV_CBC_ATTACK */ ++ break; ++#endif /* defined(CONFIG_KLIPS_ENC_3DES) */ ++ default: ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_ESP_BADALG; ++ } ++ ++ switch(ixs->ipsp->ips_authalg) { ++#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 ++ case AH_MD5: ++ ipsec_xmit_dmp("espp", (char*)espp, ixs->skb->len - ixs->iphlen - ixs->authlen); ++ tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->ictx; ++ ipsec_xmit_dmp("ictx", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Update(&tctx.md5, (caddr_t)espp, ixs->skb->len - ixs->iphlen - ixs->authlen); ++ ipsec_xmit_dmp("ictx+dat", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Final(hash, &tctx.md5); ++ ipsec_xmit_dmp("ictx hash", (char*)&hash, sizeof(hash)); ++ tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->octx; ++ ipsec_xmit_dmp("octx", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Update(&tctx.md5, hash, AHMD596_ALEN); ++ ipsec_xmit_dmp("octx+hash", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Final(hash, &tctx.md5); ++ ipsec_xmit_dmp("octx hash", (char*)&hash, sizeof(hash)); ++ memcpy(&(dat[ixs->skb->len - ixs->authlen]), hash, ixs->authlen); ++ ++ /* paranoid */ ++ memset((caddr_t)&tctx.md5, 0, sizeof(tctx.md5)); ++ memset((caddr_t)hash, 0, sizeof(*hash)); ++ break; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++ case AH_SHA: ++ tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->ictx; ++ SHA1Update(&tctx.sha1, (caddr_t)espp, ixs->skb->len - ixs->iphlen - ixs->authlen); ++ SHA1Final(hash, &tctx.sha1); ++ tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->octx; ++ SHA1Update(&tctx.sha1, hash, AHSHA196_ALEN); ++ 
SHA1Final(hash, &tctx.sha1); ++ memcpy(&(dat[ixs->skb->len - ixs->authlen]), hash, ixs->authlen); ++ ++ /* paranoid */ ++ memset((caddr_t)&tctx.sha1, 0, sizeof(tctx.sha1)); ++ memset((caddr_t)hash, 0, sizeof(*hash)); ++ break; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ ++ case AH_NONE: ++ break; ++ default: ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_AH_BADALG; ++ } ++ ++ skb_set_transport_header(ixs->skb, ipsec_skb_offset(ixs->skb, espp)); ++ ++ return IPSEC_XMIT_OK; ++} ++ ++ ++struct xform_functions esp_xform_funcs[]={ ++ { rcv_checks: ipsec_rcv_esp_checks, ++ rcv_setup_auth: ipsec_rcv_esp_decrypt_setup, ++ rcv_calc_auth: ipsec_rcv_esp_authcalc, ++ rcv_decrypt: ipsec_rcv_esp_decrypt, ++ ++ xmit_setup: ipsec_xmit_esp_setup, ++ xmit_headroom: sizeof(struct esphdr), ++ xmit_needtailroom: 1, ++ }, ++}; ++ ++#ifdef NET_26 ++struct inet_protocol esp_protocol = { ++ .handler = ipsec_rcv, ++ .no_policy = 1, ++}; ++#else ++struct inet_protocol esp_protocol = ++{ ++ ipsec_rcv, /* ESP handler */ ++ NULL, /* TUNNEL error control */ ++#ifdef NETDEV_25 ++ 1, /* no policy */ ++#else ++ 0, /* next */ ++ IPPROTO_ESP, /* protocol ID */ ++ 0, /* copy */ ++ NULL, /* data */ ++ "ESP" /* name */ ++#endif ++}; ++#endif /* NET_26 */ ++ ++#endif /* !CONFIG_KLIPS_ESP */ ++ ++ ++/* ++ * $Log: ipsec_esp.c,v $ ++ * Revision 1.13.2.7 2007-09-05 02:56:09 paul ++ * Use the new ipsec_kversion macros by David to deal with 2.6.22 kernels. ++ * Fixes based on David McCullough patch. ++ * ++ * Revision 1.13.2.6 2006/10/06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.13.2.5 2006/08/24 03:02:01 paul ++ * Compile fixes for when CONFIG_KLIPS_DEBUG is not set. 
(bug #642) ++ * ++ * Revision 1.13.2.4 2006/05/06 03:07:38 ken ++ * Pull in proper padsize->tailroom fix from #public ++ * Need to do correct math on padlen since padsize is not equal to tailroom ++ * ++ * Revision 1.13.2.3 2006/05/05 03:58:04 ken ++ * ixs->padsize becomes ixs->tailroom ++ * ++ * Revision 1.13.2.2 2006/05/01 14:36:03 mcr ++ * use KLIPS_ERROR for fatal things. ++ * ++ * Revision 1.13.2.1 2006/04/20 16:33:06 mcr ++ * remove all of CONFIG_KLIPS_ALG --- one can no longer build without it. ++ * Fix in-kernel module compilation. Sub-makefiles do not work. ++ * ++ * Revision 1.13 2005/05/21 03:19:57 mcr ++ * hash ctx is not really that interesting most of the time. ++ * ++ * Revision 1.12 2005/05/11 01:28:49 mcr ++ * removed "poor-man"s OOP in favour of proper C structures. ++ * ++ * Revision 1.11 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. ++ * ++ * Revision 1.10 2005/04/17 04:36:14 mcr ++ * code now deals with ESP and UDP-ESP code. ++ * ++ * Revision 1.9 2005/04/15 19:52:30 mcr ++ * adjustments to use proper skb fields for data. ++ * ++ * Revision 1.8 2004/09/14 00:22:57 mcr ++ * adjustment of MD5* functions. ++ * ++ * Revision 1.7 2004/09/13 02:23:01 mcr ++ * #define inet_protocol if necessary. ++ * ++ * Revision 1.6 2004/09/06 18:35:49 mcr ++ * 2.6.8.1 gets rid of inet_protocol->net_protocol compatibility, ++ * so adjust for that. ++ * ++ * Revision 1.5 2004/08/17 03:27:23 mcr ++ * klips 2.6 edits. ++ * ++ * Revision 1.4 2004/08/04 15:57:07 mcr ++ * moved des .h files to include/des/ * ++ * included 2.6 protocol specific things ++ * started at NAT-T support, but it will require a kernel patch. ++ * ++ * Revision 1.3 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.2 2004/04/06 02:49:25 mcr ++ * pullup of algo code from alg-branch. 
++ * ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_init.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,641 @@ ++/* ++ * @(#) Initialization code. ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998 - 2002 Richard Guy Briggs ++ * 2001 - 2004 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * /proc system code was split out into ipsec_proc.c after rev. 1.70. ++ * ++ */ ++ ++char ipsec_init_c_version[] = "RCSID $Id: ipsec_init.c,v 1.104.2.6 2007-11-16 03:31:52 paul Exp $"; ++ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include /* struct sockaddr_in */ ++#include ++#include /* get_random_bytes() */ ++#include ++ ++#include ++ ++#ifdef SPINLOCK ++# ifdef SPINLOCK_23 ++# include /* *lock* */ ++# else /* 23_SPINLOCK */ ++# include /* *lock* */ ++# endif /* 23_SPINLOCK */ ++#endif /* SPINLOCK */ ++ ++#include ++ ++#ifdef CONFIG_PROC_FS ++# include ++#endif /* CONFIG_PROC_FS */ ++ ++#ifdef NETLINK_SOCK ++# include ++#else ++# include ++#endif ++ ++#include "openswan/radij.h" ++ ++#include "openswan/ipsec_life.h" 
++#include "openswan/ipsec_stats.h" ++#include "openswan/ipsec_sa.h" ++ ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_tunnel.h" ++ ++#include "openswan/ipsec_rcv.h" ++#include "openswan/ipsec_xmit.h" ++#include "openswan/ipsec_ah.h" ++#include "openswan/ipsec_esp.h" ++ ++#ifdef CONFIG_KLIPS_IPCOMP ++# include "openswan/ipcomp.h" ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ ++#include "openswan/ipsec_proto.h" ++#include "openswan/ipsec_alg.h" ++ ++#include ++#include ++ ++#if defined(NET_26) && defined(CONFIG_IPSEC_NAT_TRAVERSAL) ++#include ++#endif ++ ++#if defined(NET_26) && defined(CONFIG_IPSEC_NAT_TRAVERSAL) && !defined(HAVE_XFRM4_UDP_REGISTER) ++#warning "You are trying to build KLIPS2.6 with NAT-T support, but you did not" ++#error "properly apply the NAT-T patch to your 2.6 kernel source tree." ++#endif ++ ++#if !defined(CONFIG_KLIPS_ESP) && !defined(CONFIG_KLIPS_AH) ++#error "kernel configuration must include ESP or AH" ++#endif ++ ++/* ++ * seems to be present in 2.4.10 (Linus), but also in some RH and other ++ * distro kernels of a lower number. ++ */ ++#ifdef MODULE_LICENSE ++MODULE_LICENSE("GPL"); ++#endif ++ ++#ifdef CONFIG_KLIPS_DEBUG ++int debug_eroute = 0; ++int debug_spi = 0; ++int debug_netlink = 0; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++struct prng ipsec_prng; ++ ++ ++#if defined(NET_26) && defined(CONFIG_IPSEC_NAT_TRAVERSAL) ++xfrm4_rcv_encap_t klips_old_encap = NULL; ++#endif ++ ++extern int ipsec_device_event(struct notifier_block *dnot, unsigned long event, void *ptr); ++/* ++ * the following structure is required so that we receive ++ * event notifications when network devices are enabled and ++ * disabled (ifconfig up and down). 
++ */ ++static struct notifier_block ipsec_dev_notifier={ ++ ipsec_device_event, ++ NULL, ++ 0 ++}; ++ ++#ifdef CONFIG_SYSCTL ++extern int ipsec_sysctl_register(void); ++extern void ipsec_sysctl_unregister(void); ++#endif ++ ++/* ++ * inet_*_protocol returns void on 2.4.x, int on 2.6.x ++ * So we need our own wrapper ++ */ ++#if defined(NET_26) || defined(IPSKB_XFRM_TUNNEL_SIZE) ++static inline int ++openswan_inet_add_protocol(struct inet_protocol *prot, unsigned protocol,char *protstr) ++{ ++ int err = inet_add_protocol(prot, protocol); ++ if (err) ++ printk(KERN_ERR "KLIPS: can not register %s protocol - recompile with CONFIG_INET_%s disabled or as module\n", protstr,protstr); ++ return err; ++ ++} ++ ++static inline int ++openswan_inet_del_protocol(struct inet_protocol *prot, unsigned protocol) ++{ ++ return inet_del_protocol(prot, protocol); ++} ++ ++#else ++static inline int ++openswan_inet_add_protocol(struct inet_protocol *prot, unsigned protocol, char *protstr) ++{ ++ inet_add_protocol(prot); ++ return 0; ++} ++ ++static inline int ++openswan_inet_del_protocol(struct inet_protocol *prot, unsigned protocol) ++{ ++ inet_del_protocol(prot); ++ return 0; ++} ++ ++#endif ++ ++/* void */ ++int ++ipsec_klips_init(void) ++{ ++ int error = 0; ++ unsigned char seed[256]; ++#ifdef CONFIG_KLIPS_ENC_3DES ++ extern int des_check_key; ++ ++ /* turn off checking of keys */ ++ des_check_key=0; ++#endif /* CONFIG_KLIPS_ENC_3DES */ ++ ++ KLIPS_PRINT(1, "klips_info:ipsec_init: " ++ "KLIPS startup, Openswan KLIPS IPsec stack version: %s\n", ++ ipsec_version_code()); ++ ++ error |= ipsec_proc_init(); ++ ++#ifdef SPINLOCK ++ ipsec_sadb.sadb_lock = SPIN_LOCK_UNLOCKED; ++#else /* SPINLOCK */ ++ ipsec_sadb.sadb_lock = 0; ++#endif /* SPINLOCK */ ++ ++#ifndef SPINLOCK ++ tdb_lock.lock = 0; ++ eroute_lock.lock = 0; ++#endif /* !SPINLOCK */ ++ ++ error |= ipsec_sadb_init(); ++ error |= ipsec_radijinit(); ++ ++ error |= pfkey_init(); ++ ++ error |= 
register_netdevice_notifier(&ipsec_dev_notifier); ++ ++#ifdef CONFIG_KLIPS_ESP ++ error |= openswan_inet_add_protocol(&esp_protocol, IPPROTO_ESP,"ESP"); ++#endif /* CONFIG_KLIPS_ESP */ ++ ++#ifdef CONFIG_KLIPS_AH ++ error |= openswan_inet_add_protocol(&ah_protocol, IPPROTO_AH,"AH"); ++#endif /* CONFIG_KLIPS_AH */ ++ ++/* we never actually link IPCOMP to the stack */ ++#ifdef IPCOMP_USED_ALONE ++#ifdef CONFIG_KLIPS_IPCOMP ++ error |= openswan_inet_add_protocol(&comp_protocol, IPPROTO_COMP,"IPCOMP"); ++#endif /* CONFIG_KLIPS_IPCOMP */ ++#endif ++ ++ error |= ipsec_tunnel_init_devices(); ++ ++#if defined(NET_26) && defined(CONFIG_IPSEC_NAT_TRAVERSAL) ++ /* register our ESP-UDP handler */ ++ if(udp4_register_esp_rcvencap(klips26_rcv_encap ++ , &klips_old_encap)!=0) { ++ printk(KERN_ERR "KLIPS: can not register klips_rcv_encap function\n"); ++ } ++#endif ++ ++ ++#ifdef CONFIG_SYSCTL ++ error |= ipsec_sysctl_register(); ++#endif ++ ++#ifdef CONFIG_KLIPS_ALG ++ ipsec_alg_init(); ++#endif ++ ++ get_random_bytes((void *)seed, sizeof(seed)); ++ prng_init(&ipsec_prng, seed, sizeof(seed)); ++ ++ return error; ++} ++ ++ ++/* void */ ++int ++ipsec_cleanup(void) ++{ ++ int error = 0; ++ ++#ifdef CONFIG_SYSCTL ++ ipsec_sysctl_unregister(); ++#endif ++#if defined(NET_26) && defined(CONFIG_IPSEC_NAT_TRAVERSAL) ++ if(udp4_unregister_esp_rcvencap(klips_old_encap) < 0) { ++ printk(KERN_ERR "KLIPS: can not unregister klips_rcv_encap function\n"); ++ } ++#endif ++ ++ KLIPS_PRINT(debug_netlink, /* debug_tunnel & DB_TN_INIT, */ ++ "klips_debug:ipsec_cleanup: " ++ "calling ipsec_tunnel_cleanup_devices.\n"); ++ error |= ipsec_tunnel_cleanup_devices(); ++ ++ KLIPS_PRINT(debug_netlink, "called ipsec_tunnel_cleanup_devices"); ++ ++/* we never actually link IPCOMP to the stack */ ++#ifdef IPCOMP_USED_ALONE ++#ifdef CONFIG_KLIPS_IPCOMP ++ if (openswan_inet_del_protocol(&comp_protocol, IPPROTO_COMP) < 0) ++ printk(KERN_INFO "klips_debug:ipsec_cleanup: " ++ "comp close: can't remove protocol\n"); 
++#endif /* CONFIG_KLIPS_IPCOMP */ ++#endif /* IPCOMP_USED_ALONE */ ++ ++#ifdef CONFIG_KLIPS_AH ++ if (openswan_inet_del_protocol(&ah_protocol, IPPROTO_AH) < 0) ++ printk(KERN_INFO "klips_debug:ipsec_cleanup: " ++ "ah close: can't remove protocol\n"); ++#endif /* CONFIG_KLIPS_AH */ ++ ++#ifdef CONFIG_KLIPS_ESP ++ if (openswan_inet_del_protocol(&esp_protocol, IPPROTO_ESP) < 0) ++ printk(KERN_INFO "klips_debug:ipsec_cleanup: " ++ "esp close: can't remove protocol\n"); ++#endif /* CONFIG_KLIPS_ESP */ ++ ++ error |= unregister_netdevice_notifier(&ipsec_dev_notifier); ++ ++ KLIPS_PRINT(debug_netlink, /* debug_tunnel & DB_TN_INIT, */ ++ "klips_debug:ipsec_cleanup: " ++ "calling ipsec_sadb_cleanup.\n"); ++ error |= ipsec_sadb_cleanup(0); ++ error |= ipsec_sadb_free(); ++ ++ KLIPS_PRINT(debug_netlink, /* debug_tunnel & DB_TN_INIT, */ ++ "klips_debug:ipsec_cleanup: " ++ "calling ipsec_radijcleanup.\n"); ++ error |= ipsec_radijcleanup(); ++ ++ KLIPS_PRINT(debug_pfkey, /* debug_tunnel & DB_TN_INIT, */ ++ "klips_debug:ipsec_cleanup: " ++ "calling pfkey_cleanup.\n"); ++ error |= pfkey_cleanup(); ++ ++ ipsec_proc_cleanup(); ++ ++ prng_final(&ipsec_prng); ++ ++ return error; ++} ++ ++#ifdef MODULE ++int ++init_module(void) ++{ ++ int error = 0; ++ ++ error |= ipsec_klips_init(); ++ /*if (error) ++ ipsec_cleanup(); ++ */ ++ return error; ++} ++ ++void ++cleanup_module(void) ++{ ++ KLIPS_PRINT(debug_netlink, /* debug_tunnel & DB_TN_INIT, */ ++ "klips_debug:cleanup_module: " ++ "calling ipsec_cleanup.\n"); ++ ++ ipsec_cleanup(); ++ ++ KLIPS_PRINT(1, "klips_info:cleanup_module: " ++ "ipsec module unloaded.\n"); ++} ++#endif /* MODULE */ ++ ++/* ++ * $Log: ipsec_init.c,v $ ++ * Revision 1.104.2.6 2007-11-16 03:31:52 paul ++ * Added log message to openswan_inet_add_protocol() if we fail to register ++ * our protocol with KLIPS (eg ESP because esp4 module is already loaded). ++ * We didnt notice this failure before. 
We now return a proper error, but ++ * ++ * TODO: ++ * ++ * we still need to do a beter cleanup, as we're leaving files in /proc. ++ * (calling cleanup_module() from init_module() if we see an error caused ++ * its own kernel oopses). ++ * ++ * Revision 1.104.2.5 2007/09/05 02:36:57 paul ++ * include ipsec_init.h. Added an ifdef. Patch by David McCullough ++ * ++ * Revision 1.104.2.4 2006/10/06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.104.2.3 2006/07/31 15:25:20 paul ++ * Check for NETKEY backport in Debian using IPSKB_XFRM_TUNNEL_SIZE to ++ * determine wether inet_add_protocol needs the protocol argument. ++ * ++ * Revision 1.104.2.2 2006/04/20 16:33:06 mcr ++ * remove all of CONFIG_KLIPS_ALG --- one can no longer build without it. ++ * Fix in-kernel module compilation. Sub-makefiles do not work. ++ * ++ * Revision 1.104.2.1 2005/08/12 01:18:20 ken ++ * Warn people who don't have NAT-T patch applied, but try and compile NAT-T code ++ * ++ * Revision 1.105 2005/08/12 00:56:33 mcr ++ * add warning for people who didn't apply nat-t patch. ++ * ++ * Revision 1.104 2005/07/08 15:51:41 mcr ++ * removed duplicate NAT-T code. ++ * if CONFIG_IPSEC_NAT_TRAVERSAL isn't defined, then there is no issue. ++ * ++ * Revision 1.103 2005/07/08 03:02:05 paul ++ * Fixed garbled define that accidentally got commited to the real tree. ++ * ++ * Revision 1.102 2005/07/08 02:56:37 paul ++ * gcc4 fixes that were not commited because vault was down ++ * ++ * Revision 1.101 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. ++ * ++ * Revision 1.100 2005/04/10 22:56:09 mcr ++ * change to udp.c registration API. 
++ * ++ * Revision 1.99 2005/04/08 18:26:13 mcr ++ * register with udp.c, the klips26 encap receive function ++ * ++ * Revision 1.98 2004/09/13 02:23:18 mcr ++ * #define inet_protocol if necessary. ++ * ++ * Revision 1.97 2004/09/06 18:35:49 mcr ++ * 2.6.8.1 gets rid of inet_protocol->net_protocol compatibility, ++ * so adjust for that. ++ * ++ * Revision 1.96 2004/08/17 03:27:23 mcr ++ * klips 2.6 edits. ++ * ++ * Revision 1.95 2004/08/03 18:19:08 mcr ++ * in 2.6, use "net_device" instead of #define device->net_device. ++ * this probably breaks 2.0 compiles. ++ * ++ * Revision 1.94 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.93 2004/04/06 02:49:26 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.92 2004/03/30 15:30:39 ken ++ * Proper Capitalization ++ * ++ * Revision 1.91 2004/03/22 01:51:51 ken ++ * We are open ++ * ++ * Revision 1.90.4.2 2004/04/05 04:30:46 mcr ++ * patches for alg-branch to compile/work with 2.x openswan ++ * ++ * Revision 1.90.4.1 2003/12/22 15:25:52 jjo ++ * Merged algo-0.8.1-rc11-test1 into alg-branch ++ * ++ * Revision 1.90 2003/10/31 02:27:55 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.89.4.1 2003/10/29 01:30:41 mcr ++ * elimited "struct sa_id". ++ * ++ * Revision 1.89 2003/07/31 22:47:16 mcr ++ * preliminary (untested by FS-team) 2.5 patches. ++ * ++ * Revision 1.88 2003/06/22 20:05:36 mcr ++ * clarified why IPCOMP was not being registered, and put a new ++ * #ifdef in rather than #if 0. ++ * ++ * Revision 1.87 2002/09/20 15:40:51 rgb ++ * Added a lock to the global ipsec_sadb struct for future use. ++ * Split ipsec_sadb_cleanup from new funciton ipsec_sadb_free to avoid problem ++ * of freeing newly created structures when clearing the reftable upon startup ++ * to start from a known state. ++ * ++ * Revision 1.86 2002/08/15 18:39:15 rgb ++ * Move ipsec_prng outside debug code. 
++ * ++ * Revision 1.85 2002/05/14 02:35:29 rgb ++ * Change reference to tdb to ipsa. ++ * ++ * Revision 1.84 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.83 2002/04/24 07:36:28 mcr ++ * Moved from ./klips/net/ipsec/ipsec_init.c,v ++ * ++ * Revision 1.82 2002/04/20 00:12:25 rgb ++ * Added esp IV CBC attack fix, disabled. ++ * ++ * Revision 1.81 2002/04/09 16:13:32 mcr ++ * switch license to straight GPL. ++ * ++ * Revision 1.80 2002/03/24 07:34:08 rgb ++ * Sanity check for at least one of AH or ESP configured. ++ * ++ * Revision 1.79 2002/02/05 22:55:15 mcr ++ * added MODULE_LICENSE declaration. ++ * This macro does not appear in all kernel versions (see comment). ++ * ++ * Revision 1.78 2002/01/29 17:17:55 mcr ++ * moved include of ipsec_param.h to after include of linux/kernel.h ++ * otherwise, it seems that some option that is set in ipsec_param.h ++ * screws up something subtle in the include path to kernel.h, and ++ * it complains on the snprintf() prototype. ++ * ++ * Revision 1.77 2002/01/29 04:00:51 mcr ++ * more excise of kversions.h header. ++ * ++ * Revision 1.76 2002/01/29 02:13:17 mcr ++ * introduction of ipsec_kversion.h means that include of ++ * ipsec_param.h must preceed any decisions about what files to ++ * include to deal with differences in kernel source. ++ * ++ * Revision 1.75 2001/11/26 09:23:48 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.74 2001/11/22 05:44:11 henry ++ * new version stuff ++ * ++ * Revision 1.71.2.2 2001/10/22 20:51:00 mcr ++ * explicitely set des_check_key. ++ * ++ * Revision 1.71.2.1 2001/09/25 02:19:39 mcr ++ * /proc manipulation code moved to new ipsec_proc.c ++ * ++ * Revision 1.73 2001/11/06 19:47:17 rgb ++ * Changed lifetime_packets to uint32 from uint64. 
++ * ++ * Revision 1.72 2001/10/18 04:45:19 rgb ++ * 2.4.9 kernel deprecates linux/malloc.h in favour of linux/slab.h, ++ * lib/freeswan.h version macros moved to lib/kversions.h. ++ * Other compiler directive cleanups. ++ * ++ * Revision 1.71 2001/09/20 15:32:45 rgb ++ * Minor pfkey lifetime fixes. ++ * ++ * Revision 1.70 2001/07/06 19:51:21 rgb ++ * Added inbound policy checking code for IPIP SAs. ++ * ++ * Revision 1.69 2001/06/14 19:33:26 rgb ++ * Silence startup message for console, but allow it to be logged. ++ * Update copyright date. ++ * ++ * Revision 1.68 2001/05/29 05:14:36 rgb ++ * Added PMTU to /proc/net/ipsec_tncfg output. See 'man 5 ipsec_tncfg'. ++ * ++ * Revision 1.67 2001/05/04 16:34:52 rgb ++ * Rremove erroneous checking of return codes for proc_net_* in 2.4. ++ * ++ * Revision 1.66 2001/05/03 19:40:34 rgb ++ * Check error return codes in startup and shutdown. ++ * ++ * Revision 1.65 2001/02/28 05:03:27 rgb ++ * Clean up and rationalise startup messages. ++ * ++ * Revision 1.64 2001/02/27 22:24:53 rgb ++ * Re-formatting debug output (line-splitting, joining, 1arg/line). ++ * Check for satoa() return codes. ++ * ++ * Revision 1.63 2000/11/29 20:14:06 rgb ++ * Add src= to the output of /proc/net/ipsec_spi and delete dst from IPIP. ++ * ++ * Revision 1.62 2000/11/06 04:31:24 rgb ++ * Ditched spin_lock_irqsave in favour of spin_lock_bh. ++ * Fixed longlong for pre-2.4 kernels (Svenning). ++ * Add Svenning's adaptive content compression. ++ * Disabled registration of ipcomp handler. ++ * ++ * Revision 1.61 2000/10/11 13:37:54 rgb ++ * #ifdef out debug print that causes proc/net/ipsec_version to oops. ++ * ++ * Revision 1.60 2000/09/20 03:59:01 rgb ++ * Change static info functions to DEBUG_NO_STATIC to reveal function names ++ * in oopsen. ++ * ++ * Revision 1.59 2000/09/16 01:06:26 rgb ++ * Added cast of var to silence compiler warning about long fed to int ++ * format. 
++ * ++ * Revision 1.58 2000/09/15 11:37:01 rgb ++ * Merge in heavily modified Svenning Soerensen's ++ * IPCOMP zlib deflate code. ++ * ++ * Revision 1.57 2000/09/12 03:21:50 rgb ++ * Moved radij_c_version printing to ipsec_version_get_info(). ++ * Reformatted ipsec_version_get_info(). ++ * Added sysctl_{,un}register() calls. ++ * ++ * Revision 1.56 2000/09/08 19:16:50 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * Removed all references to CONFIG_IPSEC_PFKEYv2. ++ * ++ * Revision 1.55 2000/08/30 05:19:03 rgb ++ * Cleaned up no longer used spi_next, netlink register/unregister, other ++ * minor cleanup. ++ * Removed cruft replaced by TDB_XFORM_NAME. ++ * Removed all the rest of the references to tdb_spi, tdb_proto, tdb_dst. ++ * Moved debug version strings to printk when /proc/net/ipsec_version is ++ * called. ++ * ++ * Revision 1.54 2000/08/20 18:31:05 rgb ++ * Changed cosmetic alignment in spi_info. ++ * Changed addtime and usetime to use actual value which is relative ++ * anyways, as intended. (Momchil) ++ * ++ * Revision 1.53 2000/08/18 17:37:03 rgb ++ * Added an (int) cast to shut up the compiler... ++ * ++ * Revision 1.52 2000/08/01 14:51:50 rgb ++ * Removed _all_ remaining traces of DES. ++ * ++ * Revision 1.51 2000/07/25 20:41:22 rgb ++ * Removed duplicate parameter in spi_getinfo. ++ * ++ * Revision 1.50 2000/07/17 03:21:45 rgb ++ * Removed /proc/net/ipsec_spinew. ++ * ++ * Revision 1.49 2000/06/28 05:46:51 rgb ++ * Renamed ivlen to iv_bits for consistency. ++ * Changed output of add and use times to be relative to now. ++ * ++ * Revision 1.48 2000/05/11 18:26:10 rgb ++ * Commented out calls to netlink_attach/detach to avoid activating netlink ++ * in the kenrel config. ++ * ++ * Revision 1.47 2000/05/10 22:35:26 rgb ++ * Comment out most of the startup version information. ++ * ++ * Revision 1.46 2000/03/22 16:15:36 rgb ++ * Fixed renaming of dev_get (MB). 
++ * ++ * Revision 1.45 2000/03/16 06:40:48 rgb ++ * Hardcode PF_KEYv2 support. ++ * ++ * Revision 1.44 2000/01/22 23:19:20 rgb ++ * Simplified code to use existing macro TDB_XFORM_NAME(). ++ * ++ * Revision 1.43 2000/01/21 06:14:04 rgb ++ * Print individual stats only if non-zero. ++ * Removed 'bits' from each keylength for brevity. ++ * Shortened lifetimes legend for brevity. ++ * Changed wording from 'last_used' to the clearer 'idle'. ++ * ++ * Revision 1.42 1999/12/31 14:57:19 rgb ++ * MB fix for new dummy-less proc_get_info in 2.3.35. ++ * ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_ipcomp.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,256 @@ ++/* ++ * processing code for IPCOMP ++ * Copyright (C) 2003 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ */ ++ ++char ipsec_ipcomp_c_version[] = "RCSID $Id: ipsec_ipcomp.c,v 1.5.2.3 2007-09-05 02:56:09 paul Exp $"; ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++ ++#define __NO_VERSION__ ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include ++#include ++#ifdef SPINLOCK ++# ifdef SPINLOCK_23 ++# include /* *lock* */ ++# else /* SPINLOCK_23 */ ++# include /* *lock* */ ++# endif /* SPINLOCK_23 */ ++#endif /* SPINLOCK */ ++ ++#include ++ ++#include "openswan/radij.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_sa.h" ++ ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_tunnel.h" ++#include "openswan/ipsec_rcv.h" ++#include "openswan/ipsec_xmit.h" ++ ++#include "openswan/ipsec_auth.h" ++ ++#ifdef CONFIG_KLIPS_IPCOMP ++#include "openswan/ipsec_ipcomp.h" ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ ++#include "openswan/ipsec_proto.h" ++ ++#ifdef CONFIG_KLIPS_DEBUG ++int debug_ipcomp = 0; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ ++#ifdef CONFIG_KLIPS_IPCOMP ++enum ipsec_rcv_value ++ipsec_rcv_ipcomp_checks(struct ipsec_rcv_state *irs, ++ struct sk_buff *skb) ++{ ++ int ipcompminlen; ++ ++ ipcompminlen = sizeof(struct iphdr); ++ ++ if(skb->len < (ipcompminlen + sizeof(struct ipcomphdr))) { ++ KLIPS_PRINT(debug_rcv & DB_RX_INAU, ++ "klips_debug:ipsec_rcv: " ++ "runt comp packet of skb->len=%d received from %s, dropped.\n", ++ skb->len, ++ irs->ipsaddr_txt); ++ if(irs->stats) { ++ irs->stats->rx_errors++; ++ } ++ return IPSEC_RCV_BADLEN; ++ } ++ ++ irs->protostuff.ipcompstuff.compp = (struct ipcomphdr *)skb_transport_header(skb); ++ irs->said.spi = 
htonl((__u32)ntohs(irs->protostuff.ipcompstuff.compp->ipcomp_cpi)); ++ return IPSEC_RCV_OK; ++} ++ ++enum ipsec_rcv_value ++ipsec_rcv_ipcomp_decomp(struct ipsec_rcv_state *irs) ++{ ++ unsigned int flags = 0; ++ struct ipsec_sa *ipsp = irs->ipsp; ++ struct sk_buff *skb; ++ ++ skb=irs->skb; ++ ++ ipsec_xmit_dmp("ipcomp", skb_transport_header(skb), skb->len); ++ ++ if(ipsp == NULL) { ++ return IPSEC_RCV_SAIDNOTFOUND; ++ } ++ ++ if(sysctl_ipsec_inbound_policy_check && ++ ((((ntohl(ipsp->ips_said.spi) & 0x0000ffff) != ntohl(irs->said.spi)) && ++ (ipsp->ips_encalg != ntohl(irs->said.spi)) /* this is a workaround for peer non-compliance with rfc2393 */ ++ ))) { ++ char sa2[SATOT_BUF]; ++ size_t sa_len2 = 0; ++ ++ sa_len2 = KLIPS_SATOT(debug_rcv, &ipsp->ips_said, 0, sa2, sizeof(sa2)); ++ ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "Incoming packet with SA(IPCA):%s does not match policy SA(IPCA):%s cpi=%04x cpi->spi=%08x spi=%08x, spi->cpi=%04x for SA grouping, dropped.\n", ++ irs->sa_len ? irs->sa : " (error)", ++ ipsp != NULL ? (sa_len2 ? sa2 : " (error)") : "NULL", ++ ntohs(irs->protostuff.ipcompstuff.compp->ipcomp_cpi), ++ (__u32)ntohl(irs->said.spi), ++ ipsp != NULL ? (__u32)ntohl((ipsp->ips_said.spi)) : 0, ++ ipsp != NULL ? 
(__u16)(ntohl(ipsp->ips_said.spi) & 0x0000ffff) : 0); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ return IPSEC_RCV_SAIDNOTFOUND; ++ } ++ ++ ipsp->ips_comp_ratio_cbytes += ntohs(irs->ipp->tot_len); ++ irs->next_header = irs->protostuff.ipcompstuff.compp->ipcomp_nh; ++ ++ skb = skb_decompress(skb, ipsp, &flags); ++ if (!skb || flags) { ++ spin_unlock(&tdb_lock); ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "skb_decompress() returned error flags=%x, dropped.\n", ++ flags); ++ if (irs->stats) { ++ if (flags) ++ irs->stats->rx_errors++; ++ else ++ irs->stats->rx_dropped++; ++ } ++ return IPSEC_RCV_IPCOMPFAILED; ++ } ++ ++ /* make sure we update the pointer */ ++ irs->skb = skb; ++ ++#ifdef NET_21 ++ irs->ipp = ip_hdr(skb); ++#else /* NET_21 */ ++ irs->ipp = skb->ip_hdr; ++#endif /* NET_21 */ ++ ++ ipsp->ips_comp_ratio_dbytes += ntohs(irs->ipp->tot_len); ++ ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "packet decompressed SA(IPCA):%s cpi->spi=%08x spi=%08x, spi->cpi=%04x, nh=%d.\n", ++ irs->sa_len ? irs->sa : " (error)", ++ (__u32)ntohl(irs->said.spi), ++ ipsp != NULL ? (__u32)ntohl((ipsp->ips_said.spi)) : 0, ++ ipsp != NULL ? 
(__u16)(ntohl(ipsp->ips_said.spi) & 0x0000ffff) : 0, ++ irs->next_header); ++ KLIPS_IP_PRINT(debug_rcv & DB_RX_PKTRX, irs->ipp); ++ ++ return IPSEC_RCV_OK; ++} ++ ++enum ipsec_xmit_value ++ipsec_xmit_ipcomp_setup(struct ipsec_xmit_state *ixs) ++{ ++ unsigned int flags = 0; ++#ifdef CONFIG_KLIPS_DEBUG ++ unsigned int old_tot_len = ntohs(ixs->iph->tot_len); ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ ixs->ipsp->ips_comp_ratio_dbytes += ntohs(ixs->iph->tot_len); ++ ++ ixs->skb = skb_compress(ixs->skb, ixs->ipsp, &flags); ++ ++#ifdef NET_21 ++ ixs->iph = ip_hdr(ixs->skb); ++#else /* NET_21 */ ++ ixs->iph = ixs->skb->ip_hdr; ++#endif /* NET_21 */ ++ ++ ixs->ipsp->ips_comp_ratio_cbytes += ntohs(ixs->iph->tot_len); ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if (debug_tunnel & DB_TN_CROUT) ++ { ++ if (old_tot_len > ntohs(ixs->iph->tot_len)) ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_once: " ++ "packet shrunk from %d to %d bytes after compression, cpi=%04x (should be from spi=%08x, spi&0xffff=%04x.\n", ++ old_tot_len, ntohs(ixs->iph->tot_len), ++ ntohs(((struct ipcomphdr*)(((char*)ixs->iph) + ((ixs->iph->ihl) << 2)))->ipcomp_cpi), ++ ntohl(ixs->ipsp->ips_said.spi), ++ (__u16)(ntohl(ixs->ipsp->ips_said.spi) & 0x0000ffff)); ++ else ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_once: " ++ "packet did not compress (flags = %d).\n", ++ flags); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ return IPSEC_XMIT_OK; ++} ++ ++struct xform_functions ipcomp_xform_funcs[]={ ++ {rcv_checks: ipsec_rcv_ipcomp_checks, ++ rcv_decrypt: ipsec_rcv_ipcomp_decomp, ++ xmit_setup: ipsec_xmit_ipcomp_setup, ++ xmit_headroom: 0, ++ xmit_needtailroom: 0, ++ }, ++}; ++ ++#if 0 ++/* We probably don't want to install a pure IPCOMP protocol handler, but ++ only want to handle IPCOMP if it is encapsulated inside an ESP payload ++ (which is already handled) */ ++#ifdef CONFIG_KLIPS_IPCOMP ++struct inet_protocol comp_protocol = ++{ ++ ipsec_rcv, /* COMP handler 
*/ ++ NULL, /* COMP error control */ ++#ifdef NETDEV_25 ++ 1, /* no policy */ ++#else ++ 0, /* next */ ++ IPPROTO_COMP, /* protocol ID */ ++ 0, /* copy */ ++ NULL, /* data */ ++ "COMP" /* name */ ++#endif ++}; ++#endif /* CONFIG_KLIPS_IPCOMP */ ++#endif ++ ++#endif /* CONFIG_KLIPS_IPCOMP */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_ipip.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,122 @@ ++/* ++ * processing code for IPIP ++ * Copyright (C) 2003 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ */ ++ ++char ipsec_ipip_c_version[] = "RCSID $Id: ipsec_ipip.c,v 1.3.2.4 2007-09-05 02:56:09 paul Exp $"; ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++ ++#define __NO_VERSION__ ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include ++#include ++#ifdef SPINLOCK ++# ifdef SPINLOCK_23 ++# include /* *lock* */ ++# else /* SPINLOCK_23 */ ++# include /* *lock* */ ++# endif /* SPINLOCK_23 */ ++#endif /* SPINLOCK */ ++ ++#include ++ ++#include "openswan/radij.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_sa.h" ++ ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_tunnel.h" ++#include "openswan/ipsec_rcv.h" ++#include "openswan/ipsec_xmit.h" ++ ++#include "openswan/ipsec_auth.h" ++#include "openswan/ipsec_ipip.h" ++#include "openswan/ipsec_param.h" ++ ++#include "openswan/ipsec_proto.h" ++ ++enum ipsec_xmit_value ++ipsec_xmit_ipip_setup(struct ipsec_xmit_state *ixs) ++{ ++ ixs->iph->version = 4; ++ ++ switch(sysctl_ipsec_tos) { ++ case 0: ++#ifdef NET_21 ++ ixs->iph->tos = ip_hdr(ixs->skb)->tos; ++#else /* NET_21 */ ++ ixs->iph->tos = ixs->skb->ip_hdr->tos; ++#endif /* NET_21 */ ++ break; ++ case 1: ++ ixs->iph->tos = 0; ++ break; ++ default: ++ break; ++ } ++ ixs->iph->ttl = SYSCTL_IPSEC_DEFAULT_TTL; ++ ixs->iph->frag_off = 0; ++ ixs->iph->saddr = ((struct sockaddr_in*)(ixs->ipsp->ips_addr_s))->sin_addr.s_addr; ++ ixs->iph->daddr = ((struct sockaddr_in*)(ixs->ipsp->ips_addr_d))->sin_addr.s_addr; ++ ixs->iph->protocol = IPPROTO_IPIP; ++ ixs->iph->ihl = sizeof(struct iphdr) >> 2; ++ ++ KLIPS_IP_SELECT_IDENT(ixs->iph, ixs->skb); ++ ++ 
ixs->newdst = (__u32)ixs->iph->daddr; ++ ixs->newsrc = (__u32)ixs->iph->saddr; ++ ++#ifdef NET_21 ++ skb_set_transport_header(ixs->skb, ipsec_skb_offset(ixs->skb, ip_hdr(ixs->skb))); ++#endif /* NET_21 */ ++ return IPSEC_XMIT_OK; ++} ++ ++struct xform_functions ipip_xform_funcs[]={ ++ { rcv_checks: NULL, ++ rcv_setup_auth: NULL, ++ rcv_calc_auth: NULL, ++ rcv_decrypt: NULL, ++ ++ xmit_setup: ipsec_xmit_ipip_setup, ++ xmit_headroom: sizeof(struct iphdr), ++ xmit_needtailroom: 0, ++ }, ++}; ++ ++ ++ ++ ++ ++ ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_kern24.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,74 @@ ++/* ++ * Copyright 2005 (C) Michael Richardson ++ * ++ * This is a file of functions which are present in 2.6 kernels, ++ * but are not available by default in the 2.4 series. ++ * ++ * As such this code is usually from the Linux kernel, and is covered by ++ * GPL. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * $Id: ipsec_kern24.c,v 1.2 2005-05-20 03:19:18 mcr Exp $ ++ * ++ */ ++ ++#include ++#include ++#include ++ ++/* ++ * printk rate limiting, lifted from the networking subsystem. ++ * ++ * This enforces a rate limit: not more than one kernel message ++ * every printk_ratelimit_jiffies to make a denial-of-service ++ * attack impossible. 
++ */ ++static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED; ++ ++int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst) ++{ ++ static unsigned long toks = 10*5*HZ; ++ static unsigned long last_msg; ++ static int missed; ++ unsigned long flags; ++ unsigned long now = jiffies; ++ ++ spin_lock_irqsave(&ratelimit_lock, flags); ++ toks += now - last_msg; ++ last_msg = now; ++ if (toks > (ratelimit_burst * ratelimit_jiffies)) ++ toks = ratelimit_burst * ratelimit_jiffies; ++ if (toks >= ratelimit_jiffies) { ++ int lost = missed; ++ missed = 0; ++ toks -= ratelimit_jiffies; ++ spin_unlock_irqrestore(&ratelimit_lock, flags); ++ if (lost) ++ printk(KERN_WARNING "printk: %d messages suppressed.\n", lost); ++ return 1; ++ } ++ missed++; ++ spin_unlock_irqrestore(&ratelimit_lock, flags); ++ return 0; ++} ++ ++/* minimum time in jiffies between messages */ ++int printk_ratelimit_jiffies = 5*HZ; ++ ++/* number of messages we send before ratelimiting */ ++int printk_ratelimit_burst = 10; ++ ++int printk_ratelimit(void) ++{ ++ return __printk_ratelimit(printk_ratelimit_jiffies, ++ printk_ratelimit_burst); ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_life.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,277 @@ ++/* ++ * @(#) lifetime structure utilities ++ * ++ * Copyright (C) 2001 Richard Guy Briggs ++ * and Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * RCSID $Id: ipsec_life.c,v 1.13.10.2 2007-09-05 02:39:38 paul Exp $ ++ * ++ */ ++ ++/* ++ * This provides series of utility functions for dealing with lifetime ++ * structures. ++ * ++ * ipsec_check_lifetime - returns -1 hard lifetime exceeded ++ * 0 soft lifetime exceeded ++ * 1 everything is okay ++ * based upon whether or not the count exceeds hard/soft ++ * ++ */ ++ ++#define __NO_VERSION__ ++#include ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif /* for CONFIG_IP_FORWARD */ ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#include /* struct device, struct net_device_stats and other headers */ ++#include /* eth_type_trans */ ++#include ++#include ++#include ++ ++#include "openswan/radij.h" ++#include "openswan/ipsec_life.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_eroute.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_radij.h" ++ ++#include "openswan/ipsec_sa.h" ++#include "openswan/ipsec_tunnel.h" ++#include "openswan/ipsec_ipe4.h" ++#include "openswan/ipsec_ah.h" ++#include "openswan/ipsec_esp.h" ++ ++#ifdef CONFIG_KLIPS_IPCOMP ++#include "openswan/ipcomp.h" ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ ++#include ++#include ++ ++#include "openswan/ipsec_proto.h" ++ ++ ++enum ipsec_life_alive ++ipsec_lifetime_check(struct ipsec_lifetime64 *il64, ++ const char *lifename, ++ const char *saname, ++ enum ipsec_life_type ilt, ++ enum ipsec_direction idir, ++ struct ipsec_sa *ips) ++{ ++ __u64 count; ++ const char *dir; ++ ++ if(saname == NULL) { ++ saname = "unknown-SA"; ++ } ++ ++ if(idir == ipsec_incoming) { ++ dir = "incoming"; ++ } else { ++ dir = "outgoing"; ++ } ++ ++ ++ if(ilt == ipsec_life_timebased) { ++ count = jiffies/HZ - il64->ipl_count; ++ } else { ++ count = il64->ipl_count; ++ } ++ ++ if(il64->ipl_hard && ++ (count > il64->ipl_hard)) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_lifetime_check: " ++ "hard %s lifetime of SA:<%s%s%s> %s has been reached, SA 
expired, " ++ "%s packet dropped.\n", ++ lifename, ++ IPS_XFORM_NAME(ips), ++ saname, ++ dir); ++ ++ pfkey_expire(ips, 1); ++ return ipsec_life_harddied; ++ } ++ ++ if(il64->ipl_soft && ++ (count > il64->ipl_soft)) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_lifetime_check: " ++ "soft %s lifetime of SA:<%s%s%s> %s has been reached, SA expiring, " ++ "soft expire message sent up, %s packet still processed.\n", ++ lifename, ++ IPS_XFORM_NAME(ips), ++ saname, ++ dir); ++ ++ if(ips->ips_state != SADB_SASTATE_DYING) { ++ pfkey_expire(ips, 0); ++ } ++ ips->ips_state = SADB_SASTATE_DYING; ++ ++ return ipsec_life_softdied; ++ } ++ return ipsec_life_okay; ++} ++ ++ ++/* ++ * This function takes a buffer (with length), a lifetime name and type, ++ * and formats a string to represent the current values of the lifetime. ++ * ++ * It returns the number of bytes that the format took (or would take, ++ * if the buffer were large enough: snprintf semantics). ++ * This is used in /proc routines and in debug output. 
++ */ ++int ++ipsec_lifetime_format(char *buffer, ++ int buflen, ++ char *lifename, ++ enum ipsec_life_type timebaselife, ++ struct ipsec_lifetime64 *lifetime) ++{ ++ int len = 0; ++ __u64 count; ++ ++ if(timebaselife == ipsec_life_timebased) { ++ count = jiffies/HZ - lifetime->ipl_count; ++ } else { ++ count = lifetime->ipl_count; ++ } ++ ++ if(lifetime->ipl_count > 1 || ++ lifetime->ipl_soft || ++ lifetime->ipl_hard) { ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0)) ++ len = ipsec_snprintf(buffer, buflen, ++ "%s(%Lu,%Lu,%Lu)", ++ lifename, ++ count, ++ lifetime->ipl_soft, ++ lifetime->ipl_hard); ++#else /* XXX high 32 bits are not displayed */ ++ len = ipsec_snprintf(buffer, buflen, ++ "%s(%lu,%lu,%lu)", ++ lifename, ++ (unsigned long)count, ++ (unsigned long)lifetime->ipl_soft, ++ (unsigned long)lifetime->ipl_hard); ++#endif ++ } ++ ++ return len; ++} ++ ++void ++ipsec_lifetime_update_hard(struct ipsec_lifetime64 *lifetime, ++ __u64 newvalue) ++{ ++ if(newvalue && ++ (!lifetime->ipl_hard || ++ (newvalue < lifetime->ipl_hard))) { ++ lifetime->ipl_hard = newvalue; ++ ++ if(!lifetime->ipl_soft && ++ (lifetime->ipl_hard < lifetime->ipl_soft)) { ++ lifetime->ipl_soft = lifetime->ipl_hard; ++ } ++ } ++} ++ ++void ++ipsec_lifetime_update_soft(struct ipsec_lifetime64 *lifetime, ++ __u64 newvalue) ++{ ++ if(newvalue && ++ (!lifetime->ipl_soft || ++ (newvalue < lifetime->ipl_soft))) { ++ lifetime->ipl_soft = newvalue; ++ ++ if(lifetime->ipl_hard && ++ (lifetime->ipl_hard < lifetime->ipl_soft)) { ++ lifetime->ipl_soft = lifetime->ipl_hard; ++ } ++ } ++} ++ ++ ++/* ++ * $Log: ipsec_life.c,v $ ++ * Revision 1.13.10.2 2007-09-05 02:39:38 paul ++ * include ip.h to account for header file surgery in 2.6.22 [david] ++ * ++ * Revision 1.13.10.1 2006/10/06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. 
++ * ++ * Revision 1.13 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.12 2004/04/23 20:44:35 ken ++ * Update comments ++ * ++ * Revision 1.11 2004/04/06 02:49:26 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.10 2004/03/30 11:03:10 paul ++ * two more occurances of snprintf, found by Sam from a users oops msg. ++ * ++ * Revision 1.9 2003/10/31 02:27:55 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.8.4.1 2003/10/29 01:30:41 mcr ++ * elimited "struct sa_id". ++ * ++ * Revision 1.8 2003/02/06 02:00:10 rgb ++ * Fixed incorrect debugging text label ++ * ++ * Revision 1.7 2002/05/23 07:16:26 rgb ++ * Fixed absolute/relative reference to lifetime count printout. ++ * ++ * Revision 1.6 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.5 2002/04/24 07:36:28 mcr ++ * Moved from ./klips/net/ipsec/ipsec_life.c,v ++ * ++ * Revision 1.4 2002/01/29 17:17:55 mcr ++ * moved include of ipsec_param.h to after include of linux/kernel.h ++ * otherwise, it seems that some option that is set in ipsec_param.h ++ * screws up something subtle in the include path to kernel.h, and ++ * it complains on the snprintf() prototype. ++ * ++ * Revision 1.3 2002/01/29 02:13:17 mcr ++ * introduction of ipsec_kversion.h means that include of ++ * ipsec_param.h must preceed any decisions about what files to ++ * include to deal with differences in kernel source. ++ * ++ * Revision 1.2 2001/11/26 09:16:14 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.1.2.1 2001/09/25 02:25:57 mcr ++ * lifetime structure created and common functions created. ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_mast.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,1099 @@ ++/* ++ * IPSEC MAST code. ++ * Copyright (C) 1996, 1997 John Ioannidis. 
++ * Copyright (C) 1998, 1999, 2000, 2001, 2002 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ */ ++ ++char ipsec_mast_c_version[] = "RCSID $Id: ipsec_mast.c,v 1.7.2.1 2006-10-06 21:39:26 paul Exp $"; ++ ++#define __NO_VERSION__ ++#include ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif /* for CONFIG_IP_FORWARD */ ++#include ++#include /* printk() */ ++ ++#include "freeswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, struct net_device_stats, dev_queue_xmit() and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include /* struct tcphdr */ ++#include /* struct udphdr */ ++#include ++#include ++#include ++#include ++#undef dev_kfree_skb ++#define dev_kfree_skb(a,b) kfree_skb(a) ++#define PHYSDEV_TYPE ++#include /* icmp_send() */ ++#include ++#include ++ ++#include ++ ++#include "freeswan/radij.h" ++#include "freeswan/ipsec_life.h" ++#include "freeswan/ipsec_xform.h" ++#include "freeswan/ipsec_eroute.h" ++#include "freeswan/ipsec_encap.h" ++#include "freeswan/ipsec_radij.h" ++#include "freeswan/ipsec_sa.h" ++#include "freeswan/ipsec_tunnel.h" ++#include "freeswan/ipsec_mast.h" ++#include "freeswan/ipsec_ipe4.h" ++#include "freeswan/ipsec_ah.h" ++#include "freeswan/ipsec_esp.h" ++ ++#include ++#include ++ ++#include 
"freeswan/ipsec_proto.h" ++ ++int ipsec_maxdevice_count = -1; ++ ++DEBUG_NO_STATIC int ++ipsec_mast_open(struct net_device *dev) ++{ ++ struct ipsecpriv *prv = dev->priv; ++ ++ /* ++ * Can't open until attached. ++ */ ++ ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_open: " ++ "dev = %s, prv->dev = %s\n", ++ dev->name, prv->dev?prv->dev->name:"NONE"); ++ ++ if (prv->dev == NULL) ++ return -ENODEV; ++ ++ KLIPS_INC_USE; ++ return 0; ++} ++ ++DEBUG_NO_STATIC int ++ipsec_mast_close(struct net_device *dev) ++{ ++ KLIPS_DEC_USE; ++ return 0; ++} ++ ++static inline int ipsec_mast_xmit2(struct sk_buff *skb) ++{ ++ return ip_send(skb); ++} ++ ++enum ipsec_xmit_value ++ipsec_mast_send(struct ipsec_xmit_state*ixs) ++{ ++ /* new route/dst cache code from James Morris */ ++ ixs->skb->dev = ixs->physdev; ++ /*skb_orphan(ixs->skb);*/ ++ if((ixs->error = ip_route_output(&ixs->route, ++ ixs->skb->nh.iph->daddr, ++ ixs->pass ? 0 : ixs->skb->nh.iph->saddr, ++ RT_TOS(ixs->skb->nh.iph->tos), ++ ixs->physdev->iflink /* rgb: should this be 0? */))) { ++ ixs->stats->tx_errors++; ++ KLIPS_PRINT(debug_mast & DB_MAST_XMIT, ++ "klips_debug:ipsec_xmit_send: " ++ "ip_route_output failed with error code %d, rt->u.dst.dev=%s, dropped\n", ++ ixs->error, ++ ixs->route->u.dst.dev->name); ++ return IPSEC_XMIT_ROUTEERR; ++ } ++ if(ixs->dev == ixs->route->u.dst.dev) { ++ ip_rt_put(ixs->route); ++ /* This is recursion, drop it. */ ++ ixs->stats->tx_errors++; ++ KLIPS_PRINT(debug_mast & DB_MAST_XMIT, ++ "klips_debug:ipsec_xmit_send: " ++ "suspect recursion, dev=rt->u.dst.dev=%s, dropped\n", ++ ixs->dev->name); ++ return IPSEC_XMIT_RECURSDETECT; ++ } ++ dst_release(ixs->skb->dst); ++ ixs->skb->dst = &ixs->route->u.dst; ++ ixs->stats->tx_bytes += ixs->skb->len; ++ if(ixs->skb->len < ixs->skb->nh.raw - ixs->skb->data) { ++ ixs->stats->tx_errors++; ++ printk(KERN_WARNING ++ "klips_error:ipsec_xmit_send: " ++ "tried to __skb_pull nh-data=%ld, %d available. 
This should never happen, please report.\n", ++ (unsigned long)(ixs->skb->nh.raw - ixs->skb->data), ++ ixs->skb->len); ++ return IPSEC_XMIT_PUSHPULLERR; ++ } ++ __skb_pull(ixs->skb, ixs->skb->nh.raw - ixs->skb->data); ++#ifdef SKB_RESET_NFCT ++ nf_conntrack_put(ixs->skb->nfct); ++ ixs->skb->nfct = NULL; ++#ifdef CONFIG_NETFILTER_DEBUG ++ ixs->skb->nf_debug = 0; ++#endif /* CONFIG_NETFILTER_DEBUG */ ++#endif /* SKB_RESET_NFCT */ ++ KLIPS_PRINT(debug_mast & DB_MAST_XMIT, ++ "klips_debug:ipsec_xmit_send: " ++ "...done, calling ip_send() on device:%s\n", ++ ixs->skb->dev ? ixs->skb->dev->name : "NULL"); ++ KLIPS_IP_PRINT(debug_mast & DB_MAST_XMIT, ixs->skb->nh.iph); ++ { ++ int err; ++ ++ err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, ixs->skb, NULL, ixs->route->u.dst.dev, ++ ipsec_mast_xmit2); ++ if(err != NET_XMIT_SUCCESS && err != NET_XMIT_CN) { ++ if(net_ratelimit()) ++ printk(KERN_ERR ++ "klips_error:ipsec_xmit_send: " ++ "ip_send() failed, err=%d\n", ++ -err); ++ ixs->stats->tx_errors++; ++ ixs->stats->tx_aborted_errors++; ++ ixs->skb = NULL; ++ return IPSEC_XMIT_IPSENDFAILURE; ++ } ++ } ++ ixs->stats->tx_packets++; ++ ++ ixs->skb = NULL; ++ ++ return IPSEC_XMIT_OK; ++} ++ ++void ++ipsec_mast_cleanup(struct ipsec_xmit_state*ixs) ++{ ++#if defined(HAS_NETIF_QUEUE) || defined (HAVE_NETIF_QUEUE) ++ netif_wake_queue(ixs->dev); ++#else /* defined(HAS_NETIF_QUEUE) || defined (HAVE_NETIF_QUEUE) */ ++ ixs->dev->tbusy = 0; ++#endif /* defined(HAS_NETIF_QUEUE) || defined (HAVE_NETIF_QUEUE) */ ++ if(ixs->saved_header) { ++ kfree(ixs->saved_header); ++ } ++ if(ixs->skb) { ++ dev_kfree_skb(ixs->skb, FREE_WRITE); ++ } ++ if(ixs->oskb) { ++ dev_kfree_skb(ixs->oskb, FREE_WRITE); ++ } ++ if (ixs->ips.ips_ident_s.data) { ++ kfree(ixs->ips.ips_ident_s.data); ++ } ++ if (ixs->ips.ips_ident_d.data) { ++ kfree(ixs->ips.ips_ident_d.data); ++ } ++} ++ ++#if 0 ++/* ++ * This function assumes it is being called from dev_queue_xmit() ++ * and that skb is filled properly by that function. 
++ */ ++int ++ipsec_mast_start_xmit(struct sk_buff *skb, struct net_device *dev, IPsecSAref_t SAref) ++{ ++ struct ipsec_xmit_state ixs_mem; ++ struct ipsec_xmit_state *ixs = &ixs_mem; ++ enum ipsec_xmit_value stat = IPSEC_XMIT_OK; ++ ++ /* dev could be a mast device, but should be optional, I think... */ ++ /* SAref is also optional, but one of the two must be present. */ ++ /* I wonder if it could accept no device or saref and guess? */ ++ ++/* ipsec_xmit_sanity_check_dev(ixs); */ ++ ++ ipsec_xmit_sanity_check_skb(ixs); ++ ++ ipsec_xmit_adjust_hard_header(ixs); ++ ++ stat = ipsec_xmit_encap_bundle(ixs); ++ if(stat != IPSEC_XMIT_OK) { ++ /* SA processing failed */ ++ } ++ ++ ipsec_xmit_hard_header_restore(); ++} ++#endif ++ ++DEBUG_NO_STATIC struct net_device_stats * ++ipsec_mast_get_stats(struct net_device *dev) ++{ ++ return &(((struct ipsecpriv *)(dev->priv))->mystats); ++} ++ ++/* ++ * Revectored calls. ++ * For each of these calls, a field exists in our private structure. ++ */ ++ ++DEBUG_NO_STATIC int ++ipsec_mast_hard_header(struct sk_buff *skb, struct net_device *dev, ++ unsigned short type, void *daddr, void *saddr, unsigned len) ++{ ++ struct ipsecpriv *prv = dev->priv; ++ struct net_device *tmp; ++ int ret; ++ struct net_device_stats *stats; /* This device's statistics */ ++ ++ if(skb == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_hard_header: " ++ "no skb...\n"); ++ return -ENODATA; ++ } ++ ++ if(dev == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_hard_header: " ++ "no device...\n"); ++ return -ENODEV; ++ } ++ ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_hard_header: " ++ "skb->dev=%s dev=%s.\n", ++ skb->dev ? skb->dev->name : "NULL", ++ dev->name); ++ ++ if(prv == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_hard_header: " ++ "no private space associated with dev=%s\n", ++ dev->name ? 
dev->name : "NULL"); ++ return -ENODEV; ++ } ++ ++ stats = (struct net_device_stats *) &(prv->mystats); ++ ++ if(prv->dev == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_hard_header: " ++ "no physical device associated with dev=%s\n", ++ dev->name ? dev->name : "NULL"); ++ stats->tx_dropped++; ++ return -ENODEV; ++ } ++ ++ /* check if we have to send a IPv6 packet. It might be a Router ++ Solicitation, where the building of the packet happens in ++ reverse order: ++ 1. ll hdr, ++ 2. IPv6 hdr, ++ 3. ICMPv6 hdr ++ -> skb->nh.raw is still uninitialized when this function is ++ called!! If this is no IPv6 packet, we can print debugging ++ messages, otherwise we skip all debugging messages and just ++ build the ll header */ ++ if(type != ETH_P_IPV6) { ++ /* execute this only, if we don't have to build the ++ header for a IPv6 packet */ ++ if(!prv->hard_header) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_hard_header: " ++ "physical device has been detached, packet dropped 0p%p->0p%p len=%d type=%d dev=%s->NULL ", ++ saddr, ++ daddr, ++ len, ++ type, ++ dev->name); ++ KLIPS_PRINTMORE(debug_mast & DB_MAST_REVEC, ++ "ip=%08x->%08x\n", ++ (__u32)ntohl(skb->nh.iph->saddr), ++ (__u32)ntohl(skb->nh.iph->daddr) ); ++ stats->tx_dropped++; ++ return -ENODEV; ++ } ++ ++#define da ((struct net_device *)(prv->dev))->dev_addr ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_hard_header: " ++ "Revectored 0p%p->0p%p len=%d type=%d dev=%s->%s dev_addr=%02x:%02x:%02x:%02x:%02x:%02x ", ++ saddr, ++ daddr, ++ len, ++ type, ++ dev->name, ++ prv->dev->name, ++ da[0], da[1], da[2], da[3], da[4], da[5]); ++ KLIPS_PRINTMORE(debug_mast & DB_MAST_REVEC, ++ "ip=%08x->%08x\n", ++ (__u32)ntohl(skb->nh.iph->saddr), ++ (__u32)ntohl(skb->nh.iph->daddr) ); ++ } else { ++ KLIPS_PRINT(debug_mast, ++ "klips_debug:ipsec_mast_hard_header: " ++ "is IPv6 packet, skip debugging messages, only revector and build linklocal 
header.\n"); ++ } ++ tmp = skb->dev; ++ skb->dev = prv->dev; ++ ret = prv->hard_header(skb, prv->dev, type, (void *)daddr, (void *)saddr, len); ++ skb->dev = tmp; ++ return ret; ++} ++ ++DEBUG_NO_STATIC int ++ipsec_mast_rebuild_header(struct sk_buff *skb) ++{ ++ struct ipsecpriv *prv = skb->dev->priv; ++ struct net_device *tmp; ++ int ret; ++ struct net_device_stats *stats; /* This device's statistics */ ++ ++ if(skb->dev == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_rebuild_header: " ++ "no device..."); ++ return -ENODEV; ++ } ++ ++ if(prv == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_rebuild_header: " ++ "no private space associated with dev=%s", ++ skb->dev->name ? skb->dev->name : "NULL"); ++ return -ENODEV; ++ } ++ ++ stats = (struct net_device_stats *) &(prv->mystats); ++ ++ if(prv->dev == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_rebuild_header: " ++ "no physical device associated with dev=%s", ++ skb->dev->name ? 
skb->dev->name : "NULL"); ++ stats->tx_dropped++; ++ return -ENODEV; ++ } ++ ++ if(!prv->rebuild_header) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_rebuild_header: " ++ "physical device has been detached, packet dropped skb->dev=%s->NULL ", ++ skb->dev->name); ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "ip=%08x->%08x\n", ++ (__u32)ntohl(skb->nh.iph->saddr), ++ (__u32)ntohl(skb->nh.iph->daddr) ); ++ stats->tx_dropped++; ++ return -ENODEV; ++ } ++ ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast: " ++ "Revectored rebuild_header dev=%s->%s ", ++ skb->dev->name, prv->dev->name); ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "ip=%08x->%08x\n", ++ (__u32)ntohl(skb->nh.iph->saddr), ++ (__u32)ntohl(skb->nh.iph->daddr) ); ++ tmp = skb->dev; ++ skb->dev = prv->dev; ++ ++ ret = prv->rebuild_header(skb); ++ skb->dev = tmp; ++ return ret; ++} ++ ++DEBUG_NO_STATIC int ++ipsec_mast_set_mac_address(struct net_device *dev, void *addr) ++{ ++ struct ipsecpriv *prv = dev->priv; ++ ++ struct net_device_stats *stats; /* This device's statistics */ ++ ++ if(dev == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_set_mac_address: " ++ "no device..."); ++ return -ENODEV; ++ } ++ ++ if(prv == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_set_mac_address: " ++ "no private space associated with dev=%s", ++ dev->name ? dev->name : "NULL"); ++ return -ENODEV; ++ } ++ ++ stats = (struct net_device_stats *) &(prv->mystats); ++ ++ if(prv->dev == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_set_mac_address: " ++ "no physical device associated with dev=%s", ++ dev->name ? 
dev->name : "NULL"); ++ stats->tx_dropped++; ++ return -ENODEV; ++ } ++ ++ if(!prv->set_mac_address) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_set_mac_address: " ++ "physical device has been detached, cannot set - skb->dev=%s->NULL\n", ++ dev->name); ++ return -ENODEV; ++ } ++ ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_set_mac_address: " ++ "Revectored dev=%s->%s addr=0p%p\n", ++ dev->name, prv->dev->name, addr); ++ return prv->set_mac_address(prv->dev, addr); ++ ++} ++ ++DEBUG_NO_STATIC void ++ipsec_mast_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr) ++{ ++ struct ipsecpriv *prv = dev->priv; ++ ++ struct net_device_stats *stats; /* This device's statistics */ ++ ++ if(dev == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_cache_update: " ++ "no device..."); ++ return; ++ } ++ ++ if(prv == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_cache_update: " ++ "no private space associated with dev=%s", ++ dev->name ? dev->name : "NULL"); ++ return; ++ } ++ ++ stats = (struct net_device_stats *) &(prv->mystats); ++ ++ if(prv->dev == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_cache_update: " ++ "no physical device associated with dev=%s", ++ dev->name ? 
dev->name : "NULL"); ++ stats->tx_dropped++; ++ return; ++ } ++ ++ if(!prv->header_cache_update) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_cache_update: " ++ "physical device has been detached, cannot set - skb->dev=%s->NULL\n", ++ dev->name); ++ return; ++ } ++ ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast: " ++ "Revectored cache_update\n"); ++ prv->header_cache_update(hh, prv->dev, haddr); ++ return; ++} ++ ++DEBUG_NO_STATIC int ++ipsec_mast_neigh_setup(struct neighbour *n) ++{ ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_neigh_setup:\n"); ++ ++ if (n->nud_state == NUD_NONE) { ++ n->ops = &arp_broken_ops; ++ n->output = n->ops->output; ++ } ++ return 0; ++} ++ ++DEBUG_NO_STATIC int ++ipsec_mast_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) ++{ ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_neigh_setup_dev: " ++ "setting up %s\n", ++ dev ? dev->name : "NULL"); ++ ++ if (p->tbl->family == AF_INET) { ++ p->neigh_setup = ipsec_mast_neigh_setup; ++ p->ucast_probes = 0; ++ p->mcast_probes = 0; ++ } ++ return 0; ++} ++ ++/* ++ * We call the attach routine to attach another device. ++ */ ++ ++DEBUG_NO_STATIC int ++ipsec_mast_attach(struct net_device *dev, struct net_device *physdev) ++{ ++ int i; ++ struct ipsecpriv *prv = dev->priv; ++ ++ if(dev == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_attach: " ++ "no device..."); ++ return -ENODEV; ++ } ++ ++ if(prv == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_attach: " ++ "no private space associated with dev=%s", ++ dev->name ? 
dev->name : "NULL"); ++ return -ENODATA; ++ } ++ ++ prv->dev = physdev; ++ prv->hard_start_xmit = physdev->hard_start_xmit; ++ prv->get_stats = physdev->get_stats; ++ ++ if (physdev->hard_header) { ++ prv->hard_header = physdev->hard_header; ++ dev->hard_header = ipsec_mast_hard_header; ++ } else ++ dev->hard_header = NULL; ++ ++ if (physdev->rebuild_header) { ++ prv->rebuild_header = physdev->rebuild_header; ++ dev->rebuild_header = ipsec_mast_rebuild_header; ++ } else ++ dev->rebuild_header = NULL; ++ ++ if (physdev->set_mac_address) { ++ prv->set_mac_address = physdev->set_mac_address; ++ dev->set_mac_address = ipsec_mast_set_mac_address; ++ } else ++ dev->set_mac_address = NULL; ++ ++ if (physdev->header_cache_update) { ++ prv->header_cache_update = physdev->header_cache_update; ++ dev->header_cache_update = ipsec_mast_cache_update; ++ } else ++ dev->header_cache_update = NULL; ++ ++ dev->hard_header_len = physdev->hard_header_len; ++ ++/* prv->neigh_setup = physdev->neigh_setup; */ ++ dev->neigh_setup = ipsec_mast_neigh_setup_dev; ++ dev->mtu = 16260; /* 0xfff0; */ /* dev->mtu; */ ++ prv->mtu = physdev->mtu; ++ ++#ifdef PHYSDEV_TYPE ++ dev->type = physdev->type; /* ARPHRD_MAST; */ ++#endif /* PHYSDEV_TYPE */ ++ ++ dev->addr_len = physdev->addr_len; ++ for (i=0; iaddr_len; i++) { ++ dev->dev_addr[i] = physdev->dev_addr[i]; ++ } ++#ifdef CONFIG_KLIPS_DEBUG ++ if(debug_mast & DB_MAST_INIT) { ++ printk(KERN_INFO "klips_debug:ipsec_mast_attach: " ++ "physical device %s being attached has HW address: %2x", ++ physdev->name, physdev->dev_addr[0]); ++ for (i=1; i < physdev->addr_len; i++) { ++ printk(":%02x", physdev->dev_addr[i]); ++ } ++ printk("\n"); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ return 0; ++} ++ ++/* ++ * We call the detach routine to detach the ipsec mast from another device. 
++ */ ++ ++DEBUG_NO_STATIC int ++ipsec_mast_detach(struct net_device *dev) ++{ ++ int i; ++ struct ipsecpriv *prv = dev->priv; ++ ++ if(dev == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_detach: " ++ "no device..."); ++ return -ENODEV; ++ } ++ ++ if(prv == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_REVEC, ++ "klips_debug:ipsec_mast_detach: " ++ "no private space associated with dev=%s", ++ dev->name ? dev->name : "NULL"); ++ return -ENODATA; ++ } ++ ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_detach: " ++ "physical device %s being detached from virtual device %s\n", ++ prv->dev ? prv->dev->name : "NULL", ++ dev->name); ++ ++ prv->dev = NULL; ++ prv->hard_start_xmit = NULL; ++ prv->get_stats = NULL; ++ ++ prv->hard_header = NULL; ++#ifdef DETACH_AND_DOWN ++ dev->hard_header = NULL; ++#endif /* DETACH_AND_DOWN */ ++ ++ prv->rebuild_header = NULL; ++#ifdef DETACH_AND_DOWN ++ dev->rebuild_header = NULL; ++#endif /* DETACH_AND_DOWN */ ++ ++ prv->set_mac_address = NULL; ++#ifdef DETACH_AND_DOWN ++ dev->set_mac_address = NULL; ++#endif /* DETACH_AND_DOWN */ ++ ++ prv->header_cache_update = NULL; ++#ifdef DETACH_AND_DOWN ++ dev->header_cache_update = NULL; ++#endif /* DETACH_AND_DOWN */ ++ ++#ifdef DETACH_AND_DOWN ++ dev->neigh_setup = NULL; ++#endif /* DETACH_AND_DOWN */ ++ ++ dev->hard_header_len = 0; ++#ifdef DETACH_AND_DOWN ++ dev->mtu = 0; ++#endif /* DETACH_AND_DOWN */ ++ prv->mtu = 0; ++ for (i=0; idev_addr[i] = 0; ++ } ++ dev->addr_len = 0; ++#ifdef PHYSDEV_TYPE ++ dev->type = ARPHRD_VOID; /* ARPHRD_MAST; */ ++#endif /* PHYSDEV_TYPE */ ++ ++ return 0; ++} ++ ++/* ++ * We call the clear routine to detach all ipsec masts from other devices. 
++ */ ++DEBUG_NO_STATIC int ++ipsec_mast_clear(void) ++{ ++ int i; ++ struct net_device *ipsecdev = NULL, *prvdev; ++ struct ipsecpriv *prv; ++ char name[9]; ++ int ret; ++ ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_clear: .\n"); ++ ++ for(i = 0; i < IPSEC_NUM_IF; i++) { ++ sprintf(name, IPSEC_DEV_FORMAT, i); ++ if((ipsecdev = ipsec_dev_get(name)) != NULL) { ++ if((prv = (struct ipsecpriv *)(ipsecdev->priv))) { ++ prvdev = (struct net_device *)(prv->dev); ++ if(prvdev) { ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_clear: " ++ "physical device for device %s is %s\n", ++ name, prvdev->name); ++ if((ret = ipsec_mast_detach(ipsecdev))) { ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_clear: " ++ "error %d detatching device %s from device %s.\n", ++ ret, name, prvdev->name); ++ return ret; ++ } ++ } ++ } ++ } ++ } ++ return 0; ++} ++ ++DEBUG_NO_STATIC int ++ipsec_mast_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) ++{ ++ struct ipsecmastconf *cf = (struct ipsecmastconf *)&ifr->ifr_data; ++ struct ipsecpriv *prv = dev->priv; ++ struct net_device *them; /* physical device */ ++#ifdef CONFIG_IP_ALIAS ++ char *colon; ++ char realphysname[IFNAMSIZ]; ++#endif /* CONFIG_IP_ALIAS */ ++ ++ if(dev == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_ioctl: " ++ "device not supplied.\n"); ++ return -ENODEV; ++ } ++ ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_ioctl: " ++ "tncfg service call #%d for dev=%s\n", ++ cmd, ++ dev->name ? dev->name : "NULL"); ++ switch (cmd) { ++ /* attach a virtual ipsec? 
device to a physical device */ ++ case IPSEC_SET_DEV: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_ioctl: " ++ "calling ipsec_mast_attatch...\n"); ++#ifdef CONFIG_IP_ALIAS ++ /* If this is an IP alias interface, get its real physical name */ ++ strncpy(realphysname, cf->cf_name, IFNAMSIZ); ++ realphysname[IFNAMSIZ-1] = 0; ++ colon = strchr(realphysname, ':'); ++ if (colon) *colon = 0; ++ them = ipsec_dev_get(realphysname); ++#else /* CONFIG_IP_ALIAS */ ++ them = ipsec_dev_get(cf->cf_name); ++#endif /* CONFIG_IP_ALIAS */ ++ ++ if (them == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_ioctl: " ++ "physical device %s requested is null\n", ++ cf->cf_name); ++ return -ENXIO; ++ } ++ ++#if 0 ++ if (them->flags & IFF_UP) { ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_ioctl: " ++ "physical device %s requested is not up.\n", ++ cf->cf_name); ++ return -ENXIO; ++ } ++#endif ++ ++ if (prv && prv->dev) { ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_ioctl: " ++ "virtual device is already connected to %s.\n", ++ prv->dev->name ? prv->dev->name : "NULL"); ++ return -EBUSY; ++ } ++ return ipsec_mast_attach(dev, them); ++ ++ case IPSEC_DEL_DEV: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_ioctl: " ++ "calling ipsec_mast_detatch.\n"); ++ if (! 
prv->dev) { ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_ioctl: " ++ "physical device not connected.\n"); ++ return -ENODEV; ++ } ++ return ipsec_mast_detach(dev); ++ ++ case IPSEC_CLR_DEV: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_ioctl: " ++ "calling ipsec_mast_clear.\n"); ++ return ipsec_mast_clear(); ++ ++ default: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_ioctl: " ++ "unknown command %d.\n", ++ cmd); ++ return -EOPNOTSUPP; ++ } ++} ++ ++int ++ipsec_mast_device_event(struct notifier_block *unused, unsigned long event, void *ptr) ++{ ++ struct net_device *dev = ptr; ++ struct net_device *ipsec_dev; ++ struct ipsecpriv *priv; ++ char name[9]; ++ int i; ++ ++ if (dev == NULL) { ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_device_event: " ++ "dev=NULL for event type %ld.\n", ++ event); ++ return(NOTIFY_DONE); ++ } ++ ++ /* check for loopback devices */ ++ if (dev && (dev->flags & IFF_LOOPBACK)) { ++ return(NOTIFY_DONE); ++ } ++ ++ switch (event) { ++ case NETDEV_DOWN: ++ /* look very carefully at the scope of these compiler ++ directives before changing anything... -- RGB */ ++ ++ case NETDEV_UNREGISTER: ++ switch (event) { ++ case NETDEV_DOWN: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_device_event: " ++ "NETDEV_DOWN dev=%s flags=%x\n", ++ dev->name, ++ dev->flags); ++ if(strncmp(dev->name, "ipsec", strlen("ipsec")) == 0) { ++ printk(KERN_CRIT "IPSEC EVENT: KLIPS device %s shut down.\n", ++ dev->name); ++ } ++ break; ++ case NETDEV_UNREGISTER: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_device_event: " ++ "NETDEV_UNREGISTER dev=%s flags=%x\n", ++ dev->name, ++ dev->flags); ++ break; ++ } ++ ++ /* find the attached physical device and detach it. 
*/ ++ for(i = 0; i < IPSEC_NUM_IF; i++) { ++ sprintf(name, IPSEC_DEV_FORMAT, i); ++ ipsec_dev = ipsec_dev_get(name); ++ if(ipsec_dev) { ++ priv = (struct ipsecpriv *)(ipsec_dev->priv); ++ if(priv) { ++ ; ++ if(((struct net_device *)(priv->dev)) == dev) { ++ /* dev_close(ipsec_dev); */ ++ /* return */ ipsec_mast_detach(ipsec_dev); ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_device_event: " ++ "device '%s' has been detached.\n", ++ ipsec_dev->name); ++ break; ++ } ++ } else { ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_device_event: " ++ "device '%s' has no private data space!\n", ++ ipsec_dev->name); ++ } ++ } ++ } ++ break; ++ case NETDEV_UP: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_device_event: " ++ "NETDEV_UP dev=%s\n", ++ dev->name); ++ break; ++ case NETDEV_REBOOT: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_device_event: " ++ "NETDEV_REBOOT dev=%s\n", ++ dev->name); ++ break; ++ case NETDEV_CHANGE: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_device_event: " ++ "NETDEV_CHANGE dev=%s flags=%x\n", ++ dev->name, ++ dev->flags); ++ break; ++ case NETDEV_REGISTER: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_device_event: " ++ "NETDEV_REGISTER dev=%s\n", ++ dev->name); ++ break; ++ case NETDEV_CHANGEMTU: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_device_event: " ++ "NETDEV_CHANGEMTU dev=%s to mtu=%d\n", ++ dev->name, ++ dev->mtu); ++ break; ++ case NETDEV_CHANGEADDR: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_device_event: " ++ "NETDEV_CHANGEADDR dev=%s\n", ++ dev->name); ++ break; ++ case NETDEV_GOING_DOWN: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_device_event: " ++ "NETDEV_GOING_DOWN dev=%s\n", ++ dev->name); ++ break; ++ case NETDEV_CHANGENAME: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_device_event: " ++ 
"NETDEV_CHANGENAME dev=%s\n", ++ dev->name); ++ break; ++ default: ++ KLIPS_PRINT(debug_mast & DB_MAST_INIT, ++ "klips_debug:ipsec_mast_device_event: " ++ "event type %ld unrecognised for dev=%s\n", ++ event, ++ dev->name); ++ break; ++ } ++ return NOTIFY_DONE; ++} ++ ++/* ++ * Called when an ipsec mast device is initialized. ++ * The ipsec mast device structure is passed to us. ++ */ ++ ++int ++ipsec_mast_init(struct net_device *dev) ++{ ++ int i; ++ ++ KLIPS_PRINT(debug_mast, ++ "klips_debug:ipsec_mast_init: " ++ "allocating %lu bytes initialising device: %s\n", ++ (unsigned long) sizeof(struct ipsecpriv), ++ dev->name ? dev->name : "NULL"); ++ ++ /* Add our mast functions to the device */ ++ dev->open = ipsec_mast_open; ++ dev->stop = ipsec_mast_close; ++ dev->hard_start_xmit = ipsec_mast_start_xmit; ++ dev->get_stats = ipsec_mast_get_stats; ++ ++ dev->priv = kmalloc(sizeof(struct ipsecpriv), GFP_KERNEL); ++ if (dev->priv == NULL) ++ return -ENOMEM; ++ memset((caddr_t)(dev->priv), 0, sizeof(struct ipsecpriv)); ++ ++ for(i = 0; i < sizeof(zeroes); i++) { ++ ((__u8*)(zeroes))[i] = 0; ++ } ++ ++ dev->set_multicast_list = NULL; ++ dev->do_ioctl = ipsec_mast_ioctl; ++ dev->hard_header = NULL; ++ dev->rebuild_header = NULL; ++ dev->set_mac_address = NULL; ++ dev->header_cache_update= NULL; ++ dev->neigh_setup = ipsec_mast_neigh_setup_dev; ++ dev->hard_header_len = 0; ++ dev->mtu = 0; ++ dev->addr_len = 0; ++ dev->type = ARPHRD_VOID; /* ARPHRD_MAST; */ /* ARPHRD_ETHER; */ ++ dev->tx_queue_len = 10; /* Small queue */ ++ memset((caddr_t)(dev->broadcast),0xFF, ETH_ALEN); /* what if this is not attached to ethernet? */ ++ ++ /* New-style flags. */ ++ dev->flags = IFF_NOARP /* 0 */ /* Petr Novak */; ++ dev_init_buffers(dev); ++ ++ /* We're done. Have I forgotten anything? 
*/ ++ return 0; ++} ++ ++/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ ++/* Module specific interface (but it links with the rest of IPSEC) */ ++/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ ++ ++int ++ipsec_mast_probe(struct net_device *dev) ++{ ++ ipsec_mast_init(dev); ++ return 0; ++} ++ ++int ++ipsec_mast_init_devices(void) ++{ ++ return 0; ++} ++ ++/* void */ ++int ++ipsec_mast_cleanup_devices(void) ++{ ++ int error = 0; ++ int i; ++ char name[10]; ++ struct net_device *dev_mast; ++ ++ for(i = 0; i < ipsec_mastdevice_count; i++) { ++ sprintf(name, MAST_DEV_FORMAT, i); ++ if((dev_mast = ipsec_dev_get(name)) == NULL) { ++ break; ++ } ++ unregister_netdev(dev_mast); ++ kfree(dev_mast->priv); ++ dev_mast->priv=NULL; ++ } ++ return error; ++} ++ ++/* ++ * $Log: ipsec_mast.c,v $ ++ * Revision 1.7.2.1 2006-10-06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.7 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. ++ * ++ * Revision 1.6 2004/12/03 21:25:57 mcr ++ * compile time fixes for running on 2.6. ++ * still experimental. ++ * ++ * Revision 1.5 2004/08/03 18:19:08 mcr ++ * in 2.6, use "net_device" instead of #define device->net_device. ++ * this probably breaks 2.0 compiles. ++ * ++ * Revision 1.4 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.3 2003/10/31 02:27:55 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.2.4.1 2003/10/29 01:30:41 mcr ++ * elimited "struct sa_id". ++ * ++ * Revision 1.2 2003/06/22 20:06:17 mcr ++ * refactored mast code still had lots of ipsecX junk in it. 
++ * ++ * Revision 1.1 2003/02/12 19:31:12 rgb ++ * Refactored from ipsec_tunnel.c ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_md5c.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,453 @@ ++/* ++ * RCSID $Id: ipsec_md5c.c,v 1.10 2005-04-15 01:25:57 mcr Exp $ ++ */ ++ ++/* ++ * The rest of the code is derived from MD5C.C by RSADSI. Minor cosmetic ++ * changes to accomodate it in the kernel by ji. ++ */ ++ ++#include ++#include ++ ++#include "openswan/ipsec_md5h.h" ++ ++/* MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm ++ */ ++ ++/* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All ++rights reserved. ++ ++License to copy and use this software is granted provided that it ++is identified as the "RSA Data Security, Inc. MD5 Message-Digest ++Algorithm" in all material mentioning or referencing this software ++or this function. ++ ++License is also granted to make and use derivative works provided ++that such works are identified as "derived from the RSA Data ++Security, Inc. MD5 Message-Digest Algorithm" in all material ++mentioning or referencing the derived work. ++ ++RSA Data Security, Inc. makes no representations concerning either ++the merchantability of this software or the suitability of this ++software for any particular purpose. It is provided "as is" ++without express or implied warranty of any kind. ++ ++These notices must be retained in any copies of any part of this ++documentation and/or software. ++ */ ++ ++/* ++ * Additions by JI ++ * ++ * HAVEMEMCOPY is defined if mem* routines are available ++ * ++ * HAVEHTON is defined if htons() and htonl() can be used ++ * for big/little endian conversions ++ * ++ */ ++ ++#define HAVEMEMCOPY ++#ifdef __LITTLE_ENDIAN ++#define LITTLENDIAN ++#endif ++#ifdef __BIG_ENDIAN ++#define BIGENDIAN ++#endif ++ ++/* Constants for MD5Transform routine. 
++ */ ++ ++#define S11 7 ++#define S12 12 ++#define S13 17 ++#define S14 22 ++#define S21 5 ++#define S22 9 ++#define S23 14 ++#define S24 20 ++#define S31 4 ++#define S32 11 ++#define S33 16 ++#define S34 23 ++#define S41 6 ++#define S42 10 ++#define S43 15 ++#define S44 21 ++ ++static void MD5Transform PROTO_LIST ((UINT4 [4], unsigned char [64])); ++ ++#ifdef LITTLEENDIAN ++#define Encode MD5_memcpy ++#define Decode MD5_memcpy ++#else ++static void Encode PROTO_LIST ++ ((unsigned char *, UINT4 *, unsigned int)); ++static void Decode PROTO_LIST ++ ((UINT4 *, unsigned char *, unsigned int)); ++#endif ++ ++#ifdef HAVEMEMCOPY ++/* no need to include here; defines these */ ++#define MD5_memcpy memcpy ++#define MD5_memset memset ++#else ++#ifdef HAVEBCOPY ++#define MD5_memcpy(_a,_b,_c) bcopy((_b),(_a),(_c)) ++#define MD5_memset(_a,_b,_c) bzero((_a),(_c)) ++#else ++static void MD5_memcpy PROTO_LIST ((POINTER, POINTER, unsigned int)); ++static void MD5_memset PROTO_LIST ((POINTER, int, unsigned int)); ++#endif ++#endif ++static unsigned char PADDING[64] = { ++ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ++}; ++ ++/* F, G, H and I are basic MD5 functions. ++ */ ++#define F(x, y, z) (((x) & (y)) | ((~x) & (z))) ++#define G(x, y, z) (((x) & (z)) | ((y) & (~z))) ++#define H(x, y, z) ((x) ^ (y) ^ (z)) ++#define I(x, y, z) ((y) ^ ((x) | (~z))) ++ ++/* ROTATE_LEFT rotates x left n bits. ++ */ ++#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) ++ ++/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4. ++Rotation is separate from addition to prevent recomputation. 
++ */ ++#define FF(a, b, c, d, x, s, ac) { \ ++ (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \ ++ (a) = ROTATE_LEFT ((a), (s)); \ ++ (a) += (b); \ ++ } ++#define GG(a, b, c, d, x, s, ac) { \ ++ (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \ ++ (a) = ROTATE_LEFT ((a), (s)); \ ++ (a) += (b); \ ++ } ++#define HH(a, b, c, d, x, s, ac) { \ ++ (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \ ++ (a) = ROTATE_LEFT ((a), (s)); \ ++ (a) += (b); \ ++ } ++#define II(a, b, c, d, x, s, ac) { \ ++ (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \ ++ (a) = ROTATE_LEFT ((a), (s)); \ ++ (a) += (b); \ ++ } ++ ++/* ++ * MD5 initialization. Begins an MD5 operation, writing a new context. ++ */ ++void osMD5Init(void *vcontext) ++{ ++ MD5_CTX *context = vcontext; ++ ++ context->count[0] = context->count[1] = 0; ++ /* Load magic initialization constants.*/ ++ context->state[0] = 0x67452301; ++ context->state[1] = 0xefcdab89; ++ context->state[2] = 0x98badcfe; ++ context->state[3] = 0x10325476; ++} ++ ++/* MD5 block update operation. Continues an MD5 message-digest ++ operation, processing another message block, and updating the ++ context. ++ */ ++void osMD5Update (vcontext, input, inputLen) ++ void *vcontext; ++ unsigned char *input; /* input block */ ++ __u32 inputLen; /* length of input block */ ++{ ++ MD5_CTX *context = vcontext; ++ __u32 i; ++ unsigned int index, partLen; ++ ++ /* Compute number of bytes mod 64 */ ++ index = (unsigned int)((context->count[0] >> 3) & 0x3F); ++ ++ /* Update number of bits */ ++ if ((context->count[0] += ((UINT4)inputLen << 3)) ++ < ((UINT4)inputLen << 3)) ++ context->count[1]++; ++ context->count[1] += ((UINT4)inputLen >> 29); ++ ++ partLen = 64 - index; ++ ++ /* Transform as many times as possible. 
++*/ ++ if (inputLen >= partLen) { ++ MD5_memcpy ++ ((POINTER)&context->buffer[index], (POINTER)input, partLen); ++ MD5Transform (context->state, context->buffer); ++ ++ for (i = partLen; i + 63 < inputLen; i += 64) ++ MD5Transform (context->state, &input[i]); ++ ++ index = 0; ++ } ++ else ++ i = 0; ++ ++ /* Buffer remaining input */ ++ MD5_memcpy ++ ((POINTER)&context->buffer[index], (POINTER)&input[i], ++ inputLen-i); ++} ++ ++/* MD5 finalization. Ends an MD5 message-digest operation, writing the ++ the message digest and zeroizing the context. ++ */ ++void osMD5Final (digest, vcontext) ++unsigned char digest[16]; /* message digest */ ++void *vcontext; /* context */ ++{ ++ MD5_CTX *context = vcontext; ++ unsigned char bits[8]; ++ unsigned int index, padLen; ++ ++ /* Save number of bits */ ++ Encode (bits, context->count, 8); ++ ++ /* Pad out to 56 mod 64. ++*/ ++ index = (unsigned int)((context->count[0] >> 3) & 0x3f); ++ padLen = (index < 56) ? (56 - index) : (120 - index); ++ osMD5Update (context, PADDING, padLen); ++ ++ /* Append length (before padding) */ ++ osMD5Update (context, bits, 8); ++ ++ if (digest != NULL) /* Bill Simpson's padding */ ++ { ++ /* store state in digest */ ++ Encode (digest, context->state, 16); ++ ++ /* Zeroize sensitive information. ++ */ ++ MD5_memset ((POINTER)context, 0, sizeof (*context)); ++ } ++} ++ ++/* MD5 basic transformation. Transforms state based on block. 
++ */ ++static void MD5Transform (state, block) ++UINT4 state[4]; ++unsigned char block[64]; ++{ ++ UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16]; ++ ++ Decode (x, block, 64); ++ ++ /* Round 1 */ ++ FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ ++ FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ ++ FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ ++ FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ ++ FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ ++ FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ ++ FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ ++ FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ ++ FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ ++ FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ ++ FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ ++ FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ ++ FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ ++ FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ ++ FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ ++ FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ ++ ++ /* Round 2 */ ++ GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ ++ GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ ++ GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ ++ GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */ ++ GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ ++ GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */ ++ GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ ++ GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ ++ GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ ++ GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ ++ GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ ++ GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ ++ GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ ++ GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */ ++ GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ ++ GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */ ++ ++ /* Round 3 */ ++ HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ ++ 
HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ ++ HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ ++ HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ ++ HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ ++ HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ ++ HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ ++ HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ ++ HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ ++ HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ ++ HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ ++ HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */ ++ HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ ++ HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ ++ HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ ++ HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ ++ ++ /* Round 4 */ ++ II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ ++ II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ ++ II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ ++ II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ ++ II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ ++ II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ ++ II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ ++ II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ ++ II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ ++ II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ ++ II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ ++ II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ ++ II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ ++ II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ ++ II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ ++ II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ ++ ++ state[0] += a; ++ state[1] += b; ++ state[2] += c; ++ state[3] += d; ++ ++ /* Zeroize sensitive information. ++*/ ++ MD5_memset ((POINTER)x, 0, sizeof (x)); ++} ++ ++#ifndef LITTLEENDIAN ++ ++/* Encodes input (UINT4) into output (unsigned char). Assumes len is ++ a multiple of 4. 
++ */ ++static void Encode (output, input, len) ++unsigned char *output; ++UINT4 *input; ++unsigned int len; ++{ ++ unsigned int i, j; ++ ++ for (i = 0, j = 0; j < len; i++, j += 4) { ++ output[j] = (unsigned char)(input[i] & 0xff); ++ output[j+1] = (unsigned char)((input[i] >> 8) & 0xff); ++ output[j+2] = (unsigned char)((input[i] >> 16) & 0xff); ++ output[j+3] = (unsigned char)((input[i] >> 24) & 0xff); ++ } ++} ++ ++/* Decodes input (unsigned char) into output (UINT4). Assumes len is ++ a multiple of 4. ++ */ ++static void Decode (output, input, len) ++UINT4 *output; ++unsigned char *input; ++unsigned int len; ++{ ++ unsigned int i, j; ++ ++ for (i = 0, j = 0; j < len; i++, j += 4) ++ output[i] = ((UINT4)input[j]) | (((UINT4)input[j+1]) << 8) | ++ (((UINT4)input[j+2]) << 16) | (((UINT4)input[j+3]) << 24); ++} ++ ++#endif ++ ++#ifndef HAVEMEMCOPY ++#ifndef HAVEBCOPY ++/* Note: Replace "for loop" with standard memcpy if possible. ++ */ ++ ++static void MD5_memcpy (output, input, len) ++POINTER output; ++POINTER input; ++unsigned int len; ++{ ++ unsigned int i; ++ ++ for (i = 0; i < len; i++) ++ ++ output[i] = input[i]; ++} ++ ++/* Note: Replace "for loop" with standard memset if possible. ++ */ ++ ++static void MD5_memset (output, value, len) ++POINTER output; ++int value; ++unsigned int len; ++{ ++ unsigned int i; ++ ++ for (i = 0; i < len; i++) ++ ((char *)output)[i] = (char)value; ++} ++#endif ++#endif ++ ++/* ++ * $Log: ipsec_md5c.c,v $ ++ * Revision 1.10 2005-04-15 01:25:57 mcr ++ * minor fix to comments. ++ * ++ * Revision 1.9 2004/09/08 17:21:36 ken ++ * Rename MD5* -> osMD5 functions to prevent clashes with other symbols exported by kernel modules (CIFS in 2.6 initiated this) ++ * ++ * Revision 1.8 2004/04/06 02:49:26 mcr ++ * pullup of algo code from alg-branch. 
++ * ++ * Revision 1.7 2002/09/10 01:45:14 mcr ++ * changed type of MD5_CTX and SHA1_CTX to void * so that ++ * the function prototypes would match, and could be placed ++ * into a pointer to a function. ++ * ++ * Revision 1.6 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.5 2002/04/24 07:36:28 mcr ++ * Moved from ./klips/net/ipsec/ipsec_md5c.c,v ++ * ++ * Revision 1.4 1999/12/13 13:59:12 rgb ++ * Quick fix to argument size to Update bugs. ++ * ++ * Revision 1.3 1999/05/21 18:09:28 henry ++ * unnecessary include causes trouble in 2.2 ++ * ++ * Revision 1.2 1999/04/06 04:54:26 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.1 1998/06/18 21:27:48 henry ++ * move sources from klips/src to klips/net/ipsec, to keep stupid ++ * kernel-build scripts happier in the presence of symlinks ++ * ++ * Revision 1.2 1998/04/23 20:54:02 rgb ++ * Fixed md5 and sha1 include file nesting issues, to be cleaned up when ++ * verified. ++ * ++ * Revision 1.1 1998/04/09 03:06:08 henry ++ * sources moved up from linux/net/ipsec ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:04 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.3 1996/11/20 14:48:53 ji ++ * Release update only. ++ * ++ * Revision 0.2 1996/11/02 00:18:33 ji ++ * First limited release. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_proc.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,1206 @@ ++/* ++ * @(#) /proc file system interface code. ++ * ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs ++ * 2001 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . 
++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * Split out from ipsec_init.c version 1.70. ++ */ ++ ++char ipsec_proc_c_version[] = "RCSID $Id: ipsec_proc.c,v 1.39.2.7 2007-11-06 18:24:44 paul Exp $"; ++ ++ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++#define __NO_VERSION__ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ++#include ++#endif ++#include ++#include /* printk() */ ++#include /* struct iphdr */ ++ ++#include "openswan/ipsec_kversion.h" ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, and other headers */ ++#include /* eth_type_trans */ ++#include /* struct sockaddr_in */ ++#include ++#include /* copy_from_user */ ++#include ++#ifdef SPINLOCK ++#ifdef SPINLOCK_23 ++#include /* *lock* */ ++#else /* SPINLOCK_23 */ ++#include /* *lock* */ ++#endif /* SPINLOCK_23 */ ++#endif /* SPINLOCK */ ++ ++#include ++#ifdef CONFIG_PROC_FS ++#include ++#endif /* CONFIG_PROC_FS */ ++#ifdef NETLINK_SOCK ++#include ++#else ++#include ++#endif ++ ++#include "openswan/radij.h" ++ ++#include "openswan/ipsec_life.h" ++#include "openswan/ipsec_stats.h" ++#include "openswan/ipsec_sa.h" ++ ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_tunnel.h" ++#include "openswan/ipsec_xmit.h" ++ ++#include "openswan/ipsec_rcv.h" ++#include "openswan/ipsec_ah.h" ++#include "openswan/ipsec_esp.h" ++#include "openswan/ipsec_kern24.h" ++ ++#ifdef CONFIG_KLIPS_IPCOMP ++#include "openswan/ipcomp.h" ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ ++#include 
"openswan/ipsec_proto.h" ++ ++#include ++#include ++ ++#ifdef CONFIG_PROC_FS ++ ++#ifdef IPSEC_PROC_SUBDIRS ++static struct proc_dir_entry *proc_net_ipsec_dir = NULL; ++static struct proc_dir_entry *proc_eroute_dir = NULL; ++static struct proc_dir_entry *proc_spi_dir = NULL; ++static struct proc_dir_entry *proc_spigrp_dir = NULL; ++static struct proc_dir_entry *proc_birth_dir = NULL; ++static struct proc_dir_entry *proc_stats_dir = NULL; ++#endif ++ ++struct ipsec_birth_reply ipsec_ipv4_birth_packet; ++struct ipsec_birth_reply ipsec_ipv6_birth_packet; ++ ++#ifdef CONFIG_KLIPS_DEBUG ++int debug_esp = 0; ++int debug_ah = 0; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++#define DECREMENT_UNSIGNED(X, amount) ((amount < (X)) ? (X)-amount : 0) ++ ++#ifdef CONFIG_KLIPS_ALG ++extern int ipsec_xform_get_info(char *buffer, char **start, ++ off_t offset, int length IPSEC_PROC_LAST_ARG); ++#endif /* CONFIG_KLIPS_ALG */ ++ ++ ++IPSEC_PROCFS_DEBUG_NO_STATIC ++int ++ipsec_eroute_get_info(char *buffer, ++ char **start, ++ off_t offset, ++ int length IPSEC_PROC_LAST_ARG) ++{ ++ struct wsbuf w = {buffer, length, offset, 0, 0}; ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if (debug_radij & DB_RJ_DUMPTREES) ++ rj_dumptrees(); /* XXXXXXXXX */ ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS, ++ "klips_debug:ipsec_eroute_get_info: " ++ "buffer=0p%p, *start=0p%p, offset=%d, length=%d\n", ++ buffer, ++ *start, ++ (int)offset, ++ length); ++ ++ spin_lock_bh(&eroute_lock); ++ ++ rj_walktree(rnh, ipsec_rj_walker_procprint, &w); ++/* rj_walktree(mask_rjhead, ipsec_rj_walker_procprint, &w); */ ++ ++ spin_unlock_bh(&eroute_lock); ++ ++ *start = buffer + (offset - w.begin); /* Start of wanted data */ ++ return w.len - (offset - w.begin); ++} ++ ++IPSEC_PROCFS_DEBUG_NO_STATIC ++int ++ipsec_spi_get_info(char *buffer, ++ char **start, ++ off_t offset, ++ int length IPSEC_PROC_LAST_ARG) ++{ ++ const int max_content = length > 0? 
length-1 : 0; ++ int len = 0; ++ off_t begin = 0; ++ int i; ++ struct ipsec_sa *sa_p; ++ char sa[SATOT_BUF]; ++ char buf_s[SUBNETTOA_BUF]; ++ char buf_d[SUBNETTOA_BUF]; ++ size_t sa_len; ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS, ++ "klips_debug:ipsec_spi_get_info: " ++ "buffer=0p%p, *start=0p%p, offset=%d, length=%d\n", ++ buffer, ++ *start, ++ (int)offset, ++ length); ++ ++ spin_lock_bh(&tdb_lock); ++ ++ for (i = 0; i < SADB_HASHMOD; i++) { ++ for (sa_p = ipsec_sadb_hash[i]; ++ sa_p; ++ sa_p = sa_p->ips_hnext) { ++ atomic_inc(&sa_p->ips_refcount); ++ sa_len = satot(&sa_p->ips_said, 'x', sa, sizeof(sa)); ++ len += ipsec_snprintf(buffer+len, length-len, "%s ", ++ sa_len ? sa : " (error)"); ++ ++ len += ipsec_snprintf(buffer+len, length-len, "%s%s%s", ++ IPS_XFORM_NAME(sa_p)); ++ ++ len += ipsec_snprintf(buffer+len, length-len, ": dir=%s", ++ (sa_p->ips_flags & EMT_INBOUND) ? ++ "in " : "out"); ++ ++ if(sa_p->ips_addr_s) { ++ addrtoa(((struct sockaddr_in*)(sa_p->ips_addr_s))->sin_addr, ++ 0, buf_s, sizeof(buf_s)); ++ len += ipsec_snprintf(buffer+len, length-len, " src=%s", ++ buf_s); ++ } ++ ++ if((sa_p->ips_said.proto == IPPROTO_IPIP) ++ && (sa_p->ips_flags & SADB_X_SAFLAGS_INFLOW)) { ++ subnettoa(sa_p->ips_flow_s.u.v4.sin_addr, ++ sa_p->ips_mask_s.u.v4.sin_addr, ++ 0, ++ buf_s, ++ sizeof(buf_s)); ++ ++ subnettoa(sa_p->ips_flow_d.u.v4.sin_addr, ++ sa_p->ips_mask_d.u.v4.sin_addr, ++ 0, ++ buf_d, ++ sizeof(buf_d)); ++ ++ len += ipsec_snprintf(buffer+len, length-len, " policy=%s->%s", ++ buf_s, buf_d); ++ } ++ ++ if(sa_p->ips_iv_bits) { ++ int j; ++ len += ipsec_snprintf(buffer+len, length-len, " iv_bits=%dbits iv=0x", ++ sa_p->ips_iv_bits); ++ ++ for(j = 0; j < sa_p->ips_iv_bits / 8; j++) { ++ len += ipsec_snprintf(buffer+len, length-len, "%02x", ++ (__u32)((__u8*)(sa_p->ips_iv))[j]); ++ } ++ } ++ ++ if(sa_p->ips_encalg || sa_p->ips_authalg) { ++ if(sa_p->ips_replaywin) { ++ len += ipsec_snprintf(buffer+len, length-len, " ooowin=%d", ++ sa_p->ips_replaywin); ++ } 
++ if(sa_p->ips_errs.ips_replaywin_errs) { ++ len += ipsec_snprintf(buffer+len, length-len, " ooo_errs=%d", ++ sa_p->ips_errs.ips_replaywin_errs); ++ } ++ if(sa_p->ips_replaywin_lastseq) { ++ len += ipsec_snprintf(buffer+len, length-len, " seq=%d", ++ sa_p->ips_replaywin_lastseq); ++ } ++ if(sa_p->ips_replaywin_bitmap) { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) ++ len += ipsec_snprintf(buffer+len, length-len, " bit=0x%Lx", ++ sa_p->ips_replaywin_bitmap); ++#else ++ len += ipsec_snprintf(buffer+len, length-len, " bit=0x%x%08x", ++ (__u32)(sa_p->ips_replaywin_bitmap >> 32), ++ (__u32)sa_p->ips_replaywin_bitmap); ++#endif ++ } ++ if(sa_p->ips_replaywin_maxdiff) { ++ len += ipsec_snprintf(buffer+len, length-len, " max_seq_diff=%d", ++ sa_p->ips_replaywin_maxdiff); ++ } ++ } ++ if(sa_p->ips_flags & ~EMT_INBOUND) { ++ len += ipsec_snprintf(buffer+len, length-len, " flags=0x%x", ++ sa_p->ips_flags & ~EMT_INBOUND); ++ len += ipsec_snprintf(buffer+len, length-len, "<"); ++ /* flag printing goes here */ ++ len += ipsec_snprintf(buffer+len, length-len, ">"); ++ } ++ if(sa_p->ips_auth_bits) { ++ len += ipsec_snprintf(buffer+len, length-len, " alen=%d", ++ sa_p->ips_auth_bits); ++ } ++ if(sa_p->ips_key_bits_a) { ++ len += ipsec_snprintf(buffer+len, length-len, " aklen=%d", ++ sa_p->ips_key_bits_a); ++ } ++ if(sa_p->ips_errs.ips_auth_errs) { ++ len += ipsec_snprintf(buffer+len, length-len, " auth_errs=%d", ++ sa_p->ips_errs.ips_auth_errs); ++ } ++ if(sa_p->ips_key_bits_e) { ++ len += ipsec_snprintf(buffer+len, length-len, " eklen=%d", ++ sa_p->ips_key_bits_e); ++ } ++ if(sa_p->ips_errs.ips_encsize_errs) { ++ len += ipsec_snprintf(buffer+len, length-len, " encr_size_errs=%d", ++ sa_p->ips_errs.ips_encsize_errs); ++ } ++ if(sa_p->ips_errs.ips_encpad_errs) { ++ len += ipsec_snprintf(buffer+len, length-len, " encr_pad_errs=%d", ++ sa_p->ips_errs.ips_encpad_errs); ++ } ++ ++ len += ipsec_snprintf(buffer+len, length-len, " life(c,s,h)="); ++ ++ len += 
ipsec_lifetime_format(buffer + len, ++ length - len, ++ "alloc", ++ ipsec_life_countbased, ++ &sa_p->ips_life.ipl_allocations); ++ ++ len += ipsec_lifetime_format(buffer + len, ++ length - len, ++ "bytes", ++ ipsec_life_countbased, ++ &sa_p->ips_life.ipl_bytes); ++ ++ len += ipsec_lifetime_format(buffer + len, ++ length - len, ++ "addtime", ++ ipsec_life_timebased, ++ &sa_p->ips_life.ipl_addtime); ++ ++ len += ipsec_lifetime_format(buffer + len, ++ length - len, ++ "usetime", ++ ipsec_life_timebased, ++ &sa_p->ips_life.ipl_usetime); ++ ++ len += ipsec_lifetime_format(buffer + len, ++ length - len, ++ "packets", ++ ipsec_life_countbased, ++ &sa_p->ips_life.ipl_packets); ++ ++ if(sa_p->ips_life.ipl_usetime.ipl_last) { /* XXX-MCR should be last? */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) ++ len += ipsec_snprintf(buffer+len, length-len, " idle=%Ld", ++ jiffies / HZ - sa_p->ips_life.ipl_usetime.ipl_last); ++#else ++ len += ipsec_snprintf(buffer+len, length-len, " idle=%lu", ++ jiffies / HZ - (unsigned long)sa_p->ips_life.ipl_usetime.ipl_last); ++#endif ++ } ++ ++#ifdef CONFIG_KLIPS_IPCOMP ++ if(sa_p->ips_said.proto == IPPROTO_COMP && ++ (sa_p->ips_comp_ratio_dbytes || ++ sa_p->ips_comp_ratio_cbytes)) { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) ++ len += ipsec_snprintf(buffer+len, length-len, " ratio=%Ld:%Ld", ++ sa_p->ips_comp_ratio_dbytes, ++ sa_p->ips_comp_ratio_cbytes); ++#else ++ len += ipsec_snprintf(buffer+len, length-len, " ratio=%lu:%lu", ++ (unsigned long)sa_p->ips_comp_ratio_dbytes, ++ (unsigned long)sa_p->ips_comp_ratio_cbytes); ++#endif ++ } ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ { ++ char *natttype_name; ++ ++ switch(sa_p->ips_natt_type) ++ { ++ case 0: ++ natttype_name="none"; ++ break; ++ case ESPINUDP_WITH_NON_IKE: ++ natttype_name="nonike"; ++ break; ++ case ESPINUDP_WITH_NON_ESP: ++ natttype_name="nonesp"; ++ break; ++ default: ++ natttype_name = "unknown"; ++ break; ++ } ++ ++ len += 
ipsec_snprintf(buffer + len, length-len, " natencap=%s", ++ natttype_name); ++ ++ len += ipsec_snprintf(buffer + len, length-len, " natsport=%d", ++ sa_p->ips_natt_sport); ++ ++ len += ipsec_snprintf(buffer + len,length-len, " natdport=%d", ++ sa_p->ips_natt_dport); ++ } ++#else ++ len += ipsec_snprintf(buffer + len, length-len, " natencap=na"); ++#endif /* CONFIG_IPSEC_NAT_TRAVERSAL */ ++ ++ len += ipsec_snprintf(buffer + len,length-len, " refcount=%d", ++ atomic_read(&sa_p->ips_refcount)); ++ ++ len += ipsec_snprintf(buffer+len, length-len, " ref=%d", ++ sa_p->ips_ref); ++#ifdef CONFIG_KLIPS_DEBUG ++ if(debug_xform) { ++ len += ipsec_snprintf(buffer+len, length-len, " reftable=%lu refentry=%lu", ++ (unsigned long)IPsecSAref2table(sa_p->ips_ref), ++ (unsigned long)IPsecSAref2entry(sa_p->ips_ref)); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ len += ipsec_snprintf(buffer+len, length-len, "\n"); ++ ++ atomic_dec(&sa_p->ips_refcount); ++ ++ if (len >= max_content) { ++ /* we've done all that can fit -- stop loops */ ++ len = max_content; /* truncate crap */ ++ goto done_spi_i; ++ } else { ++ const off_t pos = begin + len; /* file position of end of what we've generated */ ++ ++ if (pos <= offset) { ++ /* all is before first interesting character: ++ * discard, but note where we are. ++ */ ++ len = 0; ++ begin = pos; ++ } ++ } ++ } ++ } ++ ++done_spi_i: ++ spin_unlock_bh(&tdb_lock); ++ ++ *start = buffer + (offset - begin); /* Start of wanted data */ ++ return len - (offset - begin); ++} ++ ++IPSEC_PROCFS_DEBUG_NO_STATIC ++int ++ipsec_spigrp_get_info(char *buffer, ++ char **start, ++ off_t offset, ++ int length IPSEC_PROC_LAST_ARG) ++{ ++ /* Limit of useful snprintf output */ ++ const int max_content = length > 0? 
length-1 : 0; ++ ++ int len = 0; ++ off_t begin = 0; ++ int i; ++ struct ipsec_sa *sa_p, *sa_p2; ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS, ++ "klips_debug:ipsec_spigrp_get_info: " ++ "buffer=0p%p, *start=0p%p, offset=%d, length=%d\n", ++ buffer, ++ *start, ++ (int)offset, ++ length); ++ ++ spin_lock_bh(&tdb_lock); ++ ++ for (i = 0; i < SADB_HASHMOD; i++) { ++ for (sa_p = ipsec_sadb_hash[i]; ++ sa_p != NULL; ++ sa_p = sa_p->ips_hnext) ++ { ++ atomic_inc(&sa_p->ips_refcount); ++ if(sa_p->ips_inext == NULL) { ++ sa_p2 = sa_p; ++ while(sa_p2 != NULL) { ++ atomic_inc(&sa_p2->ips_refcount); ++ sa_len = satot(&sa_p2->ips_said, ++ 'x', sa, sizeof(sa)); ++ ++ len += ipsec_snprintf(buffer+len, length-len, "%s ", ++ sa_len ? sa : " (error)"); ++ atomic_dec(&sa_p2->ips_refcount); ++ sa_p2 = sa_p2->ips_onext; ++ } ++ len += ipsec_snprintf(buffer+len, length-len, "\n"); ++ } ++ ++ atomic_dec(&sa_p->ips_refcount); ++ ++ if (len >= max_content) { ++ /* we've done all that can fit -- stop loops */ ++ len = max_content; /* truncate crap */ ++ goto done_spigrp_i; ++ } else { ++ const off_t pos = begin + len; ++ ++ if (pos <= offset) { ++ /* all is before first interesting character: ++ * discard, but note where we are. ++ */ ++ len = 0; ++ begin = pos; ++ } ++ } ++ } ++ } ++ ++done_spigrp_i: ++ spin_unlock_bh(&tdb_lock); ++ ++ *start = buffer + (offset - begin); /* Start of wanted data */ ++ return len - (offset - begin); ++} ++ ++ ++IPSEC_PROCFS_DEBUG_NO_STATIC ++int ++ipsec_tncfg_get_info(char *buffer, ++ char **start, ++ off_t offset, ++ int length IPSEC_PROC_LAST_ARG) ++{ ++ /* limit of useful snprintf output */ ++ const int max_content = length > 0? 
length-1 : 0; ++ int len = 0; ++ off_t begin = 0; ++ int i; ++ char name[9]; ++ struct net_device *dev, *privdev; ++ struct ipsecpriv *priv; ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS, ++ "klips_debug:ipsec_tncfg_get_info: " ++ "buffer=0p%p, *start=0p%p, offset=%d, length=%d\n", ++ buffer, ++ *start, ++ (int)offset, ++ length); ++ ++ for(i = 0; i < IPSEC_NUM_IF; i++) { ++ ipsec_snprintf(name, (ssize_t) sizeof(name), IPSEC_DEV_FORMAT, i); ++ dev = __ipsec_dev_get(name); ++ if(dev) { ++ priv = (struct ipsecpriv *)(dev->priv); ++ len += ipsec_snprintf(buffer+len, length-len, "%s", ++ dev->name); ++ if(priv) { ++ privdev = (struct net_device *)(priv->dev); ++ len += ipsec_snprintf(buffer+len, length-len, " -> %s", ++ privdev ? privdev->name : "NULL"); ++ len += ipsec_snprintf(buffer+len, length-len, " mtu=%d(%d) -> %d", ++ dev->mtu, ++ priv->mtu, ++ privdev ? privdev->mtu : 0); ++ } else { ++ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS, ++ "klips_debug:ipsec_tncfg_get_info: device '%s' has no private data space!\n", ++ dev->name); ++ } ++ len += ipsec_snprintf(buffer+len, length-len, "\n"); ++ ++ if (len >= max_content) { ++ /* we've done all that can fit -- stop loop */ ++ len = max_content; /* truncate crap */ ++ break; ++ } else { ++ const off_t pos = begin + len; ++ if (pos <= offset) { ++ len = 0; ++ begin = pos; ++ } ++ } ++ } ++ } ++ *start = buffer + (offset - begin); /* Start of wanted data */ ++ len -= (offset - begin); /* Start slop */ ++ if (len > length) ++ len = length; ++ return len; ++} ++ ++IPSEC_PROCFS_DEBUG_NO_STATIC ++int ++ipsec_version_get_info(char *buffer, ++ char **start, ++ off_t offset, ++ int length IPSEC_PROC_LAST_ARG) ++{ ++ int len = 0; ++ off_t begin = 0; ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS, ++ "klips_debug:ipsec_version_get_info: " ++ "buffer=0p%p, *start=0p%p, offset=%d, length=%d\n", ++ buffer, ++ *start, ++ (int)offset, ++ length); ++ ++ len += ipsec_snprintf(buffer + len,length-len, "Openswan version: %s\n", ++ 
ipsec_version_code()); ++#if 0 ++ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS, ++ "klips_debug:ipsec_version_get_info: " ++ "ipsec_init version: %s\n", ++ ipsec_init_c_version); ++ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS, ++ "klips_debug:ipsec_version_get_info: " ++ "ipsec_tunnel version: %s\n", ++ ipsec_tunnel_c_version); ++ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS, ++ "klips_debug:ipsec_version_get_info: " ++ "ipsec_netlink version: %s\n", ++ ipsec_netlink_c_version); ++ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS, ++ "klips_debug:ipsec_version_get_info: " ++ "radij_c_version: %s\n", ++ radij_c_version); ++#endif ++ ++ ++ *start = buffer + (offset - begin); /* Start of wanted data */ ++ len -= (offset - begin); /* Start slop */ ++ if (len > length) ++ len = length; ++ return len; ++} ++ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++unsigned int natt_available = 1; ++#else ++unsigned int natt_available = 0; ++#endif ++#ifdef module_param ++module_param(natt_available, int, 0444); ++#else ++MODULE_PARM("natt_available","i"); ++#endif ++ ++IPSEC_PROCFS_DEBUG_NO_STATIC ++int ++ipsec_natt_get_info(char *buffer, ++ char **start, ++ off_t offset, ++ int length IPSEC_PROC_LAST_ARG) ++{ ++ int len = 0; ++ off_t begin = 0; ++ ++ len += ipsec_snprintf(buffer + len, ++ length-len, "%d\n", ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ 1 ++#else ++ 0 ++#endif ++ ); ++ ++ *start = buffer + (offset - begin); /* Start of wanted data */ ++ len -= (offset - begin); /* Start slop */ ++ if (len > length) ++ len = length; ++ return len; ++} ++ ++IPSEC_PROCFS_DEBUG_NO_STATIC ++int ++ipsec_birth_info(char *page, ++ char **start, ++ off_t offset, ++ int count, ++ int *eof, ++ void *data) ++{ ++ struct ipsec_birth_reply *ibr = (struct ipsec_birth_reply *)data; ++ int len; ++ ++ if(offset >= ibr->packet_template_len) { ++ if(eof) { ++ *eof=1; ++ } ++ return 0; ++ } ++ ++ len = ibr->packet_template_len; ++ len -= offset; ++ if (len > count) ++ len = count; ++ ++ memcpy(page + offset, ibr->packet_template+offset, 
len); ++ ++ return len; ++} ++ ++IPSEC_PROCFS_DEBUG_NO_STATIC ++int ++ipsec_birth_set(struct file *file, const char *buffer, ++ unsigned long count, void *data) ++{ ++ struct ipsec_birth_reply *ibr = (struct ipsec_birth_reply *)data; ++ int len; ++ ++ KLIPS_INC_USE; ++ if(count > IPSEC_BIRTH_TEMPLATE_MAXLEN) { ++ len = IPSEC_BIRTH_TEMPLATE_MAXLEN; ++ } else { ++ len = count; ++ } ++ ++ if(copy_from_user(ibr->packet_template, buffer, len)) { ++ KLIPS_DEC_USE; ++ return -EFAULT; ++ } ++ ibr->packet_template_len = len; ++ ++ KLIPS_DEC_USE; ++ ++ return len; ++} ++ ++ ++#ifdef CONFIG_KLIPS_DEBUG ++IPSEC_PROCFS_DEBUG_NO_STATIC ++int ++ipsec_klipsdebug_get_info(char *buffer, ++ char **start, ++ off_t offset, ++ int length IPSEC_PROC_LAST_ARG) ++{ ++ int len = 0; ++ off_t begin = 0; ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS, ++ "klips_debug:ipsec_klipsdebug_get_info: " ++ "buffer=0p%p, *start=0p%p, offset=%d, length=%d\n", ++ buffer, ++ *start, ++ (int)offset, ++ length); ++ ++ len += ipsec_snprintf(buffer+len, length-len, "debug_tunnel=%08x.\n", debug_tunnel); ++ len += ipsec_snprintf(buffer+len, length-len, "debug_xform=%08x.\n", debug_xform); ++ len += ipsec_snprintf(buffer+len, length-len, "debug_eroute=%08x.\n", debug_eroute); ++ len += ipsec_snprintf(buffer+len, length-len, "debug_spi=%08x.\n", debug_spi); ++ len += ipsec_snprintf(buffer+len, length-len, "debug_radij=%08x.\n", debug_radij); ++ len += ipsec_snprintf(buffer+len, length-len, "debug_esp=%08x.\n", debug_esp); ++ len += ipsec_snprintf(buffer+len, length-len, "debug_ah=%08x.\n", debug_ah); ++ len += ipsec_snprintf(buffer+len, length-len, "debug_rcv=%08x.\n", debug_rcv); ++ len += ipsec_snprintf(buffer+len, length-len, "debug_pfkey=%08x.\n", debug_pfkey); ++ ++ *start = buffer + (offset - begin); /* Start of wanted data */ ++ len -= (offset - begin); /* Start slop */ ++ if (len > length) ++ len = length; ++ return len; ++} ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++IPSEC_PROCFS_DEBUG_NO_STATIC ++int 
++ipsec_stats_get_int_info(char *buffer, ++ char **start, ++ off_t offset, ++ int length, ++ int *eof, ++ void *data) ++{ ++ ++ const int max_content = length > 0? length-1 : 0; ++ int len = 0; ++ int *thing; ++ ++ thing = (int *)data; ++ ++ len = ipsec_snprintf(buffer+len, length-len, "%08x\n", *thing); ++ ++ if (len >= max_content) ++ len = max_content; /* truncate crap */ ++ ++ *start = buffer + offset; /* Start of wanted data */ ++ return len > offset? len - offset : 0; ++ ++} ++ ++#ifndef PROC_FS_2325 ++struct proc_dir_entry ipsec_eroute = ++{ ++ 0, ++ 12, "ipsec_eroute", ++ S_IFREG | S_IRUGO, 1, 0, 0, 0, ++ &proc_net_inode_operations, ++ ipsec_eroute_get_info, ++ NULL, NULL, NULL, NULL, NULL ++}; ++ ++struct proc_dir_entry ipsec_spi = ++{ ++ 0, ++ 9, "ipsec_spi", ++ S_IFREG | S_IRUGO, 1, 0, 0, 0, ++ &proc_net_inode_operations, ++ ipsec_spi_get_info, ++ NULL, NULL, NULL, NULL, NULL ++}; ++ ++struct proc_dir_entry ipsec_spigrp = ++{ ++ 0, ++ 12, "ipsec_spigrp", ++ S_IFREG | S_IRUGO, 1, 0, 0, 0, ++ &proc_net_inode_operations, ++ ipsec_spigrp_get_info, ++ NULL, NULL, NULL, NULL, NULL ++}; ++ ++struct proc_dir_entry ipsec_tncfg = ++{ ++ 0, ++ 11, "ipsec_tncfg", ++ S_IFREG | S_IRUGO, 1, 0, 0, 0, ++ &proc_net_inode_operations, ++ ipsec_tncfg_get_info, ++ NULL, NULL, NULL, NULL, NULL ++}; ++ ++struct proc_dir_entry ipsec_version = ++{ ++ 0, ++ 13, "ipsec_version", ++ S_IFREG | S_IRUGO, 1, 0, 0, 0, ++ &proc_net_inode_operations, ++ ipsec_version_get_info, ++ NULL, NULL, NULL, NULL, NULL ++}; ++ ++#ifdef CONFIG_KLIPS_DEBUG ++struct proc_dir_entry ipsec_klipsdebug = ++{ ++ 0, ++ 16, "ipsec_klipsdebug", ++ S_IFREG | S_IRUGO, 1, 0, 0, 0, ++ &proc_net_inode_operations, ++ ipsec_klipsdebug_get_info, ++ NULL, NULL, NULL, NULL, NULL ++}; ++#endif /* CONFIG_KLIPS_DEBUG */ ++#endif /* !PROC_FS_2325 */ ++#endif /* CONFIG_PROC_FS */ ++ ++#if defined(PROC_FS_2325) ++struct ipsec_proc_list { ++ char *name; ++ struct proc_dir_entry **parent; ++ struct proc_dir_entry **dir; ++ 
read_proc_t *readthing; ++ write_proc_t *writething; ++ void *data; ++}; ++static struct ipsec_proc_list proc_items[]={ ++#ifdef CONFIG_KLIPS_DEBUG ++ {"klipsdebug", &proc_net_ipsec_dir, NULL, ipsec_klipsdebug_get_info, NULL, NULL}, ++#endif ++ {"eroute", &proc_net_ipsec_dir, &proc_eroute_dir, NULL, NULL, NULL}, ++ {"all", &proc_eroute_dir, NULL, ipsec_eroute_get_info, NULL, NULL}, ++ {"spi", &proc_net_ipsec_dir, &proc_spi_dir, NULL, NULL, NULL}, ++ {"all", &proc_spi_dir, NULL, ipsec_spi_get_info, NULL, NULL}, ++ {"spigrp", &proc_net_ipsec_dir, &proc_spigrp_dir, NULL, NULL, NULL}, ++ {"all", &proc_spigrp_dir, NULL, ipsec_spigrp_get_info, NULL, NULL}, ++ {"birth", &proc_net_ipsec_dir, &proc_birth_dir, NULL, NULL, NULL}, ++ {"ipv4", &proc_birth_dir, NULL, ipsec_birth_info, ipsec_birth_set, (void *)&ipsec_ipv4_birth_packet}, ++ {"ipv6", &proc_birth_dir, NULL, ipsec_birth_info, ipsec_birth_set, (void *)&ipsec_ipv6_birth_packet}, ++ {"tncfg", &proc_net_ipsec_dir, NULL, ipsec_tncfg_get_info, NULL, NULL}, ++#ifdef CONFIG_KLIPS_ALG ++ {"xforms", &proc_net_ipsec_dir, NULL, ipsec_xform_get_info, NULL, NULL}, ++#endif /* CONFIG_KLIPS_ALG */ ++ {"stats", &proc_net_ipsec_dir, &proc_stats_dir, NULL, NULL, NULL}, ++ {"trap_count", &proc_stats_dir, NULL, ipsec_stats_get_int_info, NULL, &ipsec_xmit_trap_count}, ++ {"trap_sendcount", &proc_stats_dir, NULL, ipsec_stats_get_int_info, NULL, &ipsec_xmit_trap_sendcount}, ++ {"version", &proc_net_ipsec_dir, NULL, ipsec_version_get_info, NULL, NULL}, ++ {NULL, NULL, NULL, NULL, NULL, NULL} ++}; ++#endif ++ ++int ++ipsec_proc_init() ++{ ++ int error = 0; ++#ifdef IPSEC_PROC_SUBDIRS ++ struct proc_dir_entry *item; ++#endif ++ ++ /* ++ * just complain because pluto won't run without /proc! 
++ */ ++#ifndef CONFIG_PROC_FS ++#error You must have PROC_FS built in to use KLIPS ++#endif ++ ++ /* for 2.0 kernels */ ++#if !defined(PROC_FS_2325) && !defined(PROC_FS_21) ++ error |= proc_register_dynamic(&proc_net, &ipsec_eroute); ++ error |= proc_register_dynamic(&proc_net, &ipsec_spi); ++ error |= proc_register_dynamic(&proc_net, &ipsec_spigrp); ++ error |= proc_register_dynamic(&proc_net, &ipsec_tncfg); ++ error |= proc_register_dynamic(&proc_net, &ipsec_version); ++#ifdef CONFIG_KLIPS_DEBUG ++ error |= proc_register_dynamic(&proc_net, &ipsec_klipsdebug); ++#endif /* CONFIG_KLIPS_DEBUG */ ++#endif ++ ++ /* for 2.2 kernels */ ++#if !defined(PROC_FS_2325) && defined(PROC_FS_21) ++ error |= proc_register(proc_net, &ipsec_eroute); ++ error |= proc_register(proc_net, &ipsec_spi); ++ error |= proc_register(proc_net, &ipsec_spigrp); ++ error |= proc_register(proc_net, &ipsec_tncfg); ++ error |= proc_register(proc_net, &ipsec_version); ++#ifdef CONFIG_KLIPS_DEBUG ++ error |= proc_register(proc_net, &ipsec_klipsdebug); ++#endif /* CONFIG_KLIPS_DEBUG */ ++#endif ++ ++ /* for 2.4 kernels */ ++#if defined(PROC_FS_2325) ++ /* create /proc/net/ipsec */ ++ ++ /* zero these out before we initialize /proc/net/ipsec/birth/stuff */ ++ memset(&ipsec_ipv4_birth_packet, 0, sizeof(struct ipsec_birth_reply)); ++ memset(&ipsec_ipv6_birth_packet, 0, sizeof(struct ipsec_birth_reply)); ++ ++ proc_net_ipsec_dir = proc_mkdir("ipsec", proc_net); ++ if(proc_net_ipsec_dir == NULL) { ++ /* no point in continuing */ ++ return 1; ++ } ++ ++ { ++ struct ipsec_proc_list *it; ++ ++ it=proc_items; ++ while(it->name!=NULL) { ++ if(it->dir) { ++ /* make a dir instead */ ++ item = proc_mkdir(it->name, *it->parent); ++ *it->dir = item; ++ } else { ++ item = create_proc_entry(it->name, 0400, *it->parent); ++ } ++ if(item) { ++ item->read_proc = it->readthing; ++ item->write_proc = it->writething; ++ item->data = it->data; ++#ifdef MODULE ++ item->owner = THIS_MODULE; ++#endif ++ } else { ++ error |= 1; 
++ } ++ it++; ++ } ++ } ++ ++ /* now create some symlinks to provide compatibility */ ++ proc_symlink("ipsec_eroute", proc_net, "ipsec/eroute/all"); ++ proc_symlink("ipsec_spi", proc_net, "ipsec/spi/all"); ++ proc_symlink("ipsec_spigrp", proc_net, "ipsec/spigrp/all"); ++ proc_symlink("ipsec_tncfg", proc_net, "ipsec/tncfg"); ++ proc_symlink("ipsec_version",proc_net, "ipsec/version"); ++ proc_symlink("ipsec_klipsdebug",proc_net,"ipsec/klipsdebug"); ++ ++#endif /* !PROC_FS_2325 */ ++ ++ return error; ++} ++ ++void ++ipsec_proc_cleanup() ++{ ++ ++ /* for 2.0 and 2.2 kernels */ ++#if !defined(PROC_FS_2325) ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if (proc_net_unregister(ipsec_klipsdebug.low_ino) != 0) ++ printk("klips_debug:ipsec_cleanup: " ++ "cannot unregister /proc/net/ipsec_klipsdebug\n"); ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ if (proc_net_unregister(ipsec_version.low_ino) != 0) ++ printk("klips_debug:ipsec_cleanup: " ++ "cannot unregister /proc/net/ipsec_version\n"); ++ if (proc_net_unregister(ipsec_eroute.low_ino) != 0) ++ printk("klips_debug:ipsec_cleanup: " ++ "cannot unregister /proc/net/ipsec_eroute\n"); ++ if (proc_net_unregister(ipsec_spi.low_ino) != 0) ++ printk("klips_debug:ipsec_cleanup: " ++ "cannot unregister /proc/net/ipsec_spi\n"); ++ if (proc_net_unregister(ipsec_spigrp.low_ino) != 0) ++ printk("klips_debug:ipsec_cleanup: " ++ "cannot unregister /proc/net/ipsec_spigrp\n"); ++ if (proc_net_unregister(ipsec_tncfg.low_ino) != 0) ++ printk("klips_debug:ipsec_cleanup: " ++ "cannot unregister /proc/net/ipsec_tncfg\n"); ++#endif ++ ++ /* for 2.4 kernels */ ++#if defined(PROC_FS_2325) ++ { ++ struct ipsec_proc_list *it; ++ ++ /* find end of list */ ++ it=proc_items; ++ while(it->name!=NULL) { ++ it++; ++ } ++ it--; ++ ++ do { ++ remove_proc_entry(it->name, *it->parent); ++ it--; ++ } while(it >= proc_items); ++ } ++ ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ remove_proc_entry("ipsec_klipsdebug", proc_net); ++#endif /* CONFIG_KLIPS_DEBUG */ ++ 
remove_proc_entry("ipsec_eroute", proc_net); ++ remove_proc_entry("ipsec_spi", proc_net); ++ remove_proc_entry("ipsec_spigrp", proc_net); ++ remove_proc_entry("ipsec_tncfg", proc_net); ++ remove_proc_entry("ipsec_version", proc_net); ++ remove_proc_entry("ipsec", proc_net); ++#endif /* 2.4 kernel */ ++} ++ ++/* ++ * $Log: ipsec_proc.c,v $ ++ * Revision 1.39.2.7 2007-11-06 18:24:44 paul ++ * include linux/moduleparam.h on linux 2.4.x kernels. ++ * ++ * Revision 1.39.2.6 2007/09/05 02:41:20 paul ++ * Added xforms info to /proc file. Patch by David McCullough ++ * ++ * Revision 1.39.2.5 2007/08/09 14:37:45 paul ++ * Patch by sergeil to compile on 2.4.35. ++ * ++ * Revision 1.39.2.4 2006/11/15 22:21:39 paul ++ * backport of creating a /sys/ file to test for nat-t capability in kernel. ++ * ++ * Revision 1.39.2.3 2006/10/06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.39.2.2 2006/02/13 18:48:12 paul ++ * Fix by Ankit Desai for module unloading. ++ * ++ * Revision 1.39.2.1 2005/09/07 00:45:59 paul ++ * pull up of mcr's nat-t klips detection patch from head ++ * ++ * Revision 1.39 2005/05/20 03:19:18 mcr ++ * modifications for use on 2.4.30 kernel, with backported ++ * printk_ratelimit(). all warnings removed. ++ * ++ * Revision 1.38 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. ++ * ++ * Revision 1.37 2005/04/13 22:49:49 mcr ++ * moved KLIPS specific snprintf() wrapper to seperate file. ++ * ++ * Revision 1.36 2005/04/06 17:44:36 mcr ++ * when NAT-T is compiled out, show encap as "NA" ++ * ++ * Revision 1.35 2005/01/26 00:50:35 mcr ++ * adjustment of confusion of CONFIG_IPSEC_NAT vs CONFIG_KLIPS_NAT, ++ * and make sure that NAT_TRAVERSAL is set as well to match ++ * userspace compiles of code. 
++ * ++ * Revision 1.34 2004/12/03 21:25:57 mcr ++ * compile time fixes for running on 2.6. ++ * still experimental. ++ * ++ * Revision 1.33 2004/08/17 03:27:23 mcr ++ * klips 2.6 edits. ++ * ++ * Revision 1.32 2004/08/03 18:19:08 mcr ++ * in 2.6, use "net_device" instead of #define device->net_device. ++ * this probably breaks 2.0 compiles. ++ * ++ * Revision 1.31 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.30 2004/04/25 21:23:11 ken ++ * Pull in dhr's changes from FreeS/WAN 2.06 ++ * ++ * Revision 1.29 2004/04/06 02:49:26 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.28 2004/03/28 20:29:58 paul ++ * ssize_t, not ssized_t ++ * ++ * Revision 1.27 2004/03/28 20:27:20 paul ++ * Included tested and confirmed fixes mcr made and dhr verified for ++ * snprint statements. Changed one other snprintf to use ipsec_snprintf ++ * so it wouldnt break compatibility with 2.0/2.2 kernels. Verified with ++ * dhr. (thanks dhr!) ++ * ++ * Revision 1.26 2004/02/09 22:07:06 mcr ++ * added information about nat-traversal setting to spi-output. ++ * ++ * Revision 1.25.4.1 2004/04/05 04:30:46 mcr ++ * patches for alg-branch to compile/work with 2.x openswan ++ * ++ * Revision 1.25 2003/10/31 02:27:55 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.24.4.1 2003/10/29 01:30:41 mcr ++ * elimited "struct sa_id". ++ * ++ * Revision 1.24 2003/06/20 01:42:21 mcr ++ * added counters to measure how many ACQUIREs we send to pluto, ++ * and how many are successfully sent. ++ * ++ * Revision 1.23 2003/04/03 17:38:09 rgb ++ * Centralised ipsec_kfree_skb and ipsec_dev_{get,put}. ++ * ++ * Revision 1.22 2002/09/20 15:40:57 rgb ++ * Renamed saref macros for consistency and brevity. ++ * ++ * Revision 1.21 2002/09/20 05:01:35 rgb ++ * Print ref and reftable, refentry seperately. 
++ * ++ * Revision 1.20 2002/09/19 02:35:39 mcr ++ * do not define structures needed by /proc/net/ipsec/ if we ++ * aren't going create that directory. ++ * ++ * Revision 1.19 2002/09/10 01:43:25 mcr ++ * fixed problem in /-* comment. ++ * ++ * Revision 1.18 2002/09/03 16:22:11 mcr ++ * fixed initialization of birth/stuff values - some simple ++ * screw ups in the code. ++ * removed debugging that was left in by mistake. ++ * ++ * Revision 1.17 2002/09/02 17:54:53 mcr ++ * changed how the table driven /proc entries are created so that ++ * making subdirs is now explicit rather than implicit. ++ * ++ * Revision 1.16 2002/08/30 01:23:37 mcr ++ * reorganized /proc creating code to clear up ifdefs, ++ * make the 2.4 code table driven, and put things into ++ * /proc/net/ipsec subdir. Symlinks are left for compatibility. ++ * ++ * Revision 1.15 2002/08/13 19:01:25 mcr ++ * patches from kenb to permit compilation of FreeSWAN on ia64. ++ * des library patched to use proper DES_LONG type for ia64. ++ * ++ * Revision 1.14 2002/07/26 08:48:31 rgb ++ * Added SA ref table code. ++ * ++ * Revision 1.13 2002/07/24 18:44:54 rgb ++ * Type fiddling to tame ia64 compiler. ++ * ++ * Revision 1.12 2002/05/27 18:56:07 rgb ++ * Convert to dynamic ipsec device allocation. ++ * ++ * Revision 1.11 2002/05/23 07:14:50 rgb ++ * Added refcount code. ++ * Cleaned up %p variants to 0p%p for test suite cleanup. ++ * Convert "usecount" to "refcount" to remove ambiguity. ++ * ++ * Revision 1.10 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.9 2002/04/24 07:36:28 mcr ++ * Moved from ./klips/net/ipsec/ipsec_proc.c,v ++ * ++ * Revision 1.8 2002/01/29 17:17:55 mcr ++ * moved include of ipsec_param.h to after include of linux/kernel.h ++ * otherwise, it seems that some option that is set in ipsec_param.h ++ * screws up something subtle in the include path to kernel.h, and ++ * it complains on the snprintf() prototype. 
++ * ++ * Revision 1.7 2002/01/29 04:00:52 mcr ++ * more excise of kversions.h header. ++ * ++ * Revision 1.6 2002/01/29 02:13:17 mcr ++ * introduction of ipsec_kversion.h means that include of ++ * ipsec_param.h must preceed any decisions about what files to ++ * include to deal with differences in kernel source. ++ * ++ * Revision 1.5 2002/01/12 02:54:30 mcr ++ * beginnings of /proc/net/ipsec dir. ++ * ++ * Revision 1.4 2001/12/11 02:21:05 rgb ++ * Don't include module version here, fixing 2.2 compile bug. ++ * ++ * Revision 1.3 2001/12/05 07:19:44 rgb ++ * Fixed extraneous #include "version.c" bug causing modular KLIPS failure. ++ * ++ * Revision 1.2 2001/11/26 09:16:14 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.74 2001/11/22 05:44:11 henry ++ * new version stuff ++ * ++ * Revision 1.1.2.1 2001/09/25 02:19:40 mcr ++ * /proc manipulation code moved to new ipsec_proc.c ++ * ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_radij.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,893 @@ ++/* ++ * Interface between the IPSEC code and the radix (radij) tree code ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * RCSID $Id: ipsec_radij.c,v 1.73.2.2 2007-09-05 02:56:09 paul Exp $ ++ */ ++ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, struct net_device_stats and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include ++#include ++#ifdef SPINLOCK ++# ifdef SPINLOCK_23 ++# include /* *lock* */ ++# else /* 23_SPINLOCK */ ++# include /* *lock* */ ++# endif /* 23_SPINLOCK */ ++#endif /* SPINLOCK */ ++ ++#include ++ ++#include "openswan/ipsec_eroute.h" ++#include "openswan/ipsec_sa.h" ++ ++#include "openswan/radij.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/radij.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_tunnel.h" /* struct ipsecpriv */ ++#include "openswan/ipsec_xform.h" ++ ++#include ++#include ++ ++#include "openswan/ipsec_proto.h" ++ ++#ifdef CONFIG_KLIPS_DEBUG ++int debug_radij = 0; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++struct radij_node_head *rnh = NULL; ++#ifdef SPINLOCK ++spinlock_t eroute_lock = SPIN_LOCK_UNLOCKED; ++#else /* SPINLOCK */ ++spinlock_t eroute_lock; ++#endif /* SPINLOCK */ ++ ++int ++ipsec_radijinit(void) ++{ ++ maj_keylen = sizeof (struct sockaddr_encap); ++ ++ rj_init(); ++ ++ if (rj_inithead((void **)&rnh, /*16*/offsetof(struct sockaddr_encap, sen_type) * sizeof(__u8)) == 0) /* 16 is bit offset of sen_type */ ++ return -1; ++ return 0; ++} ++ ++int ++ipsec_radijcleanup(void) ++{ ++ int error; ++ ++ spin_lock_bh(&eroute_lock); ++ ++ error = radijcleanup(); ++ ++ spin_unlock_bh(&eroute_lock); ++ ++ return error; ++} ++ ++int ++ipsec_cleareroutes(void) ++{ ++ int error; ++ ++ spin_lock_bh(&eroute_lock); ++ ++ error = 
radijcleartree(); ++ ++ spin_unlock_bh(&eroute_lock); ++ ++ return error; ++} ++ ++int ++ipsec_breakroute(struct sockaddr_encap *eaddr, ++ struct sockaddr_encap *emask, ++ struct sk_buff **first, ++ struct sk_buff **last) ++{ ++ struct eroute *ro; ++ struct radij_node *rn; ++ int error; ++#ifdef CONFIG_KLIPS_DEBUG ++ ++ if (debug_eroute) { ++ char buf1[SUBNETTOA_BUF], buf2[SUBNETTOA_BUF]; ++ subnettoa(eaddr->sen_ip_src, emask->sen_ip_src, 0, buf1, sizeof(buf1)); ++ subnettoa(eaddr->sen_ip_dst, emask->sen_ip_dst, 0, buf2, sizeof(buf2)); ++ KLIPS_PRINT(debug_eroute, ++ "klips_debug:ipsec_breakroute: " ++ "attempting to delete eroute for %s:%d->%s:%d %d\n", ++ buf1, ntohs(eaddr->sen_sport), ++ buf2, ntohs(eaddr->sen_dport), eaddr->sen_proto); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ spin_lock_bh(&eroute_lock); ++ ++ if ((error = rj_delete(eaddr, emask, rnh, &rn)) != 0) { ++ spin_unlock_bh(&eroute_lock); ++ KLIPS_PRINT(debug_eroute, ++ "klips_debug:ipsec_breakroute: " ++ "node not found, eroute delete failed.\n"); ++ return error; ++ } ++ ++ spin_unlock_bh(&eroute_lock); ++ ++ ro = (struct eroute *)rn; ++ ++ KLIPS_PRINT(debug_eroute, ++ "klips_debug:ipsec_breakroute: " ++ "deleted eroute=0p%p, ident=0p%p->0p%p, first=0p%p, last=0p%p\n", ++ ro, ++ ro->er_ident_s.data, ++ ro->er_ident_d.data, ++ ro->er_first, ++ ro->er_last); ++ ++ if (ro->er_ident_s.data != NULL) { ++ kfree(ro->er_ident_s.data); ++ } ++ if (ro->er_ident_d.data != NULL) { ++ kfree(ro->er_ident_d.data); ++ } ++ if (ro->er_first != NULL) { ++#if 0 ++ struct net_device_stats *stats = (struct net_device_stats *) &(((struct ipsecpriv *)(ro->er_first->dev->priv))->mystats); ++ stats->tx_dropped--; ++#endif ++ *first = ro->er_first; ++ } ++ if (ro->er_last != NULL) { ++#if 0 ++ struct net_device_stats *stats = (struct net_device_stats *) &(((struct ipsecpriv *)(ro->er_last->dev->priv))->mystats); ++ stats->tx_dropped--; ++#endif ++ *last = ro->er_last; ++ } ++ ++ if (rn->rj_flags & (RJF_ACTIVE | RJF_ROOT)) 
++ panic ("ipsec_breakroute RMT_DELEROUTE root or active node\n"); ++ memset((caddr_t)rn, 0, sizeof (struct eroute)); ++ kfree(rn); ++ ++ return 0; ++} ++ ++int ++ipsec_makeroute(struct sockaddr_encap *eaddr, ++ struct sockaddr_encap *emask, ++ ip_said said, ++ uint32_t pid, ++ struct sk_buff *skb, ++ struct ident *ident_s, ++ struct ident *ident_d) ++{ ++ struct eroute *retrt; ++ int error; ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ ++ if (debug_eroute) { ++ ++ { ++ char buf1[SUBNETTOA_BUF], buf2[SUBNETTOA_BUF]; ++ ++ subnettoa(eaddr->sen_ip_src, emask->sen_ip_src, 0, buf1, sizeof(buf1)); ++ subnettoa(eaddr->sen_ip_dst, emask->sen_ip_dst, 0, buf2, sizeof(buf2)); ++ sa_len = satot(&said, 0, sa, sizeof(sa)); ++ KLIPS_PRINT(debug_eroute, ++ "klips_debug:ipsec_makeroute: " ++ "attempting to allocate %lu bytes to insert eroute for %s->%s, SA: %s, PID:%d, skb=0p%p, ident:%s->%s\n", ++ (unsigned long) sizeof(struct eroute), ++ buf1, ++ buf2, ++ sa_len ? sa : " (error)", ++ pid, ++ skb, ++ (ident_s ? (ident_s->data ? ident_s->data : "NULL") : "NULL"), ++ (ident_d ? (ident_d->data ? ident_d->data : "NULL") : "NULL")); ++ } ++ { ++ char buf1[sizeof(struct sockaddr_encap)*2 + 1], ++ buf2[sizeof(struct sockaddr_encap)*2 + 1]; ++ int i; ++ unsigned char *b1 = buf1, ++ *b2 = buf2, ++ *ea = (unsigned char *)eaddr, ++ *em = (unsigned char *)emask; ++ ++ ++ for (i=0; ier_eaddr = *eaddr; ++ retrt->er_emask = *emask; ++ retrt->er_said = said; ++ retrt->er_pid = pid; ++ retrt->er_count = 0; ++ retrt->er_lasttime = jiffies/HZ; ++ ++ { ++ /* this is because gcc 3. 
doesn't like cast's as lvalues */ ++ struct rjtentry *rje = (struct rjtentry *)&(retrt->er_rjt); ++ caddr_t er = (caddr_t)&(retrt->er_eaddr); ++ ++ rje->rd_nodes->rj_key= er; ++ } ++ ++ if (ident_s && ident_s->type != SADB_IDENTTYPE_RESERVED) { ++ int data_len = ident_s->len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident); ++ ++ retrt->er_ident_s.type = ident_s->type; ++ retrt->er_ident_s.id = ident_s->id; ++ retrt->er_ident_s.len = ident_s->len; ++ if(data_len) { ++ KLIPS_PRINT(debug_eroute, ++ "klips_debug:ipsec_makeroute: " ++ "attempting to allocate %u bytes for ident_s.\n", ++ data_len); ++ if(!(retrt->er_ident_s.data = kmalloc(data_len, GFP_KERNEL))) { ++ kfree(retrt); ++ printk("klips_error:ipsec_makeroute: not able to allocate kernel memory (%d)\n", data_len); ++ return ENOMEM; ++ } ++ memcpy(retrt->er_ident_s.data, ident_s->data, data_len); ++ } else { ++ retrt->er_ident_s.data = NULL; ++ } ++ } ++ ++ if (ident_d && ident_d->type != SADB_IDENTTYPE_RESERVED) { ++ int data_len = ident_d->len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident); ++ ++ retrt->er_ident_d.type = ident_d->type; ++ retrt->er_ident_d.id = ident_d->id; ++ retrt->er_ident_d.len = ident_d->len; ++ if(data_len) { ++ KLIPS_PRINT(debug_eroute, ++ "klips_debug:ipsec_makeroute: " ++ "attempting to allocate %u bytes for ident_d.\n", ++ data_len); ++ if(!(retrt->er_ident_d.data = kmalloc(data_len, GFP_KERNEL))) { ++ if (retrt->er_ident_s.data) ++ kfree(retrt->er_ident_s.data); ++ kfree(retrt); ++ printk("klips_error:ipsec_makeroute: not able to allocate kernel memory (%d)\n", data_len); ++ return ENOMEM; ++ } ++ memcpy(retrt->er_ident_d.data, ident_d->data, data_len); ++ } else { ++ retrt->er_ident_d.data = NULL; ++ } ++ } ++ retrt->er_first = skb; ++ retrt->er_last = NULL; ++ ++ KLIPS_PRINT(debug_eroute, ++ "klips_debug:ipsec_makeroute: " ++ "calling rj_addroute now\n"); ++ ++ spin_lock_bh(&eroute_lock); ++ ++ error = rj_addroute(&(retrt->er_eaddr), &(retrt->er_emask), ++ rnh, 
retrt->er_rjt.rd_nodes); ++ ++ spin_unlock_bh(&eroute_lock); ++ ++ if(error) { ++ sa_len = KLIPS_SATOT(debug_eroute, &said, 0, sa, sizeof(sa)); ++ KLIPS_PRINT(debug_eroute, ++ "klips_debug:ipsec_makeroute: " ++ "rj_addroute not able to insert eroute for SA:%s (error:%d)\n", ++ sa_len ? sa : " (error)", error); ++ if (retrt->er_ident_s.data) ++ kfree(retrt->er_ident_s.data); ++ if (retrt->er_ident_d.data) ++ kfree(retrt->er_ident_d.data); ++ ++ kfree(retrt); ++ ++ return error; ++ } ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if (debug_eroute) { ++ char buf1[SUBNETTOA_BUF], buf2[SUBNETTOA_BUF]; ++/* ++ subnettoa(eaddr->sen_ip_src, emask->sen_ip_src, 0, buf1, sizeof(buf1)); ++ subnettoa(eaddr->sen_ip_dst, emask->sen_ip_dst, 0, buf2, sizeof(buf2)); ++*/ ++ subnettoa(rd_key((&(retrt->er_rjt)))->sen_ip_src, rd_mask((&(retrt->er_rjt)))->sen_ip_src, 0, buf1, sizeof(buf1)); ++ subnettoa(rd_key((&(retrt->er_rjt)))->sen_ip_dst, rd_mask((&(retrt->er_rjt)))->sen_ip_dst, 0, buf2, sizeof(buf2)); ++ sa_len = satot(&retrt->er_said, 0, sa, sizeof(sa)); ++ ++ KLIPS_PRINT(debug_eroute, ++ "klips_debug:ipsec_makeroute: " ++ "pid=%05d " ++ "count=%10d " ++ "lasttime=%6d " ++ "%-18s -> %-18s => %s\n", ++ retrt->er_pid, ++ retrt->er_count, ++ (int)(jiffies/HZ - retrt->er_lasttime), ++ buf1, ++ buf2, ++ sa_len ? 
sa : " (error)"); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ KLIPS_PRINT(debug_eroute, ++ "klips_debug:ipsec_makeroute: " ++ "succeeded.\n"); ++ return 0; ++} ++ ++struct eroute * ++ipsec_findroute(struct sockaddr_encap *eaddr) ++{ ++ struct radij_node *rn; ++#ifdef CONFIG_KLIPS_DEBUG ++ char buf1[ADDRTOA_BUF], buf2[ADDRTOA_BUF]; ++ ++ if (debug_radij & DB_RJ_FINDROUTE) { ++ addrtoa(eaddr->sen_ip_src, 0, buf1, sizeof(buf1)); ++ addrtoa(eaddr->sen_ip_dst, 0, buf2, sizeof(buf2)); ++ KLIPS_PRINT(debug_eroute, ++ "klips_debug:ipsec_findroute: " ++ "%s:%d->%s:%d %d\n", ++ buf1, ntohs(eaddr->sen_sport), ++ buf2, ntohs(eaddr->sen_dport), ++ eaddr->sen_proto); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ rn = rj_match((caddr_t)eaddr, rnh); ++ if(rn) { ++ KLIPS_PRINT(debug_eroute && sysctl_ipsec_debug_verbose, ++ "klips_debug:ipsec_findroute: " ++ "found, points to proto=%d, spi=%x, dst=%x.\n", ++ ((struct eroute*)rn)->er_said.proto, ++ ntohl(((struct eroute*)rn)->er_said.spi), ++ ntohl(((struct eroute*)rn)->er_said.dst.u.v4.sin_addr.s_addr)); ++ } ++ return (struct eroute *)rn; ++} ++ ++#ifdef CONFIG_PROC_FS ++/** ipsec_rj_walker_procprint: print one line of eroute table output. ++ * ++ * Theoretical BUG: if w->length is less than the length ++ * of some line we should produce, that line will never ++ * be finished. In effect, the "file" will stop part way ++ * through that line. 
++ */ ++int ++ipsec_rj_walker_procprint(struct radij_node *rn, void *w0) ++{ ++ struct eroute *ro = (struct eroute *)rn; ++ struct rjtentry *rd = (struct rjtentry *)rn; ++ struct wsbuf *w = (struct wsbuf *)w0; ++ char buf1[SUBNETTOA_BUF], buf2[SUBNETTOA_BUF]; ++ char buf3[16]; ++ char sa[SATOT_BUF]; ++ size_t sa_len, buf_len; ++ struct sockaddr_encap *key, *mask; ++ ++ KLIPS_PRINT(debug_radij, ++ "klips_debug:ipsec_rj_walker_procprint: " ++ "rn=0p%p, w0=0p%p\n", ++ rn, ++ w0); ++ if (rn->rj_b >= 0) { ++ return 0; ++ } ++ ++ key = rd_key(rd); ++ mask = rd_mask(rd); ++ ++ if (key == NULL || mask == NULL) { ++ return 0; ++ } ++ ++ buf_len = subnettoa(key->sen_ip_src, mask->sen_ip_src, 0, buf1, sizeof(buf1)); ++ if(key->sen_sport != 0) { ++ sprintf(buf1+buf_len-1, ":%d", ntohs(key->sen_sport)); ++ } ++ ++ buf_len = subnettoa(key->sen_ip_dst, mask->sen_ip_dst, 0, buf2, sizeof(buf2)); ++ if(key->sen_dport != 0) { ++ sprintf(buf2+buf_len-1, ":%d", ntohs(key->sen_dport)); ++ } ++ ++ buf3[0]='\0'; ++ if(key->sen_proto != 0) { ++ sprintf(buf3, ":%d", key->sen_proto); ++ } ++ ++ sa_len = satot(&ro->er_said, 'x', sa, sizeof(sa)); ++ w->len += ipsec_snprintf(w->buffer + w->len, ++ w->length - w->len, ++ "%-10d " ++ "%-18s -> %-18s => %s%s\n", ++ ro->er_count, ++ buf1, ++ buf2, ++ sa_len ? sa : " (error)", ++ buf3); ++ ++ { ++ /* snprintf can only fill the last character with NUL ++ * so the maximum useful character is w->length-1. ++ * However, if w->length == 0, we cannot go back. ++ * (w->length surely cannot be negative.) ++ */ ++ int max_content = w->length > 0? w->length-1 : 0; ++ ++ if (w->len >= max_content) { ++ /* we've done all that can fit -- stop treewalking */ ++ w->len = max_content; /* truncate crap */ ++ return -ENOBUFS; ++ } else { ++ const off_t pos = w->begin + w->len; /* file position of end of what we've generated */ ++ ++ if (pos <= w->offset) { ++ /* all is before first interesting character: ++ * discard, but note where we are. 
++ */ ++ w->len = 0; ++ w->begin = pos; ++ } ++ return 0; ++ } ++ } ++} ++#endif /* CONFIG_PROC_FS */ ++ ++int ++ipsec_rj_walker_delete(struct radij_node *rn, void *w0) ++{ ++ struct eroute *ro; ++ struct rjtentry *rd = (struct rjtentry *)rn; ++ struct radij_node *rn2; ++ int error; ++ struct sockaddr_encap *key, *mask; ++ ++ key = rd_key(rd); ++ mask = rd_mask(rd); ++ ++ if(!key || !mask) { ++ return -ENODATA; ++ } ++#ifdef CONFIG_KLIPS_DEBUG ++ if(debug_radij) { ++ char buf1[SUBNETTOA_BUF], buf2[SUBNETTOA_BUF]; ++ subnettoa(key->sen_ip_src, mask->sen_ip_src, 0, buf1, sizeof(buf1)); ++ subnettoa(key->sen_ip_dst, mask->sen_ip_dst, 0, buf2, sizeof(buf2)); ++ KLIPS_PRINT(debug_radij, ++ "klips_debug:ipsec_rj_walker_delete: " ++ "deleting: %s -> %s\n", ++ buf1, ++ buf2); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ if((error = rj_delete(key, mask, rnh, &rn2))) { ++ KLIPS_PRINT(debug_radij, ++ "klips_debug:ipsec_rj_walker_delete: " ++ "rj_delete failed with error=%d.\n", error); ++ return error; ++ } ++ ++ if(rn2 != rn) { ++ printk("klips_debug:ipsec_rj_walker_delete: " ++ "tried to delete a different node?!? This should never happen!\n"); ++ } ++ ++ ro = (struct eroute *)rn; ++ ++ if (ro->er_ident_s.data) ++ kfree(ro->er_ident_s.data); ++ if (ro->er_ident_d.data) ++ kfree(ro->er_ident_d.data); ++ ++ memset((caddr_t)rn, 0, sizeof (struct eroute)); ++ kfree(rn); ++ ++ return 0; ++} ++ ++/* ++ * $Log: ipsec_radij.c,v $ ++ * Revision 1.73.2.2 2007-09-05 02:56:09 paul ++ * Use the new ipsec_kversion macros by David to deal with 2.6.22 kernels. ++ * Fixes based on David McCullough patch. ++ * ++ * Revision 1.73.2.1 2006/10/06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.73 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. 
++ * ++ * Revision 1.72 2004/12/03 21:25:57 mcr ++ * compile time fixes for running on 2.6. ++ * still experimental. ++ * ++ * Revision 1.71 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.70 2004/04/25 21:10:52 ken ++ * Pull in dhr's changes from FreeS/WAN 2.06 ++ * ++ * Revision 1.69 2004/04/06 02:49:26 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.68 2004/03/28 20:27:20 paul ++ * Included tested and confirmed fixes mcr made and dhr verified for ++ * snprint statements. Changed one other snprintf to use ipsec_snprintf ++ * so it wouldnt break compatibility with 2.0/2.2 kernels. Verified with ++ * dhr. (thanks dhr!) ++ * ++ * Revision 1.67.4.1 2004/04/05 04:30:46 mcr ++ * patches for alg-branch to compile/work with 2.x openswan ++ * ++ * Revision 1.67 2003/10/31 02:27:55 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.66.24.2 2003/10/29 01:30:41 mcr ++ * elimited "struct sa_id". ++ * ++ * Revision 1.66.24.1 2003/09/21 13:59:56 mcr ++ * pre-liminary X.509 patch - does not yet pass tests. ++ * ++ * Revision 1.66 2002/10/12 23:11:53 dhr ++ * ++ * [KenB + DHR] more 64-bit cleanup ++ * ++ * Revision 1.65 2002/09/20 05:01:40 rgb ++ * Added memory allocation debugging. ++ * ++ * Revision 1.64 2002/05/31 01:46:05 mcr ++ * added && sysctl_ipsec_debug_verbose verbose to ipsec_findroute ++ * as requested in PR#14. ++ * ++ * Revision 1.63 2002/05/23 07:14:11 rgb ++ * Cleaned up %p variants to 0p%p for test suite cleanup. ++ * ++ * Revision 1.62 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.61 2002/04/24 07:36:29 mcr ++ * Moved from ./klips/net/ipsec/ipsec_radij.c,v ++ * ++ * Revision 1.60 2002/02/19 23:59:45 rgb ++ * Removed redundant compiler directives. ++ * ++ * Revision 1.59 2002/02/06 04:13:47 mcr ++ * missing #ifdef CONFIG_IPSEC_DEBUG. 
++ * ++ * Revision 1.58 2002/01/29 17:17:56 mcr ++ * moved include of ipsec_param.h to after include of linux/kernel.h ++ * otherwise, it seems that some option that is set in ipsec_param.h ++ * screws up something subtle in the include path to kernel.h, and ++ * it complains on the snprintf() prototype. ++ * ++ * Revision 1.57 2002/01/29 04:00:52 mcr ++ * more excise of kversions.h header. ++ * ++ * Revision 1.56 2002/01/29 02:13:17 mcr ++ * introduction of ipsec_kversion.h means that include of ++ * ipsec_param.h must preceed any decisions about what files to ++ * include to deal with differences in kernel source. ++ * ++ * Revision 1.55 2001/11/26 09:23:48 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.53.2.1 2001/09/25 02:26:32 mcr ++ * headers adjusted for new usage. ++ * ++ * Revision 1.54 2001/10/18 04:45:20 rgb ++ * 2.4.9 kernel deprecates linux/malloc.h in favour of linux/slab.h, ++ * lib/freeswan.h version macros moved to lib/kversions.h. ++ * Other compiler directive cleanups. ++ * ++ * Revision 1.53 2001/09/19 17:19:40 rgb ++ * Debug output bugfix for NetCelo's PF_KEY ident patch. ++ * ++ * Revision 1.52 2001/09/19 16:33:37 rgb ++ * Temporarily disable ident fields to /proc/net/ipsec_eroute. ++ * ++ * Revision 1.51 2001/09/15 16:24:04 rgb ++ * Re-inject first and last HOLD packet when an eroute REPLACE is done. ++ * ++ * Revision 1.50 2001/09/14 16:58:36 rgb ++ * Added support for storing the first and last packets through a HOLD. ++ * ++ * Revision 1.49 2001/09/08 21:13:32 rgb ++ * Added pfkey ident extension support for ISAKMPd. (NetCelo) ++ * ++ * Revision 1.48 2001/06/15 04:12:56 rgb ++ * Fixed kernel memory allocation error return code polarity bug. ++ * ++ * Revision 1.47 2001/06/14 19:35:09 rgb ++ * Update copyright date. ++ * ++ * Revision 1.46 2001/06/08 08:47:18 rgb ++ * Fixed for debug disabled. 
++ * ++ * Revision 1.45 2001/05/27 06:12:11 rgb ++ * Added structures for pid, packet count and last access time to eroute. ++ * Added packet count to beginning of /proc/net/ipsec_eroute. ++ * ++ * Revision 1.44 2001/05/03 19:41:01 rgb ++ * Initialise error return variable. ++ * Use more appropriate return value for ipsec_rj_walker_delete(). ++ * ++ * Revision 1.43 2001/02/27 22:24:54 rgb ++ * Re-formatting debug output (line-splitting, joining, 1arg/line). ++ * Check for satoa() return codes. ++ * ++ * Revision 1.42 2001/02/27 06:21:57 rgb ++ * Added findroute success instrumentation. ++ * ++ * Revision 1.41 2000/11/06 04:32:08 rgb ++ * Ditched spin_lock_irqsave in favour of spin_lock_bh. ++ * ++ * Revision 1.40 2000/09/08 19:12:56 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * ++ * Revision 1.39 2000/08/30 05:25:20 rgb ++ * Correct debug text in ipsec_breakroute() from incorrect ++ * "ipsec_callback". ++ * ++ * Revision 1.38 2000/07/28 14:58:31 rgb ++ * Changed kfree_s to kfree, eliminating extra arg to fix 2.4.0-test5. ++ * ++ * Revision 1.37 2000/03/16 14:02:50 rgb ++ * Fixed debug scope to enable compilation with debug off. ++ * ++ * Revision 1.36 2000/01/21 06:14:46 rgb ++ * Added debugging text to ipsec_rj_walker_delete(). ++ * Set return code to negative for consistency. ++ * ++ * Revision 1.35 1999/11/23 23:05:24 rgb ++ * Use provided macro ADDRTOA_BUF instead of hardcoded value. ++ * ++ * Revision 1.34 1999/11/18 04:13:56 rgb ++ * Replaced all kernel version macros to shorter, readable form. ++ * Added CONFIG_PROC_FS compiler directives in case it is shut off. ++ * ++ * Revision 1.33 1999/11/17 15:53:39 rgb ++ * Changed all occurrences of #include "../../../lib/freeswan.h" ++ * to #include which works due to -Ilibfreeswan in the ++ * klips/net/ipsec/Makefile. 
++ * ++ * Revision 1.32 1999/10/26 13:58:33 rgb ++ * Put spinlock flags variable declaration outside the debug compiler ++ * directive to enable compilation with debug shut off. ++ * ++ * Revision 1.31 1999/10/15 22:13:29 rgb ++ * Clean out cruft. ++ * Align /proc/net/ipsec_eroute output for easier readability. ++ * Fix double linefeed in radij debug output. ++ * Fix double locking bug that locks up 2.0.36 but not 2.0.38. ++ * ++ * Revision 1.30 1999/10/08 18:37:33 rgb ++ * Fix end-of-line spacing to sate whining PHMs. ++ * ++ * Revision 1.29 1999/10/03 18:52:45 rgb ++ * Spinlock support for 2.0.xx. ++ * Dumb return code spin_unlock fix. ++ * ++ * Revision 1.28 1999/10/01 16:22:24 rgb ++ * Switch from assignment init. to functional init. of spinlocks. ++ * ++ * Revision 1.27 1999/10/01 15:44:53 rgb ++ * Move spinlock header include to 2.1> scope. ++ * ++ * Revision 1.26 1999/10/01 00:01:23 rgb ++ * Added eroute structure locking. ++ * ++ * Revision 1.25 1999/06/10 16:07:30 rgb ++ * Silence delete eroute on no debug. ++ * ++ * Revision 1.24 1999/05/09 03:25:36 rgb ++ * Fix bug introduced by 2.2 quick-and-dirty patch. ++ * ++ * Revision 1.23 1999/05/05 22:02:31 rgb ++ * Add a quick and dirty port to 2.2 kernels by Marc Boucher . ++ * ++ * Revision 1.22 1999/04/29 15:17:23 rgb ++ * Add return values to init and cleanup functions. ++ * Add sanity checking for null pointer arguments. ++ * ++ * Revision 1.21 1999/04/11 00:28:58 henry ++ * GPL boilerplate ++ * ++ * Revision 1.20 1999/04/06 04:54:26 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.19 1999/02/17 16:50:35 rgb ++ * Clean out unused cruft. ++ * Consolidate for space and speed efficiency. ++ * Convert DEBUG_IPSEC to KLIPS_PRINT ++ * ++ * Revision 1.18 1999/01/22 06:22:06 rgb ++ * Cruft clean-out. ++ * 64-bit clean-up. ++ * ++ * Revision 1.17 1998/12/02 03:09:39 rgb ++ * Clean up debug printing conditionals to compile with debugging off. 
++ * ++ * Revision 1.16 1998/12/01 13:49:39 rgb ++ * Wrap version info printing in debug switches. ++ * ++ * Revision 1.15 1998/11/30 13:22:54 rgb ++ * Rationalised all the klips kernel file headers. They are much shorter ++ * now and won't conflict under RH5.2. ++ * ++ * Revision 1.14 1998/10/31 06:48:17 rgb ++ * Fixed up comments in #endif directives. ++ * ++ * Revision 1.13 1998/10/27 13:48:09 rgb ++ * Cleaned up /proc/net/ipsec_* filesystem for easy parsing by scripts. ++ * Fixed less(1) truncated output bug. ++ * Code clean-up. ++ * ++ * Revision 1.12 1998/10/25 02:41:36 rgb ++ * Change return type on ipsec_breakroute and ipsec_makeroute and add an ++ * argument to be able to transmit more infomation about errors. ++ * Fix cut-and-paste debug statement identifier. ++ * ++ * Revision 1.11 1998/10/22 06:45:39 rgb ++ * Cleaned up cruft. ++ * Convert to use satoa for printk. ++ * ++ * Revision 1.10 1998/10/19 14:44:28 rgb ++ * Added inclusion of freeswan.h. ++ * sa_id structure implemented and used: now includes protocol. ++ * ++ * Revision 1.9 1998/10/09 04:30:52 rgb ++ * Added 'klips_debug' prefix to all klips printk debug statements. ++ * Deleted old commented out cruft. ++ * ++ * Revision 1.8 1998/08/06 17:24:23 rgb ++ * Fix addrtoa return code bug from stale manpage advice preventing packets ++ * from being erouted. ++ * ++ * Revision 1.7 1998/08/06 07:44:59 rgb ++ * Fixed /proc/net/ipsec_eroute subnettoa and addrtoa return value bug that ++ * ended up in nothing being printed. ++ * ++ * Revision 1.6 1998/08/05 22:16:41 rgb ++ * Cleanup to prevent cosmetic errors (ie. debug output) from being fatal. ++ * ++ * Revision 1.5 1998/07/29 20:38:44 rgb ++ * Debug and fix subnettoa and addrtoa output. ++ * ++ * Revision 1.4 1998/07/28 00:02:39 rgb ++ * Converting to exclusive use of addrtoa. ++ * Fix eroute delete. ++ * ++ * Revision 1.3 1998/07/14 18:21:26 rgb ++ * Add function to clear the eroute table. 
++ * ++ * Revision 1.2 1998/06/23 02:59:14 rgb ++ * Added debugging output to eroute add/delete routines. ++ * ++ * Revision 1.9 1998/06/18 21:29:06 henry ++ * move sources from klips/src to klips/net/ipsec, to keep stupid kernel ++ * build scripts happier in presence of symbolic links ++ * ++ * Revision 1.8 1998/06/05 02:32:26 rgb ++ * Fix spi ntoh kernel debug output. ++ * ++ * Revision 1.7 1998/05/25 20:30:37 rgb ++ * Remove temporary ipsec_walk, rj_deltree and rj_delnodes functions. ++ * ++ * Rename ipsec_rj_walker (ipsec_walk) to ipsec_rj_walker_procprint and ++ * add ipsec_rj_walker_delete. ++ * ++ * Revision 1.6 1998/05/21 13:08:57 rgb ++ * Rewrote procinfo subroutines to avoid *bad things* when more that 3k of ++ * information is available for printout. ++ * ++ * Revision 1.5 1998/05/18 21:35:55 rgb ++ * Clean up output for numerical consistency and readability. Zero freed ++ * eroute memory. ++ * ++ * Revision 1.4 1998/04/21 21:28:58 rgb ++ * Rearrange debug switches to change on the fly debug output from user ++ * space. Only kernel changes checked in at this time. radij.c was also ++ * changed to temporarily remove buggy debugging code in rj_delete causing ++ * an OOPS and hence, netlink device open errors. ++ * ++ * Revision 1.3 1998/04/14 17:30:39 rgb ++ * Fix up compiling errors for radij tree memory reclamation. ++ * ++ * Revision 1.2 1998/04/12 22:03:23 rgb ++ * Updated ESP-3DES-HMAC-MD5-96, ++ * ESP-DES-HMAC-MD5-96, ++ * AH-HMAC-MD5-96, ++ * AH-HMAC-SHA1-96 since Henry started freeswan cvs repository ++ * from old standards (RFC182[5-9] to new (as of March 1998) drafts. ++ * ++ * Fixed eroute references in /proc/net/ipsec*. ++ * ++ * Started to patch module unloading memory leaks in ipsec_netlink and ++ * radij tree unloading. 
++ * ++ * Revision 1.1 1998/04/09 03:06:10 henry ++ * sources moved up from linux/net/ipsec ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:03 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.4 1997/01/15 01:28:15 ji ++ * No changes. ++ * ++ * Revision 0.3 1996/11/20 14:39:04 ji ++ * Minor cleanups. ++ * Rationalized debugging code. ++ * ++ * Revision 0.2 1996/11/02 00:18:33 ji ++ * First limited release. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_rcv.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,2340 @@ ++/* ++ * receive code ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998-2003 Richard Guy Briggs. ++ * Copyright (C) 2004 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ */ ++ ++char ipsec_rcv_c_version[] = "RCSID $Id: ipsec_rcv.c,v 1.171.2.15 2007-10-30 21:37:45 paul Exp $"; ++ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++ ++#define __NO_VERSION__ ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++ ++#include ++#include ++#include ++#include ++#ifdef SPINLOCK ++# ifdef SPINLOCK_23 ++# include /* *lock* */ ++# else /* SPINLOCK_23 */ ++# include /* *lock* */ ++# endif /* SPINLOCK_23 */ ++#endif /* SPINLOCK */ ++ ++#include ++ ++#include "openswan/ipsec_kern24.h" ++#include "openswan/radij.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_sa.h" ++ ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_tunnel.h" ++#include "openswan/ipsec_rcv.h" ++ ++#include "openswan/ipsec_auth.h" ++ ++#include "openswan/ipsec_esp.h" ++ ++#ifdef CONFIG_KLIPS_AH ++#include "openswan/ipsec_ah.h" ++#endif /* CONFIG_KLIPS_AH */ ++ ++#ifdef CONFIG_KLIPS_IPCOMP ++#include "openswan/ipsec_ipcomp.h" ++#endif /* CONFIG_KLIPS_COMP */ ++ ++#include ++#include ++ ++#include "openswan/ipsec_proto.h" ++#include "openswan/ipsec_alg.h" ++#include "openswan/ipsec_kern24.h" ++ ++#ifdef CONFIG_KLIPS_DEBUG ++int debug_rcv = 0; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++int sysctl_ipsec_inbound_policy_check = 1; ++ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++#include ++#endif ++ ++/* This is a private use protocol, and AT&T should be ashamed. They should have ++ * used protocol # 59, which is "no next header" instead of 0xFE. 
++ */ ++#ifndef IPPROTO_ATT_HEARTBEAT ++#define IPPROTO_ATT_HEARTBEAT 0xFE ++#endif ++ ++/* ++ * Check-replay-window routine, adapted from the original ++ * by J. Hughes, from draft-ietf-ipsec-esp-des-md5-03.txt ++ * ++ * This is a routine that implements a 64 packet window. This is intend- ++ * ed on being an implementation sample. ++ */ ++ ++DEBUG_NO_STATIC int ++ipsec_checkreplaywindow(struct ipsec_sa*ipsp, __u32 seq) ++{ ++ __u32 diff; ++ ++ if (ipsp->ips_replaywin == 0) /* replay shut off */ ++ return 1; ++ if (seq == 0) ++ return 0; /* first == 0 or wrapped */ ++ ++ /* new larger sequence number */ ++ if (seq > ipsp->ips_replaywin_lastseq) { ++ return 1; /* larger is good */ ++ } ++ diff = ipsp->ips_replaywin_lastseq - seq; ++ ++ /* too old or wrapped */ /* if wrapped, kill off SA? */ ++ if (diff >= ipsp->ips_replaywin) { ++ return 0; ++ } ++ /* this packet already seen */ ++ if (ipsp->ips_replaywin_bitmap & (1 << diff)) ++ return 0; ++ return 1; /* out of order but good */ ++} ++ ++DEBUG_NO_STATIC int ++ipsec_updatereplaywindow(struct ipsec_sa*ipsp, __u32 seq) ++{ ++ __u32 diff; ++ ++ if (ipsp->ips_replaywin == 0) /* replay shut off */ ++ return 1; ++ if (seq == 0) ++ return 0; /* first == 0 or wrapped */ ++ ++ /* new larger sequence number */ ++ if (seq > ipsp->ips_replaywin_lastseq) { ++ diff = seq - ipsp->ips_replaywin_lastseq; ++ ++ /* In win, set bit for this pkt */ ++ if (diff < ipsp->ips_replaywin) ++ ipsp->ips_replaywin_bitmap = ++ (ipsp->ips_replaywin_bitmap << diff) | 1; ++ else ++ /* This packet has way larger seq num */ ++ ipsp->ips_replaywin_bitmap = 1; ++ ++ if(seq - ipsp->ips_replaywin_lastseq - 1 > ipsp->ips_replaywin_maxdiff) { ++ ipsp->ips_replaywin_maxdiff = seq - ipsp->ips_replaywin_lastseq - 1; ++ } ++ ipsp->ips_replaywin_lastseq = seq; ++ return 1; /* larger is good */ ++ } ++ diff = ipsp->ips_replaywin_lastseq - seq; ++ ++ /* too old or wrapped */ /* if wrapped, kill off SA? 
*/ ++ if (diff >= ipsp->ips_replaywin) { ++/* ++ if(seq < 0.25*max && ipsp->ips_replaywin_lastseq > 0.75*max) { ++ ipsec_sa_delchain(ipsp); ++ } ++*/ ++ return 0; ++ } ++ /* this packet already seen */ ++ if (ipsp->ips_replaywin_bitmap & (1 << diff)) ++ return 0; ++ ipsp->ips_replaywin_bitmap |= (1 << diff); /* mark as seen */ ++ return 1; /* out of order but good */ ++} ++ ++#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 ++struct auth_alg ipsec_rcv_md5[]={ ++ {osMD5Init, osMD5Update, osMD5Final, AHMD596_ALEN} ++}; ++ ++#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++ ++#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++struct auth_alg ipsec_rcv_sha1[]={ ++ {SHA1Init, SHA1Update, SHA1Final, AHSHA196_ALEN} ++}; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++ ++/* ++ * decapsulate a single layer of the system ++ * ++ * the following things should be setup to enter this function. ++ * ++ * irs->stats == stats structure (or NULL) ++ * irs->ipp = IP header. ++ * irs->len = total length of packet ++ * skb->nh.iph = ipp; ++ * skb->h.raw = start of payload ++ * irs->ipsp = NULL. ++ * irs->iphlen = N/A = is recalculated. ++ * irs->ilen = 0; ++ * irs->authlen = 0; ++ * irs->authfuncs = NULL; ++ * irs->skb = the skb; ++ * ++ * proto_funcs should be from ipsec_esp.c, ipsec_ah.c or ipsec_ipcomp.c. 
++ * ++ */ ++enum ipsec_rcv_value ++ipsec_rcv_decap_once(struct ipsec_rcv_state *irs ++ , struct xform_functions *proto_funcs) ++{ ++ int iphlen; ++ __u8 proto; ++ struct in_addr ipsaddr; ++ struct in_addr ipdaddr; ++ int replay = 0; /* replay value in AH or ESP packet */ ++ struct ipsec_sa* ipsnext = NULL; /* next SA towards inside of packet */ ++ struct ipsec_sa *newipsp; ++ struct iphdr *ipp; ++ struct sk_buff *skb; ++ struct ipsec_alg_auth *ixt_a=NULL; ++ ++ skb = irs->skb; ++ irs->len = skb->len; ++ ipp = irs->ipp; ++ proto = ipp->protocol; ++ ipsaddr.s_addr = ipp->saddr; ++ addrtoa(ipsaddr, 0, irs->ipsaddr_txt, sizeof(irs->ipsaddr_txt)); ++ ipdaddr.s_addr = ipp->daddr; ++ addrtoa(ipdaddr, 0, irs->ipdaddr_txt, sizeof(irs->ipdaddr_txt)); ++ ++ iphlen = ipp->ihl << 2; ++ irs->iphlen=iphlen; ++ ipp->check = 0; /* we know the sum is good */ ++ ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv_decap_once: " ++ "decap (%d) from %s -> %s\n", ++ proto, irs->ipsaddr_txt, irs->ipdaddr_txt); ++ ++ /* ++ * Find tunnel control block and (indirectly) call the ++ * appropriate tranform routine. The resulting sk_buf ++ * is a valid IP packet ready to go through input processing. ++ */ ++ ++ irs->said.dst.u.v4.sin_addr.s_addr = ipp->daddr; ++ irs->said.dst.u.v4.sin_family = AF_INET; ++ ++ /* note: rcv_checks set up the said.spi value, if appropriate */ ++ if(proto_funcs->rcv_checks) { ++ enum ipsec_rcv_value retval = ++ (*proto_funcs->rcv_checks)(irs, skb); ++ ++ if(retval < 0) { ++ return retval; ++ } ++ } ++ ++ irs->said.proto = proto; ++ irs->sa_len = satot(&irs->said, 0, irs->sa, sizeof(irs->sa)); ++ if(irs->sa_len == 0) { ++ strcpy(irs->sa, "(error)"); ++ } ++ ++ newipsp = ipsec_sa_getbyid(&irs->said); ++ if (newipsp == NULL) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "no ipsec_sa for SA:%s: incoming packet with no SA dropped\n", ++ irs->sa_len ? 
irs->sa : " (error)"); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ return IPSEC_RCV_SAIDNOTFOUND; ++ } ++ ++ /* MCR - XXX this is bizarre. ipsec_sa_getbyid returned it, having ++ * incremented the refcount, why in the world would we decrement it ++ * here? */ ++ /* ipsec_sa_put(irs->ipsp);*/ /* incomplete */ ++ ++ /* If it is in larval state, drop the packet, we cannot process yet. */ ++ if(newipsp->ips_state == SADB_SASTATE_LARVAL) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "ipsec_sa in larval state, cannot be used yet, dropping packet.\n"); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ ipsec_sa_put(newipsp); ++ return IPSEC_RCV_SAIDNOTLIVE; ++ } ++ ++ if(newipsp->ips_state == SADB_SASTATE_DEAD) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "ipsec_sa in dead state, cannot be used any more, dropping packet.\n"); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ ipsec_sa_put(newipsp); ++ return IPSEC_RCV_SAIDNOTLIVE; ++ } ++ ++ if(sysctl_ipsec_inbound_policy_check) { ++ if(irs->ipp->saddr != ((struct sockaddr_in*)(newipsp->ips_addr_s))->sin_addr.s_addr) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "SA:%s, src=%s of pkt does not agree with expected SA source address policy.\n", ++ irs->sa_len ? irs->sa : " (error)", ++ irs->ipsaddr_txt); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ ipsec_sa_put(newipsp); ++ return IPSEC_RCV_FAILEDINBOUND; ++ } ++ ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "SA:%s, src=%s of pkt agrees with expected SA source address policy.\n", ++ irs->sa_len ? irs->sa : " (error)", ++ irs->ipsaddr_txt); ++ ++ /* ++ * at this point, we have looked up a new SA, and we want to make sure that if this ++ * isn't the first SA in the list, that the previous SA actually points at this one. 
++ */ ++ if(irs->ipsp) { ++ if(irs->ipsp->ips_inext != newipsp) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "unexpected SA:%s: does not agree with ips->inext policy, dropped\n", ++ irs->sa_len ? irs->sa : " (error)"); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ ipsec_sa_put(newipsp); ++ return IPSEC_RCV_FAILEDINBOUND; ++ } ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "SA:%s grouping from previous SA is OK.\n", ++ irs->sa_len ? irs->sa : " (error)"); ++ } else { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "SA:%s First SA in group.\n", ++ irs->sa_len ? irs->sa : " (error)"); ++ } ++ ++ ++ ++ ++ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ if (proto == IPPROTO_ESP) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "natt_type=%u tdbp->ips_natt_type=%u : %s\n", ++ irs->natt_type, newipsp->ips_natt_type, ++ (irs->natt_type==newipsp->ips_natt_type)?"ok":"bad"); ++ if (irs->natt_type != newipsp->ips_natt_type) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "SA:%s does not agree with expected NAT-T policy.\n", ++ irs->sa_len ? 
irs->sa : " (error)"); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ ipsec_sa_put(newipsp); ++ return IPSEC_RCV_FAILEDINBOUND; ++ } ++ } ++#endif ++ } ++ ++ /* okay, SA checks out, so free any previous SA, and record a new one*/ ++ ++ if(irs->ipsp) { ++ ipsec_sa_put(irs->ipsp); ++ } ++ irs->ipsp=newipsp; ++ ++ /* note that the outer code will free the irs->ipsp ++ if there is an error */ ++ ++ ++ /* now check the lifetimes */ ++ if(ipsec_lifetime_check(&irs->ipsp->ips_life.ipl_bytes, "bytes", ++ irs->sa, ipsec_life_countbased, ipsec_incoming, ++ irs->ipsp) == ipsec_life_harddied || ++ ipsec_lifetime_check(&irs->ipsp->ips_life.ipl_addtime, "addtime", ++ irs->sa, ipsec_life_timebased, ipsec_incoming, ++ irs->ipsp) == ipsec_life_harddied || ++ ipsec_lifetime_check(&irs->ipsp->ips_life.ipl_addtime, "usetime", ++ irs->sa, ipsec_life_timebased, ipsec_incoming, ++ irs->ipsp) == ipsec_life_harddied || ++ ipsec_lifetime_check(&irs->ipsp->ips_life.ipl_packets, "packets", ++ irs->sa, ipsec_life_countbased, ipsec_incoming, ++ irs->ipsp) == ipsec_life_harddied) { ++ ipsec_sa_delchain(irs->ipsp); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv_decap_once: " ++ "decap (%d) failed lifetime check\n", ++ proto); ++ ++ return IPSEC_RCV_LIFETIMEFAILED; ++ } ++ ++#if 0 ++ /* ++ * This is removed for some reasons: ++ * 1) it needs to happen *after* authentication. ++ * 2) do we really care, if it authenticates, if it came ++ * from the wrong location? ++ * 3) the NAT_KA messages in IKE will also get to pluto ++ * and it will figure out that stuff has moved. ++ * 4) the 2.6 udp-esp encap function does not pass us ++ * the originating port number, and I can't tell ++ * if skb->sk is guaranteed to be valid here. ++ * 2005-04-16: mcr@xelerance.com ++ */ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ /* ++ * ++ * XXX we should ONLY update pluto if the SA passes all checks, ++ * which we clearly do not now. 
++ */ ++ if ((irs->natt_type) && ++ ( (irs->ipp->saddr != (((struct sockaddr_in*)(newipsp->ips_addr_s))->sin_addr.s_addr)) || ++ (irs->natt_sport != newipsp->ips_natt_sport) ++ )) { ++ struct sockaddr sipaddr; ++ struct sockaddr_in *psin = (struct sockaddr_in*)(newipsp->ips_addr_s); ++ ++ /** Advertise NAT-T addr change to pluto **/ ++ sipaddr.sa_family = AF_INET; ++ ((struct sockaddr_in*)&sipaddr)->sin_addr.s_addr = irs->ipp->saddr; ++ ((struct sockaddr_in*)&sipaddr)->sin_port = htons(irs->natt_sport); ++ pfkey_nat_t_new_mapping(newipsp, &sipaddr, irs->natt_sport); ++ ++ /** ++ * Then allow or block packet depending on ++ * sysctl_ipsec_inbound_policy_check. ++ * ++ * In all cases, pluto will update SA if new mapping is ++ * accepted. ++ */ ++ if (sysctl_ipsec_inbound_policy_check) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "SA:%s, src=%s:%u of pkt does not agree with expected " ++ "SA source address [%08x:%u] (notifying pluto of change).\n", ++ irs->sa_len ? irs->sa : " (error)", ++ irs->ipsaddr_txt, irs->natt_sport, ++ psin->sin_addr.s_addr, ++ newipsp->ips_natt_sport); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ ipsec_sa_put(newipsp); ++ return IPSEC_RCV_FAILEDINBOUND; ++ } ++ } ++#endif ++#endif ++ ++ irs->authfuncs=NULL; ++ ++ /* authenticate, if required */ ++ if ((ixt_a=irs->ipsp->ips_alg_auth)) { ++ irs->authlen = AHHMAC_HASHLEN; ++ irs->authfuncs = NULL; ++ irs->ictx = NULL; ++ irs->octx = NULL; ++ irs->ictx_len = 0; ++ irs->octx_len = 0; ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "authalg=%d authlen=%d\n", ++ irs->ipsp->ips_authalg, ++ irs->authlen); ++ } else ++ switch(irs->ipsp->ips_authalg) { ++#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 ++ case AH_MD5: ++ irs->authlen = AHHMAC_HASHLEN; ++ irs->authfuncs = ipsec_rcv_md5; ++ irs->ictx = (void *)&((struct md5_ctx*)(irs->ipsp->ips_key_a))->ictx; ++ irs->octx = (void *)&((struct md5_ctx*)(irs->ipsp->ips_key_a))->octx; ++ irs->ictx_len = sizeof(((struct 
md5_ctx*)(irs->ipsp->ips_key_a))->ictx); ++ irs->octx_len = sizeof(((struct md5_ctx*)(irs->ipsp->ips_key_a))->octx); ++ break; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++ case AH_SHA: ++ irs->authlen = AHHMAC_HASHLEN; ++ irs->authfuncs = ipsec_rcv_sha1; ++ irs->ictx = (void *)&((struct sha1_ctx*)(irs->ipsp->ips_key_a))->ictx; ++ irs->octx = (void *)&((struct sha1_ctx*)(irs->ipsp->ips_key_a))->octx; ++ irs->ictx_len = sizeof(((struct sha1_ctx*)(irs->ipsp->ips_key_a))->ictx); ++ irs->octx_len = sizeof(((struct sha1_ctx*)(irs->ipsp->ips_key_a))->octx); ++ break; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ ++ case AH_NONE: ++ irs->authlen = 0; ++ irs->authfuncs = NULL; ++ irs->ictx = NULL; ++ irs->octx = NULL; ++ irs->ictx_len = 0; ++ irs->octx_len = 0; ++ break; ++ default: ++ irs->ipsp->ips_errs.ips_alg_errs += 1; ++ if(irs->stats) { ++ irs->stats->rx_errors++; ++ } ++ return IPSEC_RCV_BADAUTH; ++ } ++ ++ /* ilen counts number of bytes in ESP portion */ ++ irs->ilen = ((irs->skb->data + irs->skb->len) - skb_transport_header(irs->skb)) - irs->authlen; ++ if(irs->ilen <= 0) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "runt %s packet with no data, dropping.\n", ++ (proto == IPPROTO_ESP ? 
"esp" : "ah")); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ return IPSEC_RCV_BADLEN; ++ } ++ ++ if(irs->authfuncs || ixt_a) { ++ unsigned char *authenticator = NULL; ++ ++ if(proto_funcs->rcv_setup_auth) { ++ enum ipsec_rcv_value retval ++ = (*proto_funcs->rcv_setup_auth)(irs, skb, ++ &replay, ++ &authenticator); ++ if(retval < 0) { ++ return retval; ++ } ++ } ++ ++ if(!authenticator) { ++ irs->ipsp->ips_errs.ips_auth_errs += 1; ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ return IPSEC_RCV_BADAUTH; ++ } ++ ++ if(!ipsec_checkreplaywindow(irs->ipsp, replay)) { ++ irs->ipsp->ips_errs.ips_replaywin_errs += 1; ++ KLIPS_PRINT(debug_rcv & DB_RX_REPLAY, ++ "klips_debug:ipsec_rcv: " ++ "duplicate frame from %s, packet dropped\n", ++ irs->ipsaddr_txt); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ return IPSEC_RCV_REPLAYFAILED; ++ } ++ ++ /* ++ * verify authenticator ++ */ ++ ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "encalg = %d, authalg = %d.\n", ++ irs->ipsp->ips_encalg, ++ irs->ipsp->ips_authalg); ++ ++ /* calculate authenticator */ ++ if(proto_funcs->rcv_calc_auth == NULL) { ++ return IPSEC_RCV_BADAUTH; ++ } ++ (*proto_funcs->rcv_calc_auth)(irs, skb); ++ ++ if (memcmp(irs->hash, authenticator, irs->authlen)) { ++ irs->ipsp->ips_errs.ips_auth_errs += 1; ++ KLIPS_PRINT(debug_rcv & DB_RX_INAU, ++ "klips_debug:ipsec_rcv: " ++ "auth failed on incoming packet from %s: hash=%08x%08x%08x auth=%08x%08x%08x, dropped\n", ++ irs->ipsaddr_txt, ++ ntohl(*(__u32*)&irs->hash[0]), ++ ntohl(*(__u32*)&irs->hash[4]), ++ ntohl(*(__u32*)&irs->hash[8]), ++ ntohl(*(__u32*)authenticator), ++ ntohl(*((__u32*)authenticator + 1)), ++ ntohl(*((__u32*)authenticator + 2))); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ return IPSEC_RCV_AUTHFAILED; ++ } else { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "authentication successful.\n"); ++ } ++ ++ /* Crypto hygiene: clear memory used to calculate autheticator. 
++ * The length varies with the algorithm. ++ */ ++ memset(irs->hash, 0, irs->authlen); ++ ++ /* If the sequence number == 0, expire SA, it had rolled */ ++ if(irs->ipsp->ips_replaywin && !replay /* !irs->ipsp->ips_replaywin_lastseq */) { ++ ipsec_sa_delchain(irs->ipsp); ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "replay window counter rolled, expiring SA.\n"); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ return IPSEC_RCV_REPLAYROLLED; ++ } ++ ++ /* now update the replay counter */ ++ if (!ipsec_updatereplaywindow(irs->ipsp, replay)) { ++ irs->ipsp->ips_errs.ips_replaywin_errs += 1; ++ KLIPS_PRINT(debug_rcv & DB_RX_REPLAY, ++ "klips_debug:ipsec_rcv: " ++ "duplicate frame from %s, packet dropped\n", ++ irs->ipsaddr_txt); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ return IPSEC_RCV_REPLAYROLLED; ++ } ++ } ++ ++ if(proto_funcs->rcv_decrypt) { ++ enum ipsec_rcv_value retval = ++ (*proto_funcs->rcv_decrypt)(irs); ++ ++ if(retval != IPSEC_RCV_OK) { ++ return retval; ++ } ++ } ++ ++ /* ++ * Adjust pointers ++ */ ++ skb = irs->skb; ++ irs->len = skb->len; ++ ipp = irs->ipp = ip_hdr(skb); ++ irs->iphlen = ipp->ihl<<2; ++ skb_set_transport_header(skb, ipsec_skb_offset(skb, skb_network_header(skb) + irs->iphlen)); ++ ++ /* zero any options that there might be */ ++ memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); ++ ++ ipsaddr.s_addr = ipp->saddr; ++ addrtoa(ipsaddr, 0, irs->ipsaddr_txt, sizeof(irs->ipsaddr_txt)); ++ ipdaddr.s_addr = ipp->daddr; ++ addrtoa(ipdaddr, 0, irs->ipdaddr_txt, sizeof(irs->ipdaddr_txt)); ++ ++ /* ++ * Discard the original ESP/AH header ++ */ ++ ipp->protocol = irs->next_header; ++ ++ ipp->check = 0; /* NOTE: this will be included in checksum */ ++ ipp->check = ip_fast_csum((unsigned char *)ip_hdr(skb), irs->iphlen >> 2); ++ ++ KLIPS_PRINT(debug_rcv & DB_RX_PKTRX, ++ "klips_debug:ipsec_rcv: " ++ "after <%s%s%s>, SA:%s:\n", ++ IPS_XFORM_NAME(irs->ipsp), ++ irs->sa_len ? 
irs->sa : " (error)"); ++ KLIPS_IP_PRINT(debug_rcv & DB_RX_PKTRX, ipp); ++ ++ skb->protocol = htons(ETH_P_IP); ++ skb->ip_summed = 0; ++ ++ ipsnext = irs->ipsp->ips_inext; ++ if(sysctl_ipsec_inbound_policy_check) { ++ if(ipsnext) { ++ if( ++ ipp->protocol != IPPROTO_AH ++ && ipp->protocol != IPPROTO_ESP ++#ifdef CONFIG_KLIPS_IPCOMP ++ && ipp->protocol != IPPROTO_COMP ++ && (ipsnext->ips_said.proto != IPPROTO_COMP ++ || ipsnext->ips_inext) ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ && ipp->protocol != IPPROTO_IPIP ++ && ipp->protocol != IPPROTO_ATT_HEARTBEAT /* heartbeats to AT&T SIG/GIG */ ++ ) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "packet with incomplete policy dropped, last successful SA:%s.\n", ++ irs->sa_len ? irs->sa : " (error)"); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ return IPSEC_RCV_FAILEDINBOUND; ++ } ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "SA:%s, Another IPSEC header to process.\n", ++ irs->sa_len ? irs->sa : " (error)"); ++ } else { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "No ips_inext from this SA:%s.\n", ++ irs->sa_len ? 
irs->sa : " (error)"); ++ } ++ } ++ ++#ifdef CONFIG_KLIPS_IPCOMP ++ /* update ipcomp ratio counters, even if no ipcomp packet is present */ ++ if (ipsnext ++ && ipsnext->ips_said.proto == IPPROTO_COMP ++ && ipp->protocol != IPPROTO_COMP) { ++ ipsnext->ips_comp_ratio_cbytes += ntohs(ipp->tot_len); ++ ipsnext->ips_comp_ratio_dbytes += ntohs(ipp->tot_len); ++ } ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ ++ irs->ipsp->ips_life.ipl_bytes.ipl_count += irs->len; ++ irs->ipsp->ips_life.ipl_bytes.ipl_last = irs->len; ++ ++ if(!irs->ipsp->ips_life.ipl_usetime.ipl_count) { ++ irs->ipsp->ips_life.ipl_usetime.ipl_count = jiffies / HZ; ++ } ++ irs->ipsp->ips_life.ipl_usetime.ipl_last = jiffies / HZ; ++ irs->ipsp->ips_life.ipl_packets.ipl_count += 1; ++ ++#ifdef CONFIG_NETFILTER ++ if(proto == IPPROTO_ESP || proto == IPPROTO_AH) { ++ skb->nfmark = (skb->nfmark & (~(IPsecSAref2NFmark(IPSEC_SA_REF_MASK)))) ++ | IPsecSAref2NFmark(IPsecSA2SAref(irs->ipsp)); ++ KLIPS_PRINT(debug_rcv & DB_RX_PKTRX, ++ "klips_debug:ipsec_rcv: " ++ "%s SA sets skb->nfmark=0x%x.\n", ++ proto == IPPROTO_ESP ? "ESP" : "AH", ++ (unsigned)skb->nfmark); ++ } ++#endif /* CONFIG_NETFILTER */ ++ ++ return IPSEC_RCV_OK; ++} ++ ++ ++/* ++ * core decapsulation loop for all protocols. ++ * ++ * the following things should be setup to enter this function. ++ * ++ * irs->stats == stats structure (or NULL) ++ * irs->ipp = IP header. ++ * irs->ipsp = NULL. 
++ * irs->ilen = 0; ++ * irs->authlen = 0; ++ * irs->authfuncs = NULL; ++ * irs->skb = skb; ++ * skb->nh.iph = ipp; ++ * skb->h.raw = start of payload ++ * ++ */ ++int ipsec_rcv_decap(struct ipsec_rcv_state *irs) ++{ ++ struct ipsec_sa *ipsp = NULL; ++ struct ipsec_sa* ipsnext = NULL; ++ struct in_addr ipsaddr; ++ struct in_addr ipdaddr; ++ struct iphdr *ipp; ++ struct sk_buff *skb = NULL; ++ ++ /* begin decapsulating loop here */ ++ ++ /* ++ The spinlock is to prevent any other process from ++ accessing or deleting the ipsec_sa hash table or any of the ++ ipsec_sa s while we are using and updating them. ++ ++ This is not optimal, but was relatively straightforward ++ at the time. A better way to do it has been planned for ++ more than a year, to lock the hash table and put reference ++ counts on each ipsec_sa instead. This is not likely to happen ++ in KLIPS1 unless a volunteer contributes it, but will be ++ designed into KLIPS2. ++ */ ++ spin_lock(&tdb_lock); ++ ++ do { ++ int decap_stat; ++ struct xform_functions *proto_funcs; ++ ++ switch(irs->ipp->protocol) { ++ case IPPROTO_ESP: ++ proto_funcs = esp_xform_funcs; ++ break; ++ ++#ifdef CONFIG_KLIPS_AH ++ case IPPROTO_AH: ++ proto_funcs = ah_xform_funcs; ++ break; ++#endif /* !CONFIG_KLIPS_AH */ ++ ++#ifdef CONFIG_KLIPS_IPCOMP ++ case IPPROTO_COMP: ++ proto_funcs = ipcomp_xform_funcs; ++ break; ++#endif /* !CONFIG_KLIPS_IPCOMP */ ++ default: ++ if(irs->stats) { ++ irs->stats->rx_errors++; ++ } ++ decap_stat = IPSEC_RCV_BADPROTO; ++ goto rcvleave; ++ } ++ ++ decap_stat = ipsec_rcv_decap_once(irs, proto_funcs); ++ ++ if(decap_stat != IPSEC_RCV_OK) { ++ spin_unlock(&tdb_lock); ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: decap_once failed: %d\n", ++ decap_stat); ++ ++ goto rcvleave; ++ } ++ /* end decapsulation loop here */ ++ } while( (irs->ipp->protocol == IPPROTO_ESP ) ++ || (irs->ipp->protocol == IPPROTO_AH ) ++#ifdef CONFIG_KLIPS_IPCOMP ++ || (irs->ipp->protocol == IPPROTO_COMP) ++#endif /* 
CONFIG_KLIPS_IPCOMP */ ++ ); ++ ++ /* set up for decap loop */ ++ ipp =irs->ipp; ++ ipsp =irs->ipsp; ++ ipsnext = ipsp->ips_inext; ++ skb = irs->skb; ++ ++ /* if there is an IPCOMP, but we don't have an IPPROTO_COMP, ++ * then we can just skip it ++ */ ++#ifdef CONFIG_KLIPS_IPCOMP ++ if(ipsnext && ipsnext->ips_said.proto == IPPROTO_COMP) { ++ ipsp = ipsnext; ++ ipsnext = ipsp->ips_inext; ++ } ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ if ((irs->natt_type) && (ipp->protocol != IPPROTO_IPIP)) { ++ /** ++ * NAT-Traversal and Transport Mode: ++ * we need to correct TCP/UDP checksum ++ * ++ * If we've got NAT-OA, we can fix checksum without recalculation. ++ */ ++ __u32 natt_oa = ipsp->ips_natt_oa ? ++ ((struct sockaddr_in*)(ipsp->ips_natt_oa))->sin_addr.s_addr : 0; ++ __u16 pkt_len = skb_tail_pointer(skb) - (unsigned char *)ipp; ++ __u16 data_len = pkt_len - (ipp->ihl << 2); ++ ++ switch (ipp->protocol) { ++ case IPPROTO_TCP: ++ if (data_len >= sizeof(struct tcphdr)) { ++ struct tcphdr *tcp = tcp_hdr(skb); ++ if (natt_oa) { ++ __u32 buff[2] = { ~natt_oa, ipp->saddr }; ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "NAT-T & TRANSPORT: " ++ "fix TCP checksum using NAT-OA\n"); ++ tcp->check = csum_fold( ++ csum_partial((unsigned char *)buff, sizeof(buff), ++ tcp->check^0xffff)); ++ } ++ else { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "NAT-T & TRANSPORT: recalc TCP checksum\n"); ++ if (pkt_len > (ntohs(ipp->tot_len))) ++ data_len -= (pkt_len - ntohs(ipp->tot_len)); ++ tcp->check = 0; ++ tcp->check = csum_tcpudp_magic(ipp->saddr, ipp->daddr, ++ data_len, IPPROTO_TCP, ++ csum_partial((unsigned char *)tcp, data_len, 0)); ++ } ++ } ++ else { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "NAT-T & TRANSPORT: can't fix TCP checksum\n"); ++ } ++ break; ++ case IPPROTO_UDP: ++ if (data_len >= sizeof(struct udphdr)) { ++ struct udphdr *udp = udp_hdr(skb); ++ if (udp->check == 0) { ++ KLIPS_PRINT(debug_rcv, ++ 
"klips_debug:ipsec_rcv: " ++ "NAT-T & TRANSPORT: UDP checksum already 0\n"); ++ } ++ else if (natt_oa) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "NAT-T & TRANSPORT: " ++ "fix UDP checksum using NAT-OA\n"); ++#ifdef DISABLE_UDP_CHECKSUM ++ udp->check=0; ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "NAT-T & TRANSPORT: " ++ "UDP checksum using NAT-OA disabled at compile time\n"); ++#else ++ { ++ __u32 buff[2] = { ~natt_oa, ipp->saddr }; ++ ++ udp->check = csum_fold( ++ csum_partial((unsigned char *)buff, sizeof(buff), ++ udp->check^0xffff)); ++ } ++#endif ++ } ++ else { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "NAT-T & TRANSPORT: zero UDP checksum\n"); ++ udp->check = 0; ++ } ++ } ++ else { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "NAT-T & TRANSPORT: can't fix UDP checksum\n"); ++ } ++ break; ++ default: ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "NAT-T & TRANSPORT: non TCP/UDP packet -- do nothing\n"); ++ break; ++ } ++ } ++#endif ++ ++ /* ++ * XXX this needs to be locked from when it was first looked ++ * up in the decapsulation loop. Perhaps it is better to put ++ * the IPIP decap inside the loop. ++ */ ++ if(ipsnext) { ++ ipsp = ipsnext; ++ irs->sa_len = KLIPS_SATOT(debug_rcv, &irs->said, 0, irs->sa, sizeof(irs->sa)); ++ if((ipp->protocol != IPPROTO_IPIP) && ++ (ipp->protocol != IPPROTO_ATT_HEARTBEAT)) { /* AT&T heartbeats to SIG/GIG */ ++ spin_unlock(&tdb_lock); ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "SA:%s, Hey! How did this get through? Dropped.\n", ++ irs->sa_len ? 
irs->sa : " (error)"); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ goto rcvleave; ++ } ++ if(sysctl_ipsec_inbound_policy_check) { ++ struct sockaddr_in *psin = (struct sockaddr_in*)(ipsp->ips_addr_s); ++ if((ipsnext = ipsp->ips_inext)) { ++ char sa2[SATOT_BUF]; ++ size_t sa_len2; ++ sa_len2 = KLIPS_SATOT(debug_rcv, &ipsnext->ips_said, 0, sa2, sizeof(sa2)); ++ spin_unlock(&tdb_lock); ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "unexpected SA:%s after IPIP SA:%s\n", ++ sa_len2 ? sa2 : " (error)", ++ irs->sa_len ? irs->sa : " (error)"); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ goto rcvleave; ++ } ++ if(ipp->saddr != psin->sin_addr.s_addr) { ++ spin_unlock(&tdb_lock); ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "SA:%s, src=%s(%08x) does not match expected 0x%08x.\n", ++ irs->sa_len ? irs->sa : " (error)", ++ irs->ipsaddr_txt, ++ ipp->saddr, psin->sin_addr.s_addr); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ goto rcvleave; ++ } ++ } ++ ++ if(ipp->protocol == IPPROTO_IPIP) /* added to support AT&T heartbeats to SIG/GIG */ ++ { ++ /* ++ * XXX this needs to be locked from when it was first looked ++ * up in the decapsulation loop. Perhaps it is better to put ++ * the IPIP decap inside the loop. ++ */ ++ ipsp->ips_life.ipl_bytes.ipl_count += skb->len; ++ ipsp->ips_life.ipl_bytes.ipl_last = skb->len; ++ ++ if(!ipsp->ips_life.ipl_usetime.ipl_count) { ++ ipsp->ips_life.ipl_usetime.ipl_count = jiffies / HZ; ++ } ++ ipsp->ips_life.ipl_usetime.ipl_last = jiffies / HZ; ++ ipsp->ips_life.ipl_packets.ipl_count += 1; ++ ++ if(skb->len < irs->iphlen) { ++ spin_unlock(&tdb_lock); ++ printk(KERN_WARNING "klips_debug:ipsec_rcv: " ++ "tried to skb_pull iphlen=%d, %d available. 
This should never happen, please report.\n", ++ irs->iphlen, ++ (int)(skb->len)); ++ ++ goto rcvleave; ++ } ++ ++ /* ++ * we need to pull up by size of IP header, ++ * options, but also by any UDP/ESP encap there might ++ * have been, and this deals with all cases. ++ */ ++ skb_pull(skb, (skb_transport_header(skb) - skb_network_header(skb))); ++ ++ /* new L3 header is where L4 payload was */ ++ skb_set_network_header(skb, ipsec_skb_offset(skb, skb_transport_header(skb))); ++ ++ /* now setup new L4 payload location */ ++ ipp = (struct iphdr *)skb_network_header(skb); ++ skb_set_transport_header(skb, ipsec_skb_offset(skb, skb_network_header(skb) + (ipp->ihl << 2))); ++ ++ ++ /* remove any saved options that we might have, ++ * since we have a new IP header. ++ */ ++ memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); ++ ++#if 0 ++ KLIPS_PRINT(debug_rcv, "csum: %d\n", ip_fast_csum((u8 *)ipp, ipp->ihl)); ++#endif ++ ++ /* re-do any strings for debugging */ ++ ipsaddr.s_addr = ipp->saddr; ++ if (debug_rcv) ++ addrtoa(ipsaddr, 0, irs->ipsaddr_txt, sizeof(irs->ipsaddr_txt)); ++ ipdaddr.s_addr = ipp->daddr; ++ if (debug_rcv) ++ addrtoa(ipdaddr, 0, irs->ipdaddr_txt, sizeof(irs->ipdaddr_txt)); ++ ++ skb->protocol = htons(ETH_P_IP); ++ skb->ip_summed = 0; ++ KLIPS_PRINT(debug_rcv & DB_RX_PKTRX, ++ "klips_debug:ipsec_rcv: " ++ "IPIP tunnel stripped.\n"); ++ KLIPS_IP_PRINT(debug_rcv & DB_RX_PKTRX, ipp); ++ } ++ ++ if(sysctl_ipsec_inbound_policy_check ++ /* ++ Note: "xor" (^) logically replaces "not equal" ++ (!=) and "bitwise or" (|) logically replaces ++ "boolean or" (||). 
This is done to speed up ++ execution by doing only bitwise operations and ++ no branch operations ++ */ ++ && (((ipp->saddr & ipsp->ips_mask_s.u.v4.sin_addr.s_addr) ++ ^ ipsp->ips_flow_s.u.v4.sin_addr.s_addr) ++ | ((ipp->daddr & ipsp->ips_mask_d.u.v4.sin_addr.s_addr) ++ ^ ipsp->ips_flow_d.u.v4.sin_addr.s_addr)) ) ++ { ++ char sflow_txt[SUBNETTOA_BUF], dflow_txt[SUBNETTOA_BUF]; ++ ++ subnettoa(ipsp->ips_flow_s.u.v4.sin_addr, ++ ipsp->ips_mask_s.u.v4.sin_addr, ++ 0, sflow_txt, sizeof(sflow_txt)); ++ subnettoa(ipsp->ips_flow_d.u.v4.sin_addr, ++ ipsp->ips_mask_d.u.v4.sin_addr, ++ 0, dflow_txt, sizeof(dflow_txt)); ++ spin_unlock(&tdb_lock); ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "SA:%s, inner tunnel policy [%s -> %s] does not agree with pkt contents [%s -> %s].\n", ++ irs->sa_len ? irs->sa : " (error)", ++ sflow_txt, ++ dflow_txt, ++ irs->ipsaddr_txt, ++ irs->ipdaddr_txt); ++ if(irs->stats) { ++ irs->stats->rx_dropped++; ++ } ++ goto rcvleave; ++ } ++#ifdef CONFIG_NETFILTER ++ skb->nfmark = (skb->nfmark & (~(IPsecSAref2NFmark(IPSEC_SA_REF_TABLE_MASK)))) ++ | IPsecSAref2NFmark(IPsecSA2SAref(ipsp)); ++ KLIPS_PRINT(debug_rcv & DB_RX_PKTRX, ++ "klips_debug:ipsec_rcv: " ++ "IPIP SA sets skb->nfmark=0x%x.\n", ++ (unsigned)skb->nfmark); ++#endif /* CONFIG_NETFILTER */ ++ } ++ ++ spin_unlock(&tdb_lock); ++ ++ if(irs->stats) { ++ irs->stats->rx_bytes += skb->len; ++ } ++ if(skb->dst) { ++ dst_release(skb->dst); ++ skb->dst = NULL; ++ } ++ skb->pkt_type = PACKET_HOST; ++ if(irs->hard_header_len && ++ (skb_mac_header(skb) != (skb_network_header(skb) - irs->hard_header_len)) && ++ (irs->hard_header_len <= skb_headroom(skb))) { ++ /* copy back original MAC header */ ++ memmove(skb_network_header(skb) - irs->hard_header_len, ++ skb_mac_header(skb), irs->hard_header_len); ++ skb_set_mac_header(skb, ipsec_skb_offset(skb, skb_network_header(skb) - irs->hard_header_len)); ++ } ++ ++#ifdef CONFIG_KLIPS_IPCOMP ++ if(ipp->protocol == IPPROTO_COMP) { ++ unsigned int flags 
= 0; ++ ++ if(sysctl_ipsec_inbound_policy_check) { ++ KLIPS_PRINT(debug_rcv & DB_RX_PKTRX, ++ "klips_debug:ipsec_rcv: " ++ "inbound policy checking enabled, IPCOMP follows IPIP, dropped.\n"); ++ if (irs->stats) { ++ irs->stats->rx_errors++; ++ } ++ goto rcvleave; ++ } ++ /* ++ XXX need a ipsec_sa for updating ratio counters but it is not ++ following policy anyways so it is not a priority ++ */ ++ skb = skb_decompress(skb, NULL, &flags); ++ if (!skb || flags) { ++ KLIPS_PRINT(debug_rcv & DB_RX_PKTRX, ++ "klips_debug:ipsec_rcv: " ++ "skb_decompress() returned error flags: %d, dropped.\n", ++ flags); ++ if (irs->stats) { ++ irs->stats->rx_errors++; ++ } ++ goto rcvleave; ++ } ++ } ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ ++ /* ++ * make sure that data now starts at IP header, since we are going ++ * to pass this back to ip_input (aka netif_rx). Rules for what the ++ * pointers wind up a different for 2.6 vs 2.4, so we just fudge it here. ++ */ ++#ifdef NET_26 ++ irs->skb->data = skb_push(irs->skb, skb_transport_header(irs->skb) - skb_network_header(irs->skb)); ++#else ++ irs->skb->data = skb_network_header(irs->skb); ++ { ++ struct iphdr *iph = ip_hdr(irs->skb); ++ int len = ntohs(iph->tot_len); ++ irs->skb->len = len; ++ } ++#endif ++ ++#ifdef SKB_RESET_NFCT ++ nf_conntrack_put(skb->nfct); ++ skb->nfct = NULL; ++#if defined(CONFIG_NETFILTER_DEBUG) && defined(HAVE_SKB_NF_DEBUG) ++ skb->nf_debug = 0; ++#endif /* CONFIG_NETFILTER_DEBUG */ ++#endif /* SKB_RESET_NFCT */ ++ KLIPS_PRINT(debug_rcv & DB_RX_PKTRX, ++ "klips_debug:ipsec_rcv: " ++ "netif_rx() called.\n"); ++ netif_rx(skb); ++ skb=NULL; ++ ++ rcvleave: ++ if(skb) { ++ ipsec_kfree_skb(skb); ++ } ++ ++ /* KLIPS_DEC_USE; Artifact from refactor? 
bug # 454 */ ++ return(0); ++} ++ ++struct sk_buff *ipsec_rcv_unclone(struct sk_buff *skb, ++ struct ipsec_rcv_state *irs) ++{ ++ /* if skb was cloned (most likely due to a packet sniffer such as ++ tcpdump being momentarily attached to the interface), make ++ a copy of our own to modify */ ++ if(skb_cloned(skb)) { ++ /* include any mac header while copying.. */ ++ if(skb_headroom(skb) < irs->hard_header_len) { ++ printk(KERN_WARNING "klips_error:ipsec_rcv: " ++ "tried to skb_push hhlen=%d, %d available. This should never happen, please report.\n", ++ irs->hard_header_len, ++ skb_headroom(skb)); ++ goto rcvleave; ++ } ++ skb_push(skb, irs->hard_header_len); ++ if ++#ifdef SKB_COW_NEW ++ (skb_cow(skb, skb_headroom(skb)) != 0) ++#else /* SKB_COW_NEW */ ++ ((skb = skb_cow(skb, skb_headroom(skb))) == NULL) ++#endif /* SKB_COW_NEW */ ++ { ++ goto rcvleave; ++ } ++ if(skb->len < irs->hard_header_len) { ++ printk(KERN_WARNING "klips_error:ipsec_rcv: " ++ "tried to skb_pull hhlen=%d, %d available. This should never happen, please report.\n", ++ irs->hard_header_len, ++ skb->len); ++ goto rcvleave; ++ } ++ skb_pull(skb, irs->hard_header_len); ++ } ++ return skb; ++ ++rcvleave: ++ ipsec_kfree_skb(skb); ++ return NULL; ++} ++ ++ ++#if !defined(NET_26) && defined(CONFIG_IPSEC_NAT_TRAVERSAL) ++/* ++ * decapsulate a UDP encapsulated ESP packet ++ */ ++struct sk_buff *ipsec_rcv_natt_decap(struct sk_buff *skb ++ , struct ipsec_rcv_state *irs ++ , int *udp_decap_ret_p) ++{ ++ *udp_decap_ret_p = 0; ++ if (skb->sk && skb->nh.iph && skb->nh.iph->protocol==IPPROTO_UDP) { ++ /** ++ * Packet comes from udp_queue_rcv_skb so it is already defrag, ++ * checksum verified, ... (ie safe to use) ++ * ++ * If the packet is not for us, return -1 and udp_queue_rcv_skb ++ * will continue to handle it (do not kfree skb !!). 
++ */ ++ ++#ifndef UDP_OPT_IN_SOCK ++ struct udp_opt { ++ __u32 esp_in_udp; ++ }; ++ struct udp_opt *tp = (struct udp_opt *)&(skb->sk->tp_pinfo.af_tcp); ++#else ++ struct udp_opt *tp = &(skb->sk->tp_pinfo.af_udp); ++#endif ++ ++ struct iphdr *ip = (struct iphdr *)skb->nh.iph; ++ struct udphdr *udp = (struct udphdr *)((__u32 *)ip+ip->ihl); ++ __u8 *udpdata = (__u8 *)udp + sizeof(struct udphdr); ++ __u32 *udpdata32 = (__u32 *)udpdata; ++ ++ irs->natt_sport = ntohs(udp->source); ++ irs->natt_dport = ntohs(udp->dest); ++ ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "suspected ESPinUDP packet (NAT-Traversal) [%d].\n", ++ tp->esp_in_udp); ++ KLIPS_IP_PRINT(debug_rcv, ip); ++ ++ if (udpdata < skb->tail) { ++ unsigned int len = skb->tail - udpdata; ++ if ((len==1) && (udpdata[0]==0xff)) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ /* not IPv6 compliant message */ ++ "NAT-keepalive from %d.%d.%d.%d.\n", NIPQUAD(ip->saddr)); ++ *udp_decap_ret_p = 0; ++ return NULL; ++ } ++ else if ( (tp->esp_in_udp == ESPINUDP_WITH_NON_IKE) && ++ (len > (2*sizeof(__u32) + sizeof(struct esphdr))) && ++ (udpdata32[0]==0) && (udpdata32[1]==0) ) { ++ /* ESP Packet with Non-IKE header */ ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "ESPinUDP pkt with Non-IKE - spi=0x%x\n", ++ ntohl(udpdata32[2])); ++ irs->natt_type = ESPINUDP_WITH_NON_IKE; ++ irs->natt_len = sizeof(struct udphdr)+(2*sizeof(__u32)); ++ } ++ else if ( (tp->esp_in_udp == ESPINUDP_WITH_NON_ESP) && ++ (len > sizeof(struct esphdr)) && ++ (udpdata32[0]!=0) ) { ++ /* ESP Packet without Non-ESP header */ ++ irs->natt_type = ESPINUDP_WITH_NON_ESP; ++ irs->natt_len = sizeof(struct udphdr); ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "ESPinUDP pkt without Non-ESP - spi=0x%x\n", ++ ntohl(udpdata32[0])); ++ } ++ else { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "IKE packet - not handled here\n"); ++ *udp_decap_ret_p = -1; ++ return NULL; ++ } ++ } ++ else { ++ return NULL; 
++ } ++ } ++ return skb; ++} ++#endif ++ ++ ++int ++ipsec_rcv(struct sk_buff *skb ++#ifndef PROTO_HANDLER_SINGLE_PARM ++ unsigned short xlen ++#endif /* PROTO_HANDLER_SINGLE_PARM */ ++ ) ++{ ++#ifdef CONFIG_KLIPS_DEBUG ++ struct net_device *dev = skb->dev; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ unsigned char protoc; ++ struct net_device_stats *stats = NULL; /* This device's statistics */ ++ struct net_device *ipsecdev = NULL, *prvdev; ++ struct ipsecpriv *prv; ++ struct ipsec_rcv_state nirs, *irs = &nirs; ++ struct iphdr *ipp; ++ char name[9]; ++ int i; ++ ++ /* Don't unlink in the middle of a turnaround */ ++ KLIPS_INC_USE; ++ ++ memset(&nirs, 0, sizeof(struct ipsec_rcv_state)); ++ ++ if (skb == NULL) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "NULL skb passed in.\n"); ++ goto rcvleave; ++ } ++ ++ if (skb->data == NULL) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "NULL skb->data passed in, packet is bogus, dropping.\n"); ++ goto rcvleave; ++ } ++ ++#if defined(CONFIG_IPSEC_NAT_TRAVERSAL) && !defined(NET_26) ++ { ++ /* NET_26 NAT-T is handled by seperate function */ ++ struct sk_buff *nskb; ++ int udp_decap_ret = 0; ++ ++ nskb = ipsec_rcv_natt_decap(skb, irs, &udp_decap_ret); ++ if(nskb == NULL) { ++ /* return with non-zero, because UDP.c code ++ * need to send it upstream. ++ */ ++ if(skb && udp_decap_ret == 0) { ++ ipsec_kfree_skb(skb); ++ } ++ KLIPS_DEC_USE; ++ return(udp_decap_ret); ++ } ++ skb = nskb; ++ } ++#endif /* NAT_T */ ++ ++ /* dev->hard_header_len is unreliable and should not be used */ ++ /* klips26_rcv_encap will have already set hard_header_len for us?? */ ++ if (irs->hard_header_len == 0) { ++ irs->hard_header_len = skb_mac_header(skb) ? 
(skb_network_header(skb) - skb_mac_header(skb)) : 0; ++ if((irs->hard_header_len < 0) || (irs->hard_header_len > skb_headroom(skb))) ++ irs->hard_header_len = 0; ++ } ++ ++ skb = ipsec_rcv_unclone(skb, irs); ++ if(skb == NULL) { ++ goto rcvleave; ++ } ++ ++#if IP_FRAGMENT_LINEARIZE ++ /* In Linux 2.4.4, we may have to reassemble fragments. They are ++ not assembled automatically to save TCP from having to copy ++ twice. ++ */ ++ if (skb_is_nonlinear(skb)) { ++#ifdef HAVE_NEW_SKB_LINEARIZE ++ if (skb_linearize_cow(skb) != 0) ++#else ++ if (skb_linearize(skb, GFP_ATOMIC) != 0) ++#endif ++ { ++ goto rcvleave; ++ } ++ } ++#endif /* IP_FRAGMENT_LINEARIZE */ ++ ++#if defined(CONFIG_IPSEC_NAT_TRAVERSAL) && !defined(NET_26) ++ if (irs->natt_len) { ++ /** ++ * Now, we are sure packet is ESPinUDP, and we have a private ++ * copy that has been linearized, remove natt_len bytes ++ * from packet and modify protocol to ESP. ++ */ ++ if (((unsigned char *)skb->data > (unsigned char *)skb->nh.iph) ++ && ((unsigned char *)skb->nh.iph > (unsigned char *)skb->head)) ++ { ++ unsigned int _len = (unsigned char *)skb->data - ++ (unsigned char *)skb->nh.iph; ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: adjusting skb: skb_push(%u)\n", ++ _len); ++ skb_push(skb, _len); ++ } ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "removing %d bytes from ESPinUDP packet\n", irs->natt_len); ++ ipp = skb->nh.iph; ++ irs->iphlen = ipp->ihl << 2; ++ ipp->tot_len = htons(ntohs(ipp->tot_len) - irs->natt_len); ++ if (skb->len < irs->iphlen + irs->natt_len) { ++ printk(KERN_WARNING ++ "klips_error:ipsec_rcv: " ++ "ESPinUDP packet is too small (%d < %d+%d). 
" ++ "This should never happen, please report.\n", ++ (int)(skb->len), irs->iphlen, irs->natt_len); ++ goto rcvleave; ++ } ++ ++ /* advance payload pointer to point past the UDP header */ ++ skb->h.raw = skb->h.raw + irs->natt_len; ++ ++ /* modify protocol */ ++ ipp->protocol = IPPROTO_ESP; ++ ++ skb->sk = NULL; ++ ++ KLIPS_IP_PRINT(debug_rcv, skb->nh.iph); ++ } ++#endif ++ ++ /* ipp = skb->nh.iph; */ ++ ipp = ip_hdr(skb); ++ ++ { ++ struct in_addr ipsaddr; ++ struct in_addr ipdaddr; ++ ++ ipsaddr.s_addr = ipp->saddr; ++ addrtoa(ipsaddr, 0, irs->ipsaddr_txt ++ , sizeof(irs->ipsaddr_txt)); ++ ipdaddr.s_addr = ipp->daddr; ++ addrtoa(ipdaddr, 0, irs->ipdaddr_txt ++ , sizeof(irs->ipdaddr_txt)); ++ } ++ ++ irs->iphlen = ipp->ihl << 2; ++ ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "<<< Info -- "); ++ KLIPS_PRINTMORE(debug_rcv && skb->dev, "skb->dev=%s ", ++ skb->dev->name ? skb->dev->name : "NULL"); ++ KLIPS_PRINTMORE(debug_rcv && dev, "dev=%s ", ++ dev->name ? dev->name : "NULL"); ++ KLIPS_PRINTMORE(debug_rcv, "\n"); ++ ++ KLIPS_PRINT(debug_rcv && !(skb->dev && dev && (skb->dev == dev)), ++ "klips_debug:ipsec_rcv: " ++ "Informational -- **if this happens, find out why** skb->dev:%s is not equal to dev:%s\n", ++ skb->dev ? (skb->dev->name ? skb->dev->name : "NULL") : "NULL", ++ dev ? (dev->name ? 
dev->name : "NULL") : "NULL"); ++ ++ protoc = ipp->protocol; ++#ifndef NET_21 ++ if((!protocol) || (protocol->protocol != protoc)) { ++ KLIPS_PRINT(debug_rcv & DB_RX_IPSA, ++ "klips_debug:ipsec_rcv: " ++ "protocol arg is NULL or unequal to the packet contents, this is odd, using value in packet.\n"); ++ } ++#endif /* !NET_21 */ ++ ++ if( (protoc != IPPROTO_AH) && ++#ifdef CONFIG_KLIPS_IPCOMP_disabled_until_we_register_IPCOMP_HANDLER ++ (protoc != IPPROTO_COMP) && ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ (protoc != IPPROTO_ESP) ) { ++ KLIPS_PRINT(debug_rcv & DB_RX_IPSA, ++ "klips_debug:ipsec_rcv: Why the hell is someone " ++ "passing me a non-ipsec protocol = %d packet? -- dropped.\n", ++ protoc); ++ goto rcvleave; ++ } ++ ++ if(skb->dev) { ++ for(i = 0; i < IPSEC_NUM_IF; i++) { ++ sprintf(name, IPSEC_DEV_FORMAT, i); ++ if(!strcmp(name, skb->dev->name)) { ++ prv = (struct ipsecpriv *)(skb->dev->priv); ++ if(prv) { ++ stats = (struct net_device_stats *) &(prv->mystats); ++ } ++ ipsecdev = skb->dev; ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "Info -- pkt already proc'ed a group of ipsec headers, processing next group of ipsec headers.\n"); ++ break; ++ } ++ if((ipsecdev = __ipsec_dev_get(name)) == NULL) { ++ KLIPS_PRINT(debug_rcv, ++ "klips_error:ipsec_rcv: " ++ "device %s does not exist\n", ++ name); ++ } ++ prv = ipsecdev ? (struct ipsecpriv *)(ipsecdev->priv) : NULL; ++ prvdev = prv ? (struct net_device *)(prv->dev) : NULL; ++ ++#if 0 ++ KLIPS_PRINT(debug_rcv && prvdev, ++ "klips_debug:ipsec_rcv: " ++ "physical device for device %s is %s\n", ++ name, ++ prvdev->name); ++#endif ++ if(prvdev && skb->dev && ++ !strcmp(prvdev->name, skb->dev->name)) { ++ stats = prv ? 
((struct net_device_stats *) &(prv->mystats)) : NULL; ++ skb->dev = ipsecdev; ++ KLIPS_PRINT(debug_rcv && prvdev, ++ "klips_debug:ipsec_rcv: " ++ "assigning packet ownership to virtual device %s from physical device %s.\n", ++ name, prvdev->name); ++ if(stats) { ++ stats->rx_packets++; ++ } ++ break; ++ } ++ } ++ } else { ++ KLIPS_PRINT(debug_rcv, ++ "klips_debug:ipsec_rcv: " ++ "device supplied with skb is NULL\n"); ++ } ++ ++ if(stats == NULL) { ++ KLIPS_PRINT((debug_rcv), ++ "klips_error:ipsec_rcv: " ++ "packet received from physical I/F (%s) not connected to ipsec I/F. Cannot record stats. May not have SA for decoding. Is IPSEC traffic expected on this I/F? Check routing.\n", ++ skb->dev ? (skb->dev->name ? skb->dev->name : "NULL") : "NULL"); ++ } ++ ++ KLIPS_IP_PRINT(debug_rcv, ipp); ++ ++ /* set up for decap loop */ ++ irs->stats= stats; ++ irs->ipp = ipp; ++ irs->ipsp = NULL; ++ irs->ilen = 0; ++ irs->authlen=0; ++ irs->authfuncs=NULL; ++ irs->skb = skb; ++ ++ ipsec_rcv_decap(irs); ++ KLIPS_DEC_USE; ++ return(0); ++ ++ rcvleave: ++ if(skb) { ++ ipsec_kfree_skb(skb); ++ } ++ KLIPS_DEC_USE; ++ return(0); ++ ++} ++ ++#ifdef NET_26 ++/* ++ * this entry point is not a protocol entry point, so the entry ++ * is a bit different. ++ * ++ * skb->iph->tot_len has been byte-swapped, and reduced by the size of ++ * the IP header (and options). ++ * ++ * skb->h.raw has been pulled up the ESP header. 
++ * ++ * skb->iph->protocol = 50 IPPROTO_ESP; ++ * ++ */ ++int klips26_rcv_encap(struct sk_buff *skb, __u16 encap_type) ++{ ++ struct ipsec_rcv_state nirs, *irs = &nirs; ++ struct iphdr *ipp; ++ ++ /* Don't unlink in the middle of a turnaround */ ++ KLIPS_INC_USE; ++ ++ memset(irs, 0, sizeof(*irs)); ++ ++ /* XXX fudge it so that all nat-t stuff comes from ipsec0 */ ++ /* eventually, the SA itself will determine which device ++ * it comes from ++ */ ++ { ++ skb->dev = ipsec_get_device(0); ++ } ++ ++ /* set up for decap loop */ ++ irs->hard_header_len = skb->dev->hard_header_len; ++ ++ skb = ipsec_rcv_unclone(skb, irs); ++ ++#if IP_FRAGMENT_LINEARIZE ++ /* In Linux 2.4.4, we may have to reassemble fragments. They are ++ not assembled automatically to save TCP from having to copy ++ twice. ++ */ ++ if (skb_is_nonlinear(skb)) { ++#ifdef HAVE_NEW_SKB_LINEARIZE ++ if (skb_linearize_cow(skb) != 0) ++#else ++ if (skb_linearize(skb, GFP_ATOMIC) != 0) ++#endif ++ { ++ goto rcvleave; ++ } ++ } ++#endif /* IP_FRAGMENT_LINEARIZE */ ++ ++ /* ipp = skb->nh.iph; */ ++ ipp =ip_hdr(skb); ++ ++ { ++ struct in_addr ipsaddr; ++ struct in_addr ipdaddr; ++ ++ ipsaddr.s_addr = ipp->saddr; ++ addrtoa(ipsaddr, 0, irs->ipsaddr_txt ++ , sizeof(irs->ipsaddr_txt)); ++ ipdaddr.s_addr = ipp->daddr; ++ addrtoa(ipdaddr, 0, irs->ipdaddr_txt ++ , sizeof(irs->ipdaddr_txt)); ++ } ++ ++ irs->iphlen = ipp->ihl << 2; ++ ++ KLIPS_IP_PRINT(debug_rcv, ipp); ++ ++ irs->stats= NULL; ++ irs->ipp = ipp; ++ irs->ipsp = NULL; ++ irs->ilen = 0; ++ irs->authlen=0; ++ irs->authfuncs=NULL; ++ irs->skb = skb; ++ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ switch(encap_type) { ++ case UDP_ENCAP_ESPINUDP: ++ irs->natt_type = ESPINUDP_WITH_NON_ESP; ++ break; ++ ++ case UDP_ENCAP_ESPINUDP_NON_IKE: ++ irs->natt_type = ESPINUDP_WITH_NON_IKE; ++ break; ++ ++ default: ++ if(printk_ratelimit()) { ++ printk(KERN_INFO "KLIPS received unknown UDP-ESP encap type %u\n", ++ encap_type); ++ } ++ return -1; ++ } ++ ++#endif ++ 
ipsec_rcv_decap(irs); ++ KLIPS_DEC_USE; ++ return 0; ++ ++rcvleave: ++ if(skb) { ++ ipsec_kfree_skb(skb); ++ } ++ KLIPS_DEC_USE; ++ return 0; ++} ++#endif ++ ++ ++/* ++ * $Log: ipsec_rcv.c,v $ ++ * Revision 1.171.2.15 2007-10-30 21:37:45 paul ++ * Use skb_tail_pointer() [dhr] ++ * ++ * Revision 1.171.2.14 2007-10-22 14:54:38 paul ++ * Fix identation ++ * ++ * Revision 1.171.2.13 2007/10/15 22:16:34 paul ++ * Adding missing ; in DISABLE_UDP_CHECKSUM code ++ * ++ * Revision 1.171.2.12 2007/09/05 02:56:09 paul ++ * Use the new ipsec_kversion macros by David to deal with 2.6.22 kernels. ++ * Fixes based on David McCullough patch. ++ * ++ * Revision 1.171.2.11 2007/04/28 20:46:40 paul ++ * Added compile time switch for -DDISABLE_UDP_CHECKSUM that seems to be ++ * breaking IPsec+NAT+Transport mode with NAT-OA. Enabled this per default ++ * via Makefile.inc's USERCOMPILE flags. ++ * ++ * Revision 1.171.2.10 2006/10/06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.171.2.9 2006/07/30 02:09:33 paul ++ * Author: Bart Trojanowski ++ * This fixes a NATT+ESP bug in rcv path. ++ * ++ * We only want to test NATT policy on the ESP packet. Doing so on the ++ * bundled SA breaks because the next layer does not know anything about ++ * NATT. ++ * ++ * Fix just puts an if(proto == IPPROTO_ESP) around the NATT policy check. ++ * ++ * Revision 1.171.2.8 2006/07/29 05:03:04 paul ++ * Added check for new version of skb_linearize that only takes 1 argument, ++ * for 2.6.18+ kernels. ++ * ++ * Revision 1.171.2.7 2006/04/20 16:33:07 mcr ++ * remove all of CONFIG_KLIPS_ALG --- one can no longer build without it. ++ * Fix in-kernel module compilation. Sub-makefiles do not work. ++ * ++ * Revision 1.171.2.6 2005/12/07 06:07:04 paul ++ * comment out KLIPS_DEC_USE in ipsec_rcv_decap. Likely an artifact from ++ * refactoring. 
http://bugs.xelerance.com/view.php?id=454 ++ * ++ * Revision 1.171.2.5 2005/10/21 02:22:29 mcr ++ * pull up of another try at 2.4.x kernel fix ++ * ++ * Revision 1.171.2.4 2005/10/21 01:39:56 mcr ++ * nat-t fix is 2.4/2.6 specific ++ * ++ * Revision 1.178 2005/10/21 02:19:34 mcr ++ * on 2.4 systems, we have to fix up the length as well. ++ * ++ * Revision 1.177 2005/10/21 00:18:31 mcr ++ * nat-t fix is 2.4 specific. ++ * ++ * Revision 1.176 2005/10/20 21:06:11 mcr ++ * possible fix for nat-t problem on 2.4 kernels. ++ * ++ * Revision 1.175 2005/10/13 02:49:24 mcr ++ * tested UDP-encapsulated ESP packets that were not actually ESP, ++ * (but IKE) were being eaten. ++ * ++ * Revision 1.174 2005/10/13 01:25:22 mcr ++ * UDP-encapsulated ESP packets that were not actually ESP, ++ * (but IKE) were being eaten. ++ * ++ * Revision 1.173 2005/08/31 23:26:11 mcr ++ * fixes for 2.6.13 ++ * ++ * Revision 1.172 2005/08/05 08:44:54 mcr ++ * ipsec_kern24.h (compat code for 2.4) must be include ++ * explicitely now. ++ * ++ * Revision 1.171 2005/07/08 23:56:06 ken ++ * #ifdef ++ * ++ * Revision 1.170 2005/07/08 23:50:05 ken ++ * Don't attempt to decapsulate if NAT-T isn't available in the code ++ * ++ * Revision 1.169 2005/06/06 00:27:31 mcr ++ * fix for making tcpdump (packet capture) work correctly for ++ * nat-t received packets. ++ * ++ * Revision 1.168 2005/06/04 16:06:06 mcr ++ * better patch for nat-t rcv-device code. ++ * ++ * Revision 1.167 2005/06/03 17:04:46 mcr ++ * nat-t packets are forced to arrive from ipsec0. ++ * ++ * Revision 1.166 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. ++ * ++ * Revision 1.165 2005/04/20 17:11:32 mcr ++ * fixed to compile on 2.4. ++ * ++ * Revision 1.164 2005/04/18 03:09:50 ken ++ * Fix typo ++ * ++ * Revision 1.163 2005/04/17 05:32:58 mcr ++ * remove extraneous debugging ++ * make sure to return success from klips26_encap_rcv(). 
++ * ++ * Revision 1.162 2005/04/17 04:37:01 mcr ++ * make sure that irs->ipp is still set. ++ * ++ * Revision 1.161 2005/04/17 03:51:52 mcr ++ * removed old comment about removed code. ++ * added translation from udp.c/2.6 to KLIPS NAT-ESP naming. ++ * comment about check for origin address/port for incoming NAT-ESP packets. ++ * ++ * Revision 1.160 2005/04/15 19:55:58 mcr ++ * adjustments to use proper skb fields for data. ++ * ++ * Revision 1.159 2005/04/10 22:58:20 mcr ++ * refactoring of receive functions to make it easier to ++ * call the ESP decap. ++ * ++ * Revision 1.158 2005/04/08 18:27:53 mcr ++ * refactored ipsec_rcv() into ipsec_rcv() and ipsec_rcv_decap(). ++ * ++ * Revision 1.157 2004/12/28 23:13:09 mcr ++ * use consistent CONFIG_IPSEC_NAT_TRAVERSAL. ++ * ++ * Revision 1.156 2004/12/03 21:34:51 mcr ++ * mistype of KLIPS_USE_COUNT -> KLIPS_INC_USE; ++ * ++ * Revision 1.155 2004/12/03 21:25:57 mcr ++ * compile time fixes for running on 2.6. ++ * still experimental. ++ * ++ * Revision 1.154 2004/09/08 17:21:36 ken ++ * Rename MD5* -> osMD5 functions to prevent clashes with other symbols exported by kernel modules (CIFS in 2.6 initiated this) ++ * ++ * Revision 1.153 2004/08/22 20:10:00 mcr ++ * removed check for incorrect setting of NET_26. ++ * ++ * Revision 1.152 2004/08/21 15:22:39 mcr ++ * added #defines for ATT heartbeat. ++ * ++ * Revision 1.151 2004/08/21 02:16:32 ken ++ * Patch from Jochen Eisinger for AT&T MTS Heartbeat packet support ++ * ++ * Revision 1.150 2004/08/21 00:44:48 mcr ++ * CONFIG_KLIPS_NAT was wrong, also need to include udp.h. ++ * ++ * Revision 1.149 2004/08/20 21:45:45 mcr ++ * CONFIG_KLIPS_NAT_TRAVERSAL is not used in an attempt to ++ * be 26sec compatible. But, some defines where changed. ++ * ++ * Revision 1.148 2004/08/17 03:27:23 mcr ++ * klips 2.6 edits. ++ * ++ * Revision 1.147 2004/08/05 23:29:27 mcr ++ * fixed nesting of #ifdef vs {} in ipsec_rcv(). 
++ * ++ * Revision 1.146 2004/08/04 15:57:07 mcr ++ * moved des .h files to include/des/ * ++ * included 2.6 protocol specific things ++ * started at NAT-T support, but it will require a kernel patch. ++ * ++ * Revision 1.145 2004/08/03 18:19:08 mcr ++ * in 2.6, use "net_device" instead of #define device->net_device. ++ * this probably breaks 2.0 compiles. ++ * ++ * Revision 1.144 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.143 2004/05/10 22:27:00 mcr ++ * fix for ESP-3DES-noauth test case. ++ * ++ * Revision 1.142 2004/05/10 22:25:57 mcr ++ * reformat of calls to ipsec_lifetime_check(). ++ * ++ * Revision 1.141 2004/04/06 02:49:26 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.140 2004/02/03 03:12:53 mcr ++ * removed erroneously, double patched code. ++ * ++ * Revision 1.139 2004/01/05 23:21:29 mcr ++ * initialize sin_family in ipsec_rcv.c ++ * ++ * Revision 1.138 2003/12/24 19:46:52 mcr ++ * if sock.h patch has not been applied, then define appropriate ++ * structure so we can use it. This is serious inferior, and ++ * depends upon the concept that the structure in question is ++ * smaller than the other members of that union. ++ * getting rid of differing methods is a better solution. ++ * ++ * Revision 1.137 2003/12/22 19:40:57 mcr ++ * NAT-T patches 0.6c. ++ * ++ * Revision 1.136 2003/12/15 18:13:12 mcr ++ * when compiling with NAT traversal, don't assume that the ++ * kernel has been patched, unless CONFIG_IPSEC_NAT_NON_ESP ++ * is set. ++ * ++ * Revision 1.135 2003/12/13 19:10:21 mcr ++ * refactored rcv and xmit code - same as FS 2.05. ++ * ++ * Revision 1.134.2.1 2003/12/22 15:25:52 jjo ++ * Merged algo-0.8.1-rc11-test1 into alg-branch ++ * ++ * Revision 1.134 2003/12/10 01:14:27 mcr ++ * NAT-traversal patches to KLIPS. ++ * ++ * Revision 1.133 2003/10/31 02:27:55 mcr ++ * pulled up port-selector patches and sa_id elimination. 
++ * ++ * Revision 1.132.2.1 2003/10/29 01:30:41 mcr ++ * elimited "struct sa_id". ++ * ++ * Revision 1.132 2003/09/02 19:51:48 mcr ++ * fixes for PR#252. ++ * ++ * Revision 1.131 2003/07/31 22:47:16 mcr ++ * preliminary (untested by FS-team) 2.5 patches. ++ * ++ * Revision 1.130 2003/04/03 17:38:25 rgb ++ * Centralised ipsec_kfree_skb and ipsec_dev_{get,put}. ++ * Clarified logic for non-connected devices. ++ * ++ * Revision 1.129 2003/02/06 02:21:34 rgb ++ * ++ * Moved "struct auth_alg" from ipsec_rcv.c to ipsec_ah.h . ++ * Changed "struct ah" to "struct ahhdr" and "struct esp" to "struct esphdr". ++ * Removed "#ifdef INBOUND_POLICY_CHECK_eroute" dead code. ++ * ++ * Revision 1.128 2002/12/13 20:58:03 rgb ++ * Relegated MCR's recent "_dmp" routine to debug_verbose. ++ * Cleaned up printing of source and destination addresses in debug output. ++ * ++ * Revision 1.127 2002/12/04 16:00:16 rgb ++ * ++ * Fixed AH decapsulation pointer update bug and added some comments and ++ * debugging. ++ * This bug was caught by west-ah-0[12]. ++ * ++ * Revision 1.126 2002/11/04 05:03:43 mcr ++ * fixes for IPCOMP. There were two problems: ++ * 1) the irs->ipp pointer was not being updated properly after ++ * the ESP descryption. The meant nothing for IPIP, as the ++ * later IP header overwrote the earlier one. ++ * 2) the more serious problem was that skb_decompress will ++ * usually allocate a new SKB, so we have to make sure that ++ * it doesn't get lost. ++ * #2 meant removing the skb argument from the ->decrypt routine ++ * and moving it to the irs->skb, so it could be value/result. 
++ * ++ * Revision 1.125 2002/11/01 01:53:35 dhr ++ * ++ * fix typo ++ * ++ * Revision 1.124 2002/10/31 22:49:01 dhr ++ * ++ * - eliminate unused variable "hash" ++ * - reduce scope of variable "authenticator" ++ * - add comment on a couple of tricky bits ++ * ++ * Revision 1.123 2002/10/31 22:39:56 dhr ++ * ++ * use correct type for result of function calls ++ * ++ * Revision 1.122 2002/10/31 22:36:25 dhr ++ * ++ * simplify complex test ++ * ++ * Revision 1.121 2002/10/31 22:34:04 dhr ++ * ++ * ipsprev is never used: ditch it ++ * ++ * Revision 1.120 2002/10/31 22:30:21 dhr ++ * ++ * eliminate redundant assignments ++ * ++ * Revision 1.119 2002/10/31 22:27:43 dhr ++ * ++ * make whitespace canonical ++ * ++ * Revision 1.118 2002/10/30 05:47:17 rgb ++ * Fixed cut-and-paste error mis-identifying comp runt as ah. ++ * ++ * Revision 1.117 2002/10/17 16:37:45 rgb ++ * Remove compp intermediate variable and in-line its contents ++ * where used ++ * ++ * Revision 1.116 2002/10/12 23:11:53 dhr ++ * ++ * [KenB + DHR] more 64-bit cleanup ++ * ++ * Revision 1.115 2002/10/07 19:06:58 rgb ++ * Minor fixups and activation to west-rcv-nfmark-set-01 test to check for SA reference properly set on incoming. ++ * ++ * Revision 1.114 2002/10/07 18:31:31 rgb ++ * Set saref on incoming packets. ++ * ++ * Revision 1.113 2002/09/16 21:28:12 mcr ++ * adjust hash length for HMAC calculation - must look at whether ++ * it is MD5 or SHA1. ++ * ++ * Revision 1.112 2002/09/16 21:19:15 mcr ++ * fixes for west-ah-icmp-01 - length of AH header must be ++ * calculated properly, and next_header field properly copied. ++ * ++ * Revision 1.111 2002/09/10 02:45:56 mcr ++ * re-factored the ipsec_rcv function into several functions, ++ * ipsec_rcv_decap_once, and a set of functions for AH, ESP and IPCOMP. ++ * In addition, the MD5 and SHA1 functions are replaced with pointers. ++ * ++ * Revision 1.110 2002/08/30 06:34:33 rgb ++ * Fix scope of shift in AH header length check. 
++ * ++ * Revision 1.109 2002/08/27 16:49:20 rgb ++ * Fixed ESP short packet DOS (and AH and IPCOMP). ++ * ++ * Revision 1.108 2002/07/24 18:44:54 rgb ++ * Type fiddling to tame ia64 compiler. ++ * ++ * Revision 1.107 2002/05/27 18:58:18 rgb ++ * Convert to dynamic ipsec device allocation. ++ * Remove final vistiges of tdb references via IPSEC_KLIPS1_COMPAT. ++ * ++ * Revision 1.106 2002/05/23 07:15:21 rgb ++ * Pointer clean-up. ++ * Added refcount code. ++ * ++ * Revision 1.105 2002/05/14 02:35:06 rgb ++ * Change all references to tdb, TDB or Tunnel Descriptor Block to ips, ++ * ipsec_sa or ipsec_sa. ++ * Change references to _TDB to _IPSA. ++ * ++ * Revision 1.104 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.103 2002/04/24 07:36:30 mcr ++ * Moved from ./klips/net/ipsec/ipsec_rcv.c,v ++ * ++ * Revision 1.102 2002/01/29 17:17:56 mcr ++ * moved include of ipsec_param.h to after include of linux/kernel.h ++ * otherwise, it seems that some option that is set in ipsec_param.h ++ * screws up something subtle in the include path to kernel.h, and ++ * it complains on the snprintf() prototype. ++ * ++ * Revision 1.101 2002/01/29 04:00:52 mcr ++ * more excise of kversions.h header. ++ * ++ * Revision 1.100 2002/01/29 02:13:17 mcr ++ * introduction of ipsec_kversion.h means that include of ++ * ipsec_param.h must preceed any decisions about what files to ++ * include to deal with differences in kernel source. ++ * ++ * Revision 1.99 2002/01/28 21:40:59 mcr ++ * should use #if to test boolean option rather than #ifdef. ++ * ++ * Revision 1.98 2002/01/20 20:19:36 mcr ++ * renamed option to IP_FRAGMENT_LINEARIZE. ++ * ++ * Revision 1.97 2002/01/12 02:55:36 mcr ++ * fix for post-2.4.4 to linearize skb's when ESP packet ++ * was assembled from fragments. ++ * ++ * Revision 1.96 2001/11/26 09:23:49 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. 
++ * ++ * Revision 1.93.2.2 2001/10/22 20:54:07 mcr ++ * include des.h, removed phony prototypes and fixed calling ++ * conventions to match real prototypes. ++ * ++ * Revision 1.93.2.1 2001/09/25 02:22:22 mcr ++ * struct tdb -> struct ipsec_sa. ++ * lifetime checks moved to ipsec_life.c ++ * some sa(tdb) manipulation functions renamed. ++ * ++ * Revision 1.95 2001/11/06 19:49:07 rgb ++ * Added variable descriptions. ++ * Removed unauthenticated sequence==0 check to prevent DoS. ++ * ++ * Revision 1.94 2001/10/18 04:45:20 rgb ++ * 2.4.9 kernel deprecates linux/malloc.h in favour of linux/slab.h, ++ * lib/freeswan.h version macros moved to lib/kversions.h. ++ * Other compiler directive cleanups. ++ * ++ * Revision 1.93 2001/09/07 22:17:24 rgb ++ * Fix for removal of transport layer protocol handler arg in 2.4.4. ++ * Fix to accomodate peer non-conformance to IPCOMP rfc2393. ++ * ++ * Revision 1.92 2001/08/27 19:44:41 rgb ++ * Fix error in comment. ++ * ++ * Revision 1.91 2001/07/20 19:31:48 dhr ++ * [DHR] fix source and destination subnets of policy in diagnostic ++ * ++ * Revision 1.90 2001/07/06 19:51:09 rgb ++ * Added inbound policy checking code for IPIP SAs. ++ * Renamed unused function argument for ease and intuitive naming. ++ * ++ * Revision 1.89 2001/06/22 19:35:23 rgb ++ * Disable ipcomp processing if we are handed a ipcomp packet with no esp ++ * or ah header. ++ * Print protocol if we are handed a non-ipsec packet. ++ * ++ * Revision 1.88 2001/06/20 06:30:47 rgb ++ * Fixed transport mode IPCOMP policy check bug. ++ * ++ * Revision 1.87 2001/06/13 20:58:40 rgb ++ * Added parentheses around assignment used as truth value to silence ++ * compiler. ++ * ++ * Revision 1.86 2001/06/07 22:25:23 rgb ++ * Added a source address policy check for tunnel mode. It still does ++ * not check client addresses and masks. ++ * Only decapsulate IPIP if it is expected. ++ * ++ * Revision 1.85 2001/05/30 08:14:02 rgb ++ * Removed vestiges of esp-null transforms. 
++ * ++ * Revision 1.84 2001/05/27 06:12:11 rgb ++ * Added structures for pid, packet count and last access time to eroute. ++ * Added packet count to beginning of /proc/net/ipsec_eroute. ++ * ++ * Revision 1.83 2001/05/04 16:45:47 rgb ++ * Remove unneeded code. ipp is not used after this point. ++ * ++ * Revision 1.82 2001/05/04 16:36:00 rgb ++ * Fix skb_cow() call for 2.4.4. (SS) ++ * ++ * Revision 1.81 2001/05/02 14:46:53 rgb ++ * Fix typo for compiler directive to pull IPH back. ++ * ++ * Revision 1.80 2001/04/30 19:46:34 rgb ++ * Update for 2.4.4. We now receive the skb with skb->data pointing to ++ * h.raw. ++ * ++ * Revision 1.79 2001/04/23 15:01:15 rgb ++ * Added spin_lock() check to prevent double-locking for multiple ++ * transforms and hence kernel lock-ups with SMP kernels. ++ * Minor spin_unlock() adjustments to unlock before non-dependant prints ++ * and IPSEC device stats updates. ++ * ++ * Revision 1.78 2001/04/21 23:04:24 rgb ++ * Check if soft expire has already been sent before sending another to ++ * prevent ACQUIRE flooding. ++ * ++ * Revision 1.77 2001/03/16 07:35:20 rgb ++ * Ditch extra #if 1 around now permanent policy checking code. ++ * ++ * Revision 1.76 2001/02/27 22:24:54 rgb ++ * Re-formatting debug output (line-splitting, joining, 1arg/line). ++ * Check for satoa() return codes. ++ * ++ * Revision 1.75 2001/02/19 22:28:30 rgb ++ * Minor change to virtual device discovery code to assert which I/F has ++ * been found. ++ * ++ * Revision 1.74 2000/11/25 03:50:36 rgb ++ * Oops fix by minor re-arrangement of code to avoid accessing a freed tdb. ++ * ++ * Revision 1.73 2000/11/09 20:52:15 rgb ++ * More spinlock shuffling, locking earlier and unlocking later in rcv to ++ * include ipcomp and prevent races, renaming some tdb variables that got ++ * forgotten, moving some unlocks to include tdbs and adding a missing ++ * unlock. Thanks to Svenning for some of these. 
++ * ++ * Revision 1.72 2000/11/09 20:11:22 rgb ++ * Minor shuffles to fix non-standard kernel config option selection. ++ * ++ * Revision 1.71 2000/11/06 04:36:18 rgb ++ * Ditched spin_lock_irqsave in favour of spin_lock. ++ * Minor initial protocol check rewrite. ++ * Clean up debug printing. ++ * Clean up tdb handling on ipcomp. ++ * Fixed transport mode null pointer de-reference without ipcomp. ++ * Add Svenning's adaptive content compression. ++ * Disabled registration of ipcomp handler. ++ * ++ * Revision 1.70 2000/10/30 23:41:43 henry ++ * Hans-Joerg Hoexer's null-pointer fix ++ * ++ * Revision 1.69 2000/10/10 18:54:16 rgb ++ * Added a fix for incoming policy check with ipcomp enabled but ++ * uncompressible. ++ * ++ * Revision 1.68 2000/09/22 17:53:12 rgb ++ * Fixed ipcomp tdb pointers update for policy checking. ++ * ++ * Revision 1.67 2000/09/21 03:40:58 rgb ++ * Added more debugging to try and track down the cpi outward copy problem. ++ * ++ * Revision 1.66 2000/09/20 04:00:10 rgb ++ * Changed static functions to DEBUG_NO_STATIC to reveal function names for ++ * debugging oopsen. ++ * ++ * Revision 1.65 2000/09/19 07:07:16 rgb ++ * Added debugging to inbound policy check for ipcomp. ++ * Added missing spin_unlocks (thanks Svenning!). ++ * Fixed misplaced tdbnext pointers causing mismatched ipip policy check. ++ * Protect ipcomp policy check following ipip decap with sysctl switch. ++ * ++ * Revision 1.64 2000/09/18 21:27:29 rgb ++ * 2.0 fixes. ++ * ++ * Revision 1.63 2000/09/18 02:35:50 rgb ++ * Added policy checking to ipcomp and re-enabled policy checking by ++ * default. ++ * Optimised satoa calls. ++ * ++ * Revision 1.62 2000/09/17 21:02:32 rgb ++ * Clean up debugging, removing slow timestamp debug code. ++ * ++ * Revision 1.61 2000/09/16 01:07:55 rgb ++ * Fixed erroneous ref from struct ipcomp to struct ipcomphdr. ++ * ++ * Revision 1.60 2000/09/15 11:37:01 rgb ++ * Merge in heavily modified Svenning Soerensen's ++ * IPCOMP zlib deflate code. 
++ * ++ * Revision 1.59 2000/09/15 04:56:20 rgb ++ * Remove redundant satoa() call, reformat comment. ++ * ++ * Revision 1.58 2000/09/13 08:00:52 rgb ++ * Flick on inbound policy checking. ++ * ++ * Revision 1.57 2000/09/12 03:22:19 rgb ++ * Converted inbound_policy_check to sysctl. ++ * Re-enabled policy backcheck. ++ * Moved policy checks to top and within tdb lock. ++ * ++ * Revision 1.56 2000/09/08 19:12:56 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * ++ * Revision 1.55 2000/08/28 18:15:46 rgb ++ * Added MB's nf-debug reset patch. ++ * ++ * Revision 1.54 2000/08/27 01:41:26 rgb ++ * More minor tweaks to the bad padding debug code. ++ * ++ * Revision 1.53 2000/08/24 16:54:16 rgb ++ * Added KLIPS_PRINTMORE macro to continue lines without KERN_INFO level ++ * info. ++ * Tidied up device reporting at the start of ipsec_rcv. ++ * Tidied up bad padding debugging and processing. ++ * ++ * Revision 1.52 2000/08/20 21:36:03 rgb ++ * Activated pfkey_expire() calls. ++ * Added a hard/soft expiry parameter to pfkey_expire(). ++ * Added sanity checking to avoid propagating zero or smaller-length skbs ++ * from a bogus decryption. ++ * Re-arranged the order of soft and hard expiry to conform to RFC2367. ++ * Clean up references to CONFIG_IPSEC_PFKEYv2. ++ * ++ * Revision 1.51 2000/08/18 21:23:30 rgb ++ * Improve bad padding warning so that the printk buffer doesn't get ++ * trampled. ++ * ++ * Revision 1.50 2000/08/01 14:51:51 rgb ++ * Removed _all_ remaining traces of DES. ++ * ++ * Revision 1.49 2000/07/28 13:50:53 rgb ++ * Changed enet_statistics to net_device_stats and added back compatibility ++ * for pre-2.1.19. ++ * ++ * Revision 1.48 2000/05/10 19:14:40 rgb ++ * Only check usetime against soft and hard limits if the tdb has been ++ * used. ++ * Cast output of ntohl so that the broken prototype doesn't make our ++ * compile noisy. 
++ * ++ * Revision 1.47 2000/05/09 17:45:43 rgb ++ * Fix replay bitmap corruption bug upon receipt of bogus packet ++ * with correct SPI. This was a DoS. ++ * ++ * Revision 1.46 2000/03/27 02:31:58 rgb ++ * Fixed authentication failure printout bug. ++ * ++ * Revision 1.45 2000/03/22 16:15:37 rgb ++ * Fixed renaming of dev_get (MB). ++ * ++ * Revision 1.44 2000/03/16 08:17:24 rgb ++ * Hardcode PF_KEYv2 support. ++ * Fixed minor bug checking AH header length. ++ * ++ * Revision 1.43 2000/03/14 12:26:59 rgb ++ * Added skb->nfct support for clearing netfilter conntrack bits (MB). ++ * ++ * Revision 1.42 2000/01/26 10:04:04 rgb ++ * Fixed inbound policy checking on transport mode bug. ++ * Fixed noisy 2.0 printk arguments. ++ * ++ * Revision 1.41 2000/01/24 20:58:02 rgb ++ * Improve debugging/reporting support for (disabled) inbound ++ * policy checking. ++ * ++ * Revision 1.40 2000/01/22 23:20:10 rgb ++ * Fixed up inboud policy checking code. ++ * Cleaned out unused crud. ++ * ++ * Revision 1.39 2000/01/21 06:15:29 rgb ++ * Added sanity checks on skb_push(), skb_pull() to prevent panics. ++ * Fixed cut-and-paste debug_tunnel to debug_rcv. ++ * Added inbound policy checking code, disabled. ++ * Simplified output code by updating ipp to post-IPIP decapsulation. ++ * ++ * elided pre-2000 comments. Use "cvs log" ++ * ++ * ++ * Local Variables: ++ * c-set-style: linux ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_sa.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,1880 @@ ++/* ++ * Common routines for IPsec SA maintenance routines. ++ * ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998, 1999, 2000, 2001, 2002 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . 
++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_sa.c,v 1.30.2.3 2007-09-05 02:56:10 paul Exp $ ++ * ++ * This is the file formerly known as "ipsec_xform.h" ++ * ++ */ ++ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* vmalloc() */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include ++#include ++#ifdef SPINLOCK ++#ifdef SPINLOCK_23 ++#include /* *lock* */ ++#else /* SPINLOCK_23 */ ++#include /* *lock* */ ++#endif /* SPINLOCK_23 */ ++#endif /* SPINLOCK */ ++ ++#include ++ ++#include "openswan/radij.h" ++ ++#include "openswan/ipsec_stats.h" ++#include "openswan/ipsec_life.h" ++#include "openswan/ipsec_sa.h" ++#include "openswan/ipsec_xform.h" ++ ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_ipe4.h" ++#include "openswan/ipsec_ah.h" ++#include "openswan/ipsec_esp.h" ++ ++#include ++#include ++ ++#include "openswan/ipsec_proto.h" ++#include "openswan/ipsec_alg.h" ++ ++ ++#ifdef CONFIG_KLIPS_DEBUG ++int debug_xform = 0; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++#define SENDERR(_x) do { error = -(_x); goto errlab; } while (0) ++ ++struct ipsec_sa *ipsec_sadb_hash[SADB_HASHMOD]; ++#ifdef SPINLOCK ++spinlock_t tdb_lock = SPIN_LOCK_UNLOCKED; ++#else /* SPINLOCK */ ++spinlock_t tdb_lock; ++#endif /* SPINLOCK */ ++ ++struct ipsec_sadb ipsec_sadb; ++ ++#if IPSEC_SA_REF_CODE ++ ++/* the sub 
table must be narrower (or equal) in bits than the variable type ++ in the main table to count the number of unused entries in it. */ ++typedef struct { ++ int testSizeOf_refSubTable : ++ ((sizeof(IPsecRefTableUnusedCount) * 8) < IPSEC_SA_REF_SUBTABLE_IDX_WIDTH ? -1 : 1); ++} dummy; ++ ++ ++/* The field where the saref will be hosted in the skb must be wide enough to ++ accomodate the information it needs to store. */ ++typedef struct { ++ int testSizeOf_refField : ++ (IPSEC_SA_REF_HOST_FIELD_WIDTH < IPSEC_SA_REF_TABLE_IDX_WIDTH ? -1 : 1 ); ++} dummy2; ++ ++ ++#define IPS_HASH(said) (((said)->spi + (said)->dst.u.v4.sin_addr.s_addr + (said)->proto) % SADB_HASHMOD) ++ ++ ++void ++ipsec_SAtest(void) ++{ ++ IPsecSAref_t SAref = 258; ++ struct ipsec_sa ips; ++ ips.ips_ref = 772; ++ ++ printk("klips_debug:ipsec_SAtest: " ++ "IPSEC_SA_REF_SUBTABLE_IDX_WIDTH=%u\n" ++ "IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES=%u\n" ++ "IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES=%u\n" ++ "IPSEC_SA_REF_HOST_FIELD_WIDTH=%lu\n" ++ "IPSEC_SA_REF_TABLE_MASK=%x\n" ++ "IPSEC_SA_REF_ENTRY_MASK=%x\n" ++ "IPsecSAref2table(%d)=%u\n" ++ "IPsecSAref2entry(%d)=%u\n" ++ "IPsecSAref2NFmark(%d)=%u\n" ++ "IPsecSAref2SA(%d)=%p\n" ++ "IPsecSA2SAref(%p)=%d\n" ++ , ++ IPSEC_SA_REF_SUBTABLE_IDX_WIDTH, ++ IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES, ++ IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES, ++ (unsigned long) IPSEC_SA_REF_HOST_FIELD_WIDTH, ++ IPSEC_SA_REF_TABLE_MASK, ++ IPSEC_SA_REF_ENTRY_MASK, ++ SAref, IPsecSAref2table(SAref), ++ SAref, IPsecSAref2entry(SAref), ++ SAref, IPsecSAref2NFmark(SAref), ++ SAref, IPsecSAref2SA(SAref), ++ (&ips), IPsecSA2SAref((&ips)) ++ ); ++ return; ++} ++ ++int ++ipsec_SAref_recycle(void) ++{ ++ int table; ++ int entry; ++ int error = 0; ++ ++ ipsec_sadb.refFreeListHead = -1; ++ ipsec_sadb.refFreeListTail = -1; ++ ++ if(ipsec_sadb.refFreeListCont == IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES * IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_SAref_recycle: " ++ "end of table 
reached, continuing at start..\n"); ++ ipsec_sadb.refFreeListCont = 0; ++ } ++ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_SAref_recycle: " ++ "recycling, continuing from SAref=%d (0p%p), table=%d, entry=%d.\n", ++ ipsec_sadb.refFreeListCont, ++ (ipsec_sadb.refTable[IPsecSAref2table(ipsec_sadb.refFreeListCont)] != NULL) ? IPsecSAref2SA(ipsec_sadb.refFreeListCont) : NULL, ++ IPsecSAref2table(ipsec_sadb.refFreeListCont), ++ IPsecSAref2entry(ipsec_sadb.refFreeListCont)); ++ ++ for(table = IPsecSAref2table(ipsec_sadb.refFreeListCont); ++ table < IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES; ++ table++) { ++ if(ipsec_sadb.refTable[table] == NULL) { ++ error = ipsec_SArefSubTable_alloc(table); ++ if(error) { ++ return error; ++ } ++ } ++ for(entry = IPsecSAref2entry(ipsec_sadb.refFreeListCont); ++ entry < IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES; ++ entry++) { ++ if(ipsec_sadb.refTable[table]->entry[entry] == NULL) { ++ ipsec_sadb.refFreeList[++ipsec_sadb.refFreeListTail] = IPsecSArefBuild(table, entry); ++ if(ipsec_sadb.refFreeListTail == (IPSEC_SA_REF_FREELIST_NUM_ENTRIES - 1)) { ++ ipsec_sadb.refFreeListHead = 0; ++ ipsec_sadb.refFreeListCont = ipsec_sadb.refFreeList[ipsec_sadb.refFreeListTail] + 1; ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_SAref_recycle: " ++ "SArefFreeList refilled.\n"); ++ return 0; ++ } ++ } ++ } ++ } ++ ++ if(ipsec_sadb.refFreeListTail == -1) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_SAref_recycle: " ++ "out of room in the SArefTable.\n"); ++ ++ return(-ENOSPC); ++ } ++ ++ ipsec_sadb.refFreeListHead = 0; ++ ipsec_sadb.refFreeListCont = ipsec_sadb.refFreeList[ipsec_sadb.refFreeListTail] + 1; ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_SAref_recycle: " ++ "SArefFreeList partly refilled to %d of %d.\n", ++ ipsec_sadb.refFreeListTail, ++ IPSEC_SA_REF_FREELIST_NUM_ENTRIES); ++ return 0; ++} ++ ++int ++ipsec_SArefSubTable_alloc(unsigned table) ++{ ++ unsigned entry; ++ struct IPsecSArefSubTable* SArefsub; ++ ++ KLIPS_PRINT(debug_xform, ++ 
"klips_debug:ipsec_SArefSubTable_alloc: " ++ "allocating %lu bytes for table %u of %u.\n", ++ (unsigned long) (IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES * sizeof(struct ipsec_sa *)), ++ table, ++ IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES); ++ ++ /* allocate another sub-table */ ++ SArefsub = vmalloc(IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES * sizeof(struct ipsec_sa *)); ++ if(SArefsub == NULL) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_SArefSubTable_alloc: " ++ "error allocating memory for table %u of %u!\n", ++ table, ++ IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES); ++ return -ENOMEM; ++ } ++ ++ /* add this sub-table to the main table */ ++ ipsec_sadb.refTable[table] = SArefsub; ++ ++ /* initialise each element to NULL */ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_SArefSubTable_alloc: " ++ "initialising %u elements (2 ^ %u) of table %u.\n", ++ IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES, ++ IPSEC_SA_REF_SUBTABLE_IDX_WIDTH, ++ table); ++ for(entry = 0; entry < IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES; entry++) { ++ SArefsub->entry[entry] = NULL; ++ } ++ ++ return 0; ++} ++#endif /* IPSEC_SA_REF_CODE */ ++ ++int ++ipsec_saref_freelist_init(void) ++{ ++ int i; ++ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_saref_freelist_init: " ++ "initialising %u elements of FreeList.\n", ++ IPSEC_SA_REF_FREELIST_NUM_ENTRIES); ++ ++ for(i = 0; i < IPSEC_SA_REF_FREELIST_NUM_ENTRIES; i++) { ++ ipsec_sadb.refFreeList[i] = IPSEC_SAREF_NULL; ++ } ++ ipsec_sadb.refFreeListHead = -1; ++ ipsec_sadb.refFreeListCont = 0; ++ ipsec_sadb.refFreeListTail = -1; ++ ++ return 0; ++} ++ ++int ++ipsec_sadb_init(void) ++{ ++ int error = 0; ++ unsigned i; ++ ++ for(i = 0; i < SADB_HASHMOD; i++) { ++ ipsec_sadb_hash[i] = NULL; ++ } ++ /* parts above are for the old style SADB hash table */ ++ ++ ++#if IPSEC_SA_REF_CODE ++ /* initialise SA reference table */ ++ ++ /* initialise the main table */ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sadb_init: " ++ "initialising main table of size %u (2 ^ %u).\n", ++ 
IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES, ++ IPSEC_SA_REF_MAINTABLE_IDX_WIDTH); ++ { ++ unsigned table; ++ for(table = 0; table < IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES; table++) { ++ ipsec_sadb.refTable[table] = NULL; ++ } ++ } ++ ++ /* allocate the first sub-table */ ++ error = ipsec_SArefSubTable_alloc(0); ++ if(error) { ++ return error; ++ } ++ ++ error = ipsec_saref_freelist_init(); ++#endif /* IPSEC_SA_REF_CODE */ ++ return error; ++} ++ ++#if IPSEC_SA_REF_CODE ++IPsecSAref_t ++ipsec_SAref_alloc(int*error) /* pass in error var by pointer */ ++{ ++ IPsecSAref_t SAref; ++ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_SAref_alloc: " ++ "SAref requested... head=%d, cont=%d, tail=%d, listsize=%d.\n", ++ ipsec_sadb.refFreeListHead, ++ ipsec_sadb.refFreeListCont, ++ ipsec_sadb.refFreeListTail, ++ IPSEC_SA_REF_FREELIST_NUM_ENTRIES); ++ ++ if(ipsec_sadb.refFreeListHead == -1) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_SAref_alloc: " ++ "FreeList empty, recycling...\n"); ++ *error = ipsec_SAref_recycle(); ++ if(*error) { ++ return IPSEC_SAREF_NULL; ++ } ++ } ++ ++ SAref = ipsec_sadb.refFreeList[ipsec_sadb.refFreeListHead]; ++ if(SAref == IPSEC_SAREF_NULL) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_SAref_alloc: " ++ "unexpected error, refFreeListHead = %d points to invalid entry.\n", ++ ipsec_sadb.refFreeListHead); ++ *error = -ESPIPE; ++ return IPSEC_SAREF_NULL; ++ } ++ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_SAref_alloc: " ++ "allocating SAref=%d, table=%u, entry=%u of %u.\n", ++ SAref, ++ IPsecSAref2table(SAref), ++ IPsecSAref2entry(SAref), ++ IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES * IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES); ++ ++ ipsec_sadb.refFreeList[ipsec_sadb.refFreeListHead] = IPSEC_SAREF_NULL; ++ ipsec_sadb.refFreeListHead++; ++ if(ipsec_sadb.refFreeListHead > ipsec_sadb.refFreeListTail) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_SAref_alloc: " ++ "last FreeList entry allocated, resetting list head to empty.\n"); ++ 
ipsec_sadb.refFreeListHead = -1; ++ } ++ ++ return SAref; ++} ++#endif /* IPSEC_SA_REF_CODE */ ++ ++int ++ipsec_sa_print(struct ipsec_sa *ips) ++{ ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ ++ printk(KERN_INFO "klips_debug: SA:"); ++ if(ips == NULL) { ++ printk("NULL\n"); ++ return -ENOENT; ++ } ++ printk(" ref=%d", ips->ips_ref); ++ printk(" refcount=%d", atomic_read(&ips->ips_refcount)); ++ if(ips->ips_hnext != NULL) { ++ printk(" hnext=0p%p", ips->ips_hnext); ++ } ++ if(ips->ips_inext != NULL) { ++ printk(" inext=0p%p", ips->ips_inext); ++ } ++ if(ips->ips_onext != NULL) { ++ printk(" onext=0p%p", ips->ips_onext); ++ } ++ sa_len = satot(&ips->ips_said, 0, sa, sizeof(sa)); ++ printk(" said=%s", sa_len ? sa : " (error)"); ++ if(ips->ips_seq) { ++ printk(" seq=%u", ips->ips_seq); ++ } ++ if(ips->ips_pid) { ++ printk(" pid=%u", ips->ips_pid); ++ } ++ if(ips->ips_authalg) { ++ printk(" authalg=%u", ips->ips_authalg); ++ } ++ if(ips->ips_encalg) { ++ printk(" encalg=%u", ips->ips_encalg); ++ } ++ printk(" XFORM=%s%s%s", IPS_XFORM_NAME(ips)); ++ if(ips->ips_replaywin) { ++ printk(" ooowin=%u", ips->ips_replaywin); ++ } ++ if(ips->ips_flags) { ++ printk(" flags=%u", ips->ips_flags); ++ } ++ if(ips->ips_addr_s) { ++ char buf[SUBNETTOA_BUF]; ++ addrtoa(((struct sockaddr_in*)(ips->ips_addr_s))->sin_addr, ++ 0, buf, sizeof(buf)); ++ printk(" src=%s", buf); ++ } ++ if(ips->ips_addr_d) { ++ char buf[SUBNETTOA_BUF]; ++ addrtoa(((struct sockaddr_in*)(ips->ips_addr_s))->sin_addr, ++ 0, buf, sizeof(buf)); ++ printk(" dst=%s", buf); ++ } ++ if(ips->ips_addr_p) { ++ char buf[SUBNETTOA_BUF]; ++ addrtoa(((struct sockaddr_in*)(ips->ips_addr_p))->sin_addr, ++ 0, buf, sizeof(buf)); ++ printk(" proxy=%s", buf); ++ } ++ if(ips->ips_key_bits_a) { ++ printk(" key_bits_a=%u", ips->ips_key_bits_a); ++ } ++ if(ips->ips_key_bits_e) { ++ printk(" key_bits_e=%u", ips->ips_key_bits_e); ++ } ++ ++ printk("\n"); ++ return 0; ++} ++ ++struct ipsec_sa* ++ipsec_sa_alloc(int*error) /* pass in error var 
by pointer */ ++{ ++ struct ipsec_sa* ips; ++ ++ if((ips = kmalloc(sizeof(*ips), GFP_ATOMIC) ) == NULL) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_alloc: " ++ "memory allocation error\n"); ++ *error = -ENOMEM; ++ return NULL; ++ } ++ memset((caddr_t)ips, 0, sizeof(*ips)); ++#if IPSEC_SA_REF_CODE ++ ips->ips_ref = ipsec_SAref_alloc(error); /* pass in error return by pointer */ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_alloc: " ++ "allocated %lu bytes for ipsec_sa struct=0p%p ref=%d.\n", ++ (unsigned long) sizeof(*ips), ++ ips, ++ ips->ips_ref); ++ if(ips->ips_ref == IPSEC_SAREF_NULL) { ++ kfree(ips); ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_alloc: " ++ "SAref allocation error\n"); ++ return NULL; ++ } ++ ++ atomic_inc(&ips->ips_refcount); ++ IPsecSAref2SA(ips->ips_ref) = ips; ++#endif /* IPSEC_SA_REF_CODE */ ++ ++ *error = 0; ++ return(ips); ++} ++ ++int ++ipsec_sa_free(struct ipsec_sa* ips) ++{ ++ return ipsec_sa_wipe(ips); ++} ++ ++struct ipsec_sa * ++ipsec_sa_getbyid(ip_said *said) ++{ ++ int hashval; ++ struct ipsec_sa *ips; ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ ++ if(said == NULL) { ++ KLIPS_PRINT(debug_xform, ++ "klips_error:ipsec_sa_getbyid: " ++ "null pointer passed in!\n"); ++ return NULL; ++ } ++ ++ sa_len = KLIPS_SATOT(debug_xform, said, 0, sa, sizeof(sa)); ++ ++ hashval = IPS_HASH(said); ++ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_getbyid: " ++ "linked entry in ipsec_sa table for hash=%d of SA:%s requested.\n", ++ hashval, ++ sa_len ? sa : " (error)"); ++ ++ if((ips = ipsec_sadb_hash[hashval]) == NULL) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_getbyid: " ++ "no entries in ipsec_sa table for hash=%d of SA:%s.\n", ++ hashval, ++ sa_len ? 
sa : " (error)"); ++ return NULL; ++ } ++ ++ for (; ips; ips = ips->ips_hnext) { ++ if ((ips->ips_said.spi == said->spi) && ++ (ips->ips_said.dst.u.v4.sin_addr.s_addr == said->dst.u.v4.sin_addr.s_addr) && ++ (ips->ips_said.proto == said->proto)) { ++ atomic_inc(&ips->ips_refcount); ++ return ips; ++ } ++ } ++ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_getbyid: " ++ "no entry in linked list for hash=%d of SA:%s.\n", ++ hashval, ++ sa_len ? sa : " (error)"); ++ return NULL; ++} ++ ++int ++ipsec_sa_put(struct ipsec_sa *ips) ++{ ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ ++ if(ips == NULL) { ++ KLIPS_PRINT(debug_xform, ++ "klips_error:ipsec_sa_put: " ++ "null pointer passed in!\n"); ++ return -1; ++ } ++ ++ sa_len = KLIPS_SATOT(debug_xform, &ips->ips_said, 0, sa, sizeof(sa)); ++ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_put: " ++ "ipsec_sa SA:%s, ref:%d reference count decremented.\n", ++ sa_len ? sa : " (error)", ++ ips->ips_ref); ++ ++ atomic_dec(&ips->ips_refcount); ++ ++ return 0; ++} ++ ++/* ++ The ipsec_sa table better *NOT* be locked before it is handed in, or SMP locks will happen ++*/ ++int ++ipsec_sa_add(struct ipsec_sa *ips) ++{ ++ int error = 0; ++ unsigned int hashval; ++ ++ if(ips == NULL) { ++ KLIPS_PRINT(debug_xform, ++ "klips_error:ipsec_sa_add: " ++ "null pointer passed in!\n"); ++ return -ENODATA; ++ } ++ hashval = IPS_HASH(&ips->ips_said); ++ ++ atomic_inc(&ips->ips_refcount); ++ spin_lock_bh(&tdb_lock); ++ ++ ips->ips_hnext = ipsec_sadb_hash[hashval]; ++ ipsec_sadb_hash[hashval] = ips; ++ ++ spin_unlock_bh(&tdb_lock); ++ ++ return error; ++} ++ ++/* ++ The ipsec_sa table better be locked before it is handed in, or races might happen ++*/ ++int ++ipsec_sa_del(struct ipsec_sa *ips) ++{ ++ unsigned int hashval; ++ struct ipsec_sa *ipstp; ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ ++ if(ips == NULL) { ++ KLIPS_PRINT(debug_xform, ++ "klips_error:ipsec_sa_del: " ++ "null pointer passed in!\n"); ++ return -ENODATA; ++ } ++ ++ sa_len 
= KLIPS_SATOT(debug_xform, &ips->ips_said, 0, sa, sizeof(sa)); ++ if(ips->ips_inext || ips->ips_onext) { ++ KLIPS_PRINT(debug_xform, ++ "klips_error:ipsec_sa_del: " ++ "SA:%s still linked!\n", ++ sa_len ? sa : " (error)"); ++ return -EMLINK; ++ } ++ ++ hashval = IPS_HASH(&ips->ips_said); ++ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_del: " ++ "deleting SA:%s, hashval=%d.\n", ++ sa_len ? sa : " (error)", ++ hashval); ++ if(ipsec_sadb_hash[hashval] == NULL) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_del: " ++ "no entries in ipsec_sa table for hash=%d of SA:%s.\n", ++ hashval, ++ sa_len ? sa : " (error)"); ++ return -ENOENT; ++ } ++ ++ if (ips == ipsec_sadb_hash[hashval]) { ++ ipsec_sadb_hash[hashval] = ipsec_sadb_hash[hashval]->ips_hnext; ++ ips->ips_hnext = NULL; ++ atomic_dec(&ips->ips_refcount); ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_del: " ++ "successfully deleted first ipsec_sa in chain.\n"); ++ return 0; ++ } else { ++ for (ipstp = ipsec_sadb_hash[hashval]; ++ ipstp; ++ ipstp = ipstp->ips_hnext) { ++ if (ipstp->ips_hnext == ips) { ++ ipstp->ips_hnext = ips->ips_hnext; ++ ips->ips_hnext = NULL; ++ atomic_dec(&ips->ips_refcount); ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_del: " ++ "successfully deleted link in ipsec_sa chain.\n"); ++ return 0; ++ } ++ } ++ } ++ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_del: " ++ "no entries in linked list for hash=%d of SA:%s.\n", ++ hashval, ++ sa_len ? 
sa : " (error)"); ++ return -ENOENT; ++} ++ ++/* ++ The ipsec_sa table better be locked before it is handed in, or races ++ might happen ++*/ ++int ++ipsec_sa_delchain(struct ipsec_sa *ips) ++{ ++ struct ipsec_sa *ipsdel; ++ int error = 0; ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ ++ if(ips == NULL) { ++ KLIPS_PRINT(debug_xform, ++ "klips_error:ipsec_sa_delchain: " ++ "null pointer passed in!\n"); ++ return -ENODATA; ++ } ++ ++ sa_len = KLIPS_SATOT(debug_xform, &ips->ips_said, 0, sa, sizeof(sa)); ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_delchain: " ++ "passed SA:%s\n", ++ sa_len ? sa : " (error)"); ++ while(ips->ips_onext != NULL) { ++ ips = ips->ips_onext; ++ } ++ ++ while(ips) { ++ /* XXX send a pfkey message up to advise of deleted ipsec_sa */ ++ sa_len = KLIPS_SATOT(debug_xform, &ips->ips_said, 0, sa, sizeof(sa)); ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_delchain: " ++ "unlinking and delting SA:%s", ++ sa_len ? sa : " (error)"); ++ ipsdel = ips; ++ ips = ips->ips_inext; ++ if(ips != NULL) { ++ sa_len = KLIPS_SATOT(debug_xform, &ips->ips_said, 0, sa, sizeof(sa)); ++ KLIPS_PRINT(debug_xform, ++ ", inext=%s", ++ sa_len ? 
sa : " (error)"); ++ atomic_dec(&ipsdel->ips_refcount); ++ ipsdel->ips_inext = NULL; ++ atomic_dec(&ips->ips_refcount); ++ ips->ips_onext = NULL; ++ } ++ KLIPS_PRINT(debug_xform, ++ ".\n"); ++ if((error = ipsec_sa_del(ipsdel))) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_delchain: " ++ "ipsec_sa_del returned error %d.\n", -error); ++ return error; ++ } ++ if((error = ipsec_sa_wipe(ipsdel))) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_delchain: " ++ "ipsec_sa_wipe returned error %d.\n", -error); ++ return error; ++ } ++ } ++ return error; ++} ++ ++int ++ipsec_sadb_cleanup(__u8 proto) ++{ ++ unsigned i; ++ int error = 0; ++ struct ipsec_sa *ips, **ipsprev, *ipsdel; ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sadb_cleanup: " ++ "cleaning up proto=%d.\n", ++ proto); ++ ++ spin_lock_bh(&tdb_lock); ++ ++ for (i = 0; i < SADB_HASHMOD; i++) { ++ ipsprev = &(ipsec_sadb_hash[i]); ++ ips = ipsec_sadb_hash[i]; ++ if(ips != NULL) { ++ atomic_inc(&ips->ips_refcount); ++ } ++ for(; ips != NULL;) { ++ sa_len = KLIPS_SATOT(debug_xform, &ips->ips_said, 0, sa, sizeof(sa)); ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sadb_cleanup: " ++ "checking SA:%s, hash=%d, ref=%d", ++ sa_len ? sa : " (error)", ++ i, ++ ips->ips_ref); ++ ipsdel = ips; ++ ips = ipsdel->ips_hnext; ++ if(ips != NULL) { ++ atomic_inc(&ips->ips_refcount); ++ sa_len = KLIPS_SATOT(debug_xform, &ips->ips_said, 0, sa, sizeof(sa)); ++ KLIPS_PRINT(debug_xform, ++ ", hnext=%s", ++ sa_len ? sa : " (error)"); ++ } ++ if(*ipsprev != NULL) { ++ sa_len = KLIPS_SATOT(debug_xform, &(*ipsprev)->ips_said, 0, sa, sizeof(sa)); ++ KLIPS_PRINT(debug_xform, ++ ", *ipsprev=%s", ++ sa_len ? sa : " (error)"); ++ if((*ipsprev)->ips_hnext) { ++ sa_len = KLIPS_SATOT(debug_xform, &(*ipsprev)->ips_hnext->ips_said, 0, sa, sizeof(sa)); ++ KLIPS_PRINT(debug_xform, ++ ", *ipsprev->ips_hnext=%s", ++ sa_len ? 
sa : " (error)"); ++ } ++ } ++ KLIPS_PRINT(debug_xform, ++ ".\n"); ++ if(proto == 0 || (proto == ipsdel->ips_said.proto)) { ++ sa_len = KLIPS_SATOT(debug_xform, &ipsdel->ips_said, 0, sa, sizeof(sa)); ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sadb_cleanup: " ++ "deleting SA chain:%s.\n", ++ sa_len ? sa : " (error)"); ++ if((error = ipsec_sa_delchain(ipsdel))) { ++ SENDERR(-error); ++ } ++ ipsprev = &(ipsec_sadb_hash[i]); ++ ips = ipsec_sadb_hash[i]; ++ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sadb_cleanup: " ++ "deleted SA chain:%s", ++ sa_len ? sa : " (error)"); ++ if(ips != NULL) { ++ sa_len = KLIPS_SATOT(debug_xform, &ips->ips_said, 0, sa, sizeof(sa)); ++ KLIPS_PRINT(debug_xform, ++ ", ipsec_sadb_hash[%d]=%s", ++ i, ++ sa_len ? sa : " (error)"); ++ } ++ if(*ipsprev != NULL) { ++ sa_len = KLIPS_SATOT(debug_xform, &(*ipsprev)->ips_said, 0, sa, sizeof(sa)); ++ KLIPS_PRINT(debug_xform, ++ ", *ipsprev=%s", ++ sa_len ? sa : " (error)"); ++ if((*ipsprev)->ips_hnext != NULL) { ++ sa_len = KLIPS_SATOT(debug_xform, &(*ipsprev)->ips_hnext->ips_said, 0, sa, sizeof(sa)); ++ KLIPS_PRINT(debug_xform, ++ ", *ipsprev->ips_hnext=%s", ++ sa_len ? 
sa : " (error)"); ++ } ++ } ++ KLIPS_PRINT(debug_xform, ++ ".\n"); ++ } else { ++ ipsprev = &ipsdel; ++ } ++ if(ipsdel != NULL) { ++ ipsec_sa_put(ipsdel); ++ } ++ } ++ } ++ errlab: ++ ++ spin_unlock_bh(&tdb_lock); ++ ++ ++#if IPSEC_SA_REF_CODE ++ /* clean up SA reference table */ ++ ++ /* go through the ref table and clean out all the SAs */ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sadb_cleanup: " ++ "removing SAref entries and tables."); ++ { ++ unsigned table, entry; ++ for(table = 0; table < IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES; table++) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sadb_cleanup: " ++ "cleaning SAref table=%u.\n", ++ table); ++ if(ipsec_sadb.refTable[table] == NULL) { ++ printk("\n"); ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sadb_cleanup: " ++ "cleaned %u used refTables.\n", ++ table); ++ break; ++ } ++ for(entry = 0; entry < IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES; entry++) { ++ if(ipsec_sadb.refTable[table]->entry[entry] != NULL) { ++ ipsec_sa_delchain(ipsec_sadb.refTable[table]->entry[entry]); ++ ipsec_sadb.refTable[table]->entry[entry] = NULL; ++ } ++ } ++ } ++ } ++#endif /* IPSEC_SA_REF_CODE */ ++ ++ return(error); ++} ++ ++int ++ipsec_sadb_free(void) ++{ ++ int error = 0; ++ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sadb_free: " ++ "freeing SArefTable memory.\n"); ++ ++ /* clean up SA reference table */ ++ ++ /* go through the ref table and clean out all the SAs if any are ++ left and free table memory */ ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sadb_free: " ++ "removing SAref entries and tables.\n"); ++ { ++ unsigned table, entry; ++ for(table = 0; table < IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES; table++) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sadb_free: " ++ "removing SAref table=%u.\n", ++ table); ++ if(ipsec_sadb.refTable[table] == NULL) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sadb_free: " ++ "removed %u used refTables.\n", ++ table); ++ break; ++ } ++ for(entry = 0; entry < 
IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES; entry++) { ++ if(ipsec_sadb.refTable[table]->entry[entry] != NULL) { ++ ipsec_sa_delchain(ipsec_sadb.refTable[table]->entry[entry]); ++ ipsec_sadb.refTable[table]->entry[entry] = NULL; ++ } ++ } ++ vfree(ipsec_sadb.refTable[table]); ++ ipsec_sadb.refTable[table] = NULL; ++ } ++ } ++ ++ return(error); ++} ++ ++int ++ipsec_sa_wipe(struct ipsec_sa *ips) ++{ ++ if(ips == NULL) { ++ return -ENODATA; ++ } ++ ++ /* if(atomic_dec_and_test(ips)) { ++ }; */ ++ ++#if IPSEC_SA_REF_CODE ++ /* remove me from the SArefTable */ ++ { ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ sa_len = KLIPS_SATOT(debug_xform, &ips->ips_said, 0, sa, sizeof(sa)); ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_wipe: " ++ "removing SA=%s(0p%p), SAref=%d, table=%d(0p%p), entry=%d from the refTable.\n", ++ sa_len ? sa : " (error)", ++ ips, ++ ips->ips_ref, ++ IPsecSAref2table(IPsecSA2SAref(ips)), ++ ipsec_sadb.refTable[IPsecSAref2table(IPsecSA2SAref(ips))], ++ IPsecSAref2entry(IPsecSA2SAref(ips))); ++ } ++ if(ips->ips_ref == IPSEC_SAREF_NULL) { ++ KLIPS_PRINT(debug_xform, ++ "klips_debug:ipsec_sa_wipe: " ++ "why does this SA not have a valid SAref?.\n"); ++ } ++ ipsec_sadb.refTable[IPsecSAref2table(IPsecSA2SAref(ips))]->entry[IPsecSAref2entry(IPsecSA2SAref(ips))] = NULL; ++ ips->ips_ref = IPSEC_SAREF_NULL; ++ ipsec_sa_put(ips); ++#endif /* IPSEC_SA_REF_CODE */ ++ ++ /* paranoid clean up */ ++ if(ips->ips_addr_s != NULL) { ++ memset((caddr_t)(ips->ips_addr_s), 0, ips->ips_addr_s_size); ++ kfree(ips->ips_addr_s); ++ } ++ ips->ips_addr_s = NULL; ++ ++ if(ips->ips_addr_d != NULL) { ++ memset((caddr_t)(ips->ips_addr_d), 0, ips->ips_addr_d_size); ++ kfree(ips->ips_addr_d); ++ } ++ ips->ips_addr_d = NULL; ++ ++ if(ips->ips_addr_p != NULL) { ++ memset((caddr_t)(ips->ips_addr_p), 0, ips->ips_addr_p_size); ++ kfree(ips->ips_addr_p); ++ } ++ ips->ips_addr_p = NULL; ++ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ if(ips->ips_natt_oa) { ++ memset((caddr_t)(ips->ips_natt_oa), 0, 
ips->ips_natt_oa_size); ++ kfree(ips->ips_natt_oa); ++ } ++ ips->ips_natt_oa = NULL; ++#endif ++ ++ if(ips->ips_key_a != NULL) { ++ memset((caddr_t)(ips->ips_key_a), 0, ips->ips_key_a_size); ++ kfree(ips->ips_key_a); ++ } ++ ips->ips_key_a = NULL; ++ ++ if(ips->ips_key_e != NULL) { ++ if (ips->ips_alg_enc && ++ ips->ips_alg_enc->ixt_e_destroy_key) ++ { ++ ips->ips_alg_enc->ixt_e_destroy_key(ips->ips_alg_enc, ++ ips->ips_key_e); ++ } else ++ { ++ memset((caddr_t)(ips->ips_key_e), 0, ips->ips_key_e_size); ++ kfree(ips->ips_key_e); ++ } ++ } ++ ips->ips_key_e = NULL; ++ ++ if(ips->ips_iv != NULL) { ++ memset((caddr_t)(ips->ips_iv), 0, ips->ips_iv_size); ++ kfree(ips->ips_iv); ++ } ++ ips->ips_iv = NULL; ++ ++ if(ips->ips_ident_s.data != NULL) { ++ memset((caddr_t)(ips->ips_ident_s.data), ++ 0, ++ ips->ips_ident_s.len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident)); ++ kfree(ips->ips_ident_s.data); ++ } ++ ips->ips_ident_s.data = NULL; ++ ++ if(ips->ips_ident_d.data != NULL) { ++ memset((caddr_t)(ips->ips_ident_d.data), ++ 0, ++ ips->ips_ident_d.len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident)); ++ kfree(ips->ips_ident_d.data); ++ } ++ ips->ips_ident_d.data = NULL; ++ ++#ifdef CONFIG_KLIPS_ALG ++ if (ips->ips_alg_enc||ips->ips_alg_auth) { ++ ipsec_alg_sa_wipe(ips); ++ } ++#endif /* CONFIG_KLIPS_ALG */ ++ ++ memset((caddr_t)ips, 0, sizeof(*ips)); ++ kfree(ips); ++ ips = NULL; ++ ++ return 0; ++} ++ ++extern int sysctl_ipsec_debug_verbose; ++ ++int ipsec_sa_init(struct ipsec_sa *ipsp) ++{ ++ int i; ++ int error = 0; ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ char ipaddr_txt[ADDRTOA_BUF]; ++ char ipaddr2_txt[ADDRTOA_BUF]; ++#if defined (CONFIG_KLIPS_AUTH_HMAC_MD5) || defined (CONFIG_KLIPS_AUTH_HMAC_SHA1) ++ unsigned char kb[AHMD596_BLKLEN]; ++#endif ++#if defined CONFIG_KLIPS_ALG ++ struct ipsec_alg_enc *ixt_e = NULL; ++ struct ipsec_alg_auth *ixt_a = NULL; ++#endif /* CONFIG_KLIPS_ALG */ ++ ++ if(ipsp == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "ipsec_sa_init: " 
++ "ipsp is NULL, fatal\n"); ++ SENDERR(EINVAL); ++ } ++ ++ sa_len = KLIPS_SATOT(debug_pfkey, &ipsp->ips_said, 0, sa, sizeof(sa)); ++ ++ KLIPS_PRINT(debug_pfkey, ++ "ipsec_sa_init: " ++ "(pfkey defined) called for SA:%s\n", ++ sa_len ? sa : " (error)"); ++ ++ KLIPS_PRINT(debug_pfkey, ++ "ipsec_sa_init: " ++ "calling init routine of %s%s%s\n", ++ IPS_XFORM_NAME(ipsp)); ++ ++ switch(ipsp->ips_said.proto) { ++ ++#ifdef CONFIG_KLIPS_IPIP ++ case IPPROTO_IPIP: { ++ addrtoa(((struct sockaddr_in*)(ipsp->ips_addr_s))->sin_addr, ++ 0, ++ ipaddr_txt, sizeof(ipaddr_txt)); ++ addrtoa(((struct sockaddr_in*)(ipsp->ips_addr_d))->sin_addr, ++ 0, ++ ipaddr2_txt, sizeof(ipaddr_txt)); ++ KLIPS_PRINT(debug_pfkey, ++ "ipsec_sa_init: " ++ "(pfkey defined) IPIP ipsec_sa set for %s->%s.\n", ++ ipaddr_txt, ++ ipaddr2_txt); ++ } ++ break; ++#endif /* !CONFIG_KLIPS_IPIP */ ++ ++#ifdef CONFIG_KLIPS_AH ++ case IPPROTO_AH: ++ switch(ipsp->ips_authalg) { ++# ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 ++ case AH_MD5: { ++ unsigned char *akp; ++ unsigned int aks; ++ MD5_CTX *ictx; ++ MD5_CTX *octx; ++ ++ if(ipsp->ips_key_bits_a != (AHMD596_KLEN * 8)) { ++ KLIPS_PRINT(debug_pfkey, ++ "ipsec_sa_init: " ++ "incorrect key size: %d bits -- must be %d bits\n"/*octets (bytes)\n"*/, ++ ipsp->ips_key_bits_a, AHMD596_KLEN * 8); ++ SENDERR(EINVAL); ++ } ++ ++# if KLIPS_DIVULGE_HMAC_KEY ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ "ipsec_sa_init: " ++ "hmac md5-96 key is 0x%08x %08x %08x %08x\n", ++ ntohl(*(((__u32 *)ipsp->ips_key_a)+0)), ++ ntohl(*(((__u32 *)ipsp->ips_key_a)+1)), ++ ntohl(*(((__u32 *)ipsp->ips_key_a)+2)), ++ ntohl(*(((__u32 *)ipsp->ips_key_a)+3))); ++# endif /* KLIPS_DIVULGE_HMAC_KEY */ ++ ++ ipsp->ips_auth_bits = AHMD596_ALEN * 8; ++ ++ /* save the pointer to the key material */ ++ akp = ipsp->ips_key_a; ++ aks = ipsp->ips_key_a_size; ++ ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ "ipsec_sa_init: " ++ "allocating %lu bytes for md5_ctx.\n", ++ (unsigned long) 
sizeof(struct md5_ctx)); ++ if((ipsp->ips_key_a = (caddr_t) ++ kmalloc(sizeof(struct md5_ctx), GFP_ATOMIC)) == NULL) { ++ ipsp->ips_key_a = akp; ++ SENDERR(ENOMEM); ++ } ++ ipsp->ips_key_a_size = sizeof(struct md5_ctx); ++ ++ for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8); i++) { ++ kb[i] = akp[i] ^ HMAC_IPAD; ++ } ++ for (; i < AHMD596_BLKLEN; i++) { ++ kb[i] = HMAC_IPAD; ++ } ++ ++ ictx = &(((struct md5_ctx*)(ipsp->ips_key_a))->ictx); ++ osMD5Init(ictx); ++ osMD5Update(ictx, kb, AHMD596_BLKLEN); ++ ++ for (i = 0; i < AHMD596_BLKLEN; i++) { ++ kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD); ++ } ++ ++ octx = &(((struct md5_ctx*)(ipsp->ips_key_a))->octx); ++ osMD5Init(octx); ++ osMD5Update(octx, kb, AHMD596_BLKLEN); ++ ++# if KLIPS_DIVULGE_HMAC_KEY ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ "ipsec_sa_init: " ++ "MD5 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n", ++ ((__u32*)ictx)[0], ++ ((__u32*)ictx)[1], ++ ((__u32*)ictx)[2], ++ ((__u32*)ictx)[3], ++ ((__u32*)octx)[0], ++ ((__u32*)octx)[1], ++ ((__u32*)octx)[2], ++ ((__u32*)octx)[3] ); ++# endif /* KLIPS_DIVULGE_HMAC_KEY */ ++ ++ /* zero key buffer -- paranoid */ ++ memset(akp, 0, aks); ++ kfree(akp); ++ } ++ break; ++# endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++# ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++ case AH_SHA: { ++ unsigned char *akp; ++ unsigned int aks; ++ SHA1_CTX *ictx; ++ SHA1_CTX *octx; ++ ++ if(ipsp->ips_key_bits_a != (AHSHA196_KLEN * 8)) { ++ KLIPS_PRINT(debug_pfkey, ++ "ipsec_sa_init: " ++ "incorrect key size: %d bits -- must be %d bits\n"/*octets (bytes)\n"*/, ++ ipsp->ips_key_bits_a, AHSHA196_KLEN * 8); ++ SENDERR(EINVAL); ++ } ++ ++# if KLIPS_DIVULGE_HMAC_KEY ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ "ipsec_sa_init: " ++ "hmac sha1-96 key is 0x%08x %08x %08x %08x\n", ++ ntohl(*(((__u32 *)ipsp->ips_key_a)+0)), ++ ntohl(*(((__u32 *)ipsp->ips_key_a)+1)), ++ ntohl(*(((__u32 *)ipsp->ips_key_a)+2)), ++ ntohl(*(((__u32 *)ipsp->ips_key_a)+3))); ++# endif /* 
KLIPS_DIVULGE_HMAC_KEY */ ++ ++ ipsp->ips_auth_bits = AHSHA196_ALEN * 8; ++ ++ /* save the pointer to the key material */ ++ akp = ipsp->ips_key_a; ++ aks = ipsp->ips_key_a_size; ++ ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ "ipsec_sa_init: " ++ "allocating %lu bytes for sha1_ctx.\n", ++ (unsigned long) sizeof(struct sha1_ctx)); ++ if((ipsp->ips_key_a = (caddr_t) ++ kmalloc(sizeof(struct sha1_ctx), GFP_ATOMIC)) == NULL) { ++ ipsp->ips_key_a = akp; ++ SENDERR(ENOMEM); ++ } ++ ipsp->ips_key_a_size = sizeof(struct sha1_ctx); ++ ++ for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8); i++) { ++ kb[i] = akp[i] ^ HMAC_IPAD; ++ } ++ for (; i < AHMD596_BLKLEN; i++) { ++ kb[i] = HMAC_IPAD; ++ } ++ ++ ictx = &(((struct sha1_ctx*)(ipsp->ips_key_a))->ictx); ++ SHA1Init(ictx); ++ SHA1Update(ictx, kb, AHSHA196_BLKLEN); ++ ++ for (i = 0; i < AHSHA196_BLKLEN; i++) { ++ kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD); ++ } ++ ++ octx = &(((struct sha1_ctx*)(ipsp->ips_key_a))->octx); ++ SHA1Init(octx); ++ SHA1Update(octx, kb, AHSHA196_BLKLEN); ++ ++# if KLIPS_DIVULGE_HMAC_KEY ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ "ipsec_sa_init: " ++ "SHA1 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n", ++ ((__u32*)ictx)[0], ++ ((__u32*)ictx)[1], ++ ((__u32*)ictx)[2], ++ ((__u32*)ictx)[3], ++ ((__u32*)octx)[0], ++ ((__u32*)octx)[1], ++ ((__u32*)octx)[2], ++ ((__u32*)octx)[3] ); ++# endif /* KLIPS_DIVULGE_HMAC_KEY */ ++ /* zero key buffer -- paranoid */ ++ memset(akp, 0, aks); ++ kfree(akp); ++ } ++ break; ++# endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ ++ default: ++ KLIPS_PRINT(debug_pfkey, ++ "ipsec_sa_init: " ++ "authalg=%d support not available in the kernel", ++ ipsp->ips_authalg); ++ SENDERR(EINVAL); ++ } ++ break; ++#endif /* CONFIG_KLIPS_AH */ ++ ++#ifdef CONFIG_KLIPS_ESP ++ case IPPROTO_ESP: ++ { ++#if defined (CONFIG_KLIPS_AUTH_HMAC_MD5) || defined (CONFIG_KLIPS_AUTH_HMAC_SHA1) ++ unsigned char *akp; ++ unsigned int aks; ++#endif ++ ipsp->ips_iv_size = 0; ++#ifdef 
CONFIG_KLIPS_ALG ++ ipsec_alg_sa_init(ipsp); ++ ixt_e=ipsp->ips_alg_enc; ++ ++ if (ixt_e == NULL) { ++ if(printk_ratelimit()) { ++ printk(KERN_INFO ++ "ipsec_sa_init: " ++ "encalg=%d support not available in the kernel", ++ ipsp->ips_encalg); ++ } ++ SENDERR(ENOENT); ++ } ++ ++ ipsp->ips_iv_size = ixt_e->ixt_common.ixt_support.ias_ivlen/8; ++ ++ /* Create IV */ ++ if (ipsp->ips_iv_size) { ++ if((ipsp->ips_iv = (caddr_t) ++ kmalloc(ipsp->ips_iv_size, GFP_ATOMIC)) == NULL) { ++ SENDERR(ENOMEM); ++ } ++ prng_bytes(&ipsec_prng, ++ (char *)ipsp->ips_iv, ++ ipsp->ips_iv_size); ++ ipsp->ips_iv_bits = ipsp->ips_iv_size * 8; ++ } ++ ++ if ((error=ipsec_alg_enc_key_create(ipsp)) < 0) ++ SENDERR(-error); ++ ++ if ((ixt_a=ipsp->ips_alg_auth)) { ++ if ((error=ipsec_alg_auth_key_create(ipsp)) < 0) ++ SENDERR(-error); ++ } else ++#endif /* CONFIG_KLIPS_ALG */ ++ ++ switch(ipsp->ips_authalg) { ++# ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 ++ case AH_MD5: { ++ MD5_CTX *ictx; ++ MD5_CTX *octx; ++ ++ if(ipsp->ips_key_bits_a != (AHMD596_KLEN * 8)) { ++ KLIPS_PRINT(debug_pfkey, ++ "ipsec_sa_init: " ++ "incorrect authorisation key size: %d bits -- must be %d bits\n"/*octets (bytes)\n"*/, ++ ipsp->ips_key_bits_a, ++ AHMD596_KLEN * 8); ++ SENDERR(EINVAL); ++ } ++ ++# if KLIPS_DIVULGE_HMAC_KEY ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ "ipsec_sa_init: " ++ "hmac md5-96 key is 0x%08x %08x %08x %08x\n", ++ ntohl(*(((__u32 *)(ipsp->ips_key_a))+0)), ++ ntohl(*(((__u32 *)(ipsp->ips_key_a))+1)), ++ ntohl(*(((__u32 *)(ipsp->ips_key_a))+2)), ++ ntohl(*(((__u32 *)(ipsp->ips_key_a))+3))); ++# endif /* KLIPS_DIVULGE_HMAC_KEY */ ++ ipsp->ips_auth_bits = AHMD596_ALEN * 8; ++ ++ /* save the pointer to the key material */ ++ akp = ipsp->ips_key_a; ++ aks = ipsp->ips_key_a_size; ++ ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ "ipsec_sa_init: " ++ "allocating %lu bytes for md5_ctx.\n", ++ (unsigned long) sizeof(struct md5_ctx)); ++ if((ipsp->ips_key_a = (caddr_t) ++ 
kmalloc(sizeof(struct md5_ctx), GFP_ATOMIC)) == NULL) { ++ ipsp->ips_key_a = akp; ++ SENDERR(ENOMEM); ++ } ++ ipsp->ips_key_a_size = sizeof(struct md5_ctx); ++ ++ for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8); i++) { ++ kb[i] = akp[i] ^ HMAC_IPAD; ++ } ++ for (; i < AHMD596_BLKLEN; i++) { ++ kb[i] = HMAC_IPAD; ++ } ++ ++ ictx = &(((struct md5_ctx*)(ipsp->ips_key_a))->ictx); ++ osMD5Init(ictx); ++ osMD5Update(ictx, kb, AHMD596_BLKLEN); ++ ++ for (i = 0; i < AHMD596_BLKLEN; i++) { ++ kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD); ++ } ++ ++ octx = &(((struct md5_ctx*)(ipsp->ips_key_a))->octx); ++ osMD5Init(octx); ++ osMD5Update(octx, kb, AHMD596_BLKLEN); ++ ++# if KLIPS_DIVULGE_HMAC_KEY ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ "ipsec_sa_init: " ++ "MD5 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n", ++ ((__u32*)ictx)[0], ++ ((__u32*)ictx)[1], ++ ((__u32*)ictx)[2], ++ ((__u32*)ictx)[3], ++ ((__u32*)octx)[0], ++ ((__u32*)octx)[1], ++ ((__u32*)octx)[2], ++ ((__u32*)octx)[3] ); ++# endif /* KLIPS_DIVULGE_HMAC_KEY */ ++ /* paranoid */ ++ memset(akp, 0, aks); ++ kfree(akp); ++ break; ++ } ++# endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++# ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++ case AH_SHA: { ++ SHA1_CTX *ictx; ++ SHA1_CTX *octx; ++ ++ if(ipsp->ips_key_bits_a != (AHSHA196_KLEN * 8)) { ++ KLIPS_PRINT(debug_pfkey, ++ "ipsec_sa_init: " ++ "incorrect authorisation key size: %d bits -- must be %d bits\n"/*octets (bytes)\n"*/, ++ ipsp->ips_key_bits_a, ++ AHSHA196_KLEN * 8); ++ SENDERR(EINVAL); ++ } ++ ++# if KLIPS_DIVULGE_HMAC_KEY ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ "ipsec_sa_init: " ++ "hmac sha1-96 key is 0x%08x %08x %08x %08x\n", ++ ntohl(*(((__u32 *)ipsp->ips_key_a)+0)), ++ ntohl(*(((__u32 *)ipsp->ips_key_a)+1)), ++ ntohl(*(((__u32 *)ipsp->ips_key_a)+2)), ++ ntohl(*(((__u32 *)ipsp->ips_key_a)+3))); ++# endif /* KLIPS_DIVULGE_HMAC_KEY */ ++ ipsp->ips_auth_bits = AHSHA196_ALEN * 8; ++ ++ /* save the pointer to the key material */ ++ akp = 
ipsp->ips_key_a; ++ aks = ipsp->ips_key_a_size; ++ ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ "ipsec_sa_init: " ++ "allocating %lu bytes for sha1_ctx.\n", ++ (unsigned long) sizeof(struct sha1_ctx)); ++ if((ipsp->ips_key_a = (caddr_t) ++ kmalloc(sizeof(struct sha1_ctx), GFP_ATOMIC)) == NULL) { ++ ipsp->ips_key_a = akp; ++ SENDERR(ENOMEM); ++ } ++ ipsp->ips_key_a_size = sizeof(struct sha1_ctx); ++ ++ for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8); i++) { ++ kb[i] = akp[i] ^ HMAC_IPAD; ++ } ++ for (; i < AHMD596_BLKLEN; i++) { ++ kb[i] = HMAC_IPAD; ++ } ++ ++ ictx = &(((struct sha1_ctx*)(ipsp->ips_key_a))->ictx); ++ SHA1Init(ictx); ++ SHA1Update(ictx, kb, AHSHA196_BLKLEN); ++ ++ for (i = 0; i < AHSHA196_BLKLEN; i++) { ++ kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD); ++ } ++ ++ octx = &((struct sha1_ctx*)(ipsp->ips_key_a))->octx; ++ SHA1Init(octx); ++ SHA1Update(octx, kb, AHSHA196_BLKLEN); ++ ++# if KLIPS_DIVULGE_HMAC_KEY ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ "ipsec_sa_init: " ++ "SHA1 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n", ++ ((__u32*)ictx)[0], ++ ((__u32*)ictx)[1], ++ ((__u32*)ictx)[2], ++ ((__u32*)ictx)[3], ++ ((__u32*)octx)[0], ++ ((__u32*)octx)[1], ++ ((__u32*)octx)[2], ++ ((__u32*)octx)[3] ); ++# endif /* KLIPS_DIVULGE_HMAC_KEY */ ++ memset(akp, 0, aks); ++ kfree(akp); ++ break; ++ } ++# endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ ++ case AH_NONE: ++ break; ++ default: ++ KLIPS_PRINT(debug_pfkey, ++ "ipsec_sa_init: " ++ "authalg=%d support not available in the kernel.\n", ++ ipsp->ips_authalg); ++ SENDERR(EINVAL); ++ } ++ } ++ break; ++#endif /* !CONFIG_KLIPS_ESP */ ++#ifdef CONFIG_KLIPS_IPCOMP ++ case IPPROTO_COMP: ++ ipsp->ips_comp_adapt_tries = 0; ++ ipsp->ips_comp_adapt_skip = 0; ++ ipsp->ips_comp_ratio_cbytes = 0; ++ ipsp->ips_comp_ratio_dbytes = 0; ++ break; ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ default: ++ printk(KERN_ERR "KLIPS sa initialization: " ++ "proto=%d unknown.\n", ++ ipsp->ips_said.proto); ++ 
SENDERR(EINVAL); ++ } ++ ++ errlab: ++ return(error); ++} ++ ++ ++ ++/* ++ * $Log: ipsec_sa.c,v $ ++ * Revision 1.30.2.3 2007-09-05 02:56:10 paul ++ * Use the new ipsec_kversion macros by David to deal with 2.6.22 kernels. ++ * Fixes based on David McCullough patch. ++ * ++ * Revision 1.30.2.2 2006/10/06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.30.2.1 2006/04/20 16:33:07 mcr ++ * remove all of CONFIG_KLIPS_ALG --- one can no longer build without it. ++ * Fix in-kernel module compilation. Sub-makefiles do not work. ++ * ++ * Revision 1.30 2005/05/24 01:02:35 mcr ++ * some refactoring/simplification of situation where alg ++ * is not found. ++ * ++ * Revision 1.29 2005/05/18 19:13:28 mcr ++ * rename debug messages. make sure that algo not found is not ++ * a debug message. ++ * ++ * Revision 1.28 2005/05/11 01:30:20 mcr ++ * removed "poor-man"s OOP in favour of proper C structures. ++ * ++ * Revision 1.27 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. ++ * ++ * Revision 1.26 2005/04/14 20:56:24 mcr ++ * moved (pfkey_)ipsec_sa_init to ipsec_sa.c. ++ * ++ * Revision 1.25 2004/08/22 20:12:16 mcr ++ * one more KLIPS_NAT->IPSEC_NAT. ++ * ++ * Revision 1.24 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.23 2004/04/06 02:49:26 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.22.2.1 2003/12/22 15:25:52 jjo ++ * . Merged algo-0.8.1-rc11-test1 into alg-branch ++ * ++ * Revision 1.22 2003/12/10 01:14:27 mcr ++ * NAT-traversal patches to KLIPS. ++ * ++ * Revision 1.21 2003/10/31 02:27:55 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.20.4.1 2003/10/29 01:30:41 mcr ++ * elimited "struct sa_id". 
++ * ++ * Revision 1.20 2003/02/06 01:50:34 rgb ++ * Fixed initialisation bug for first sadb hash bucket that would only manifest itself on platforms where NULL != 0. ++ * ++ * Revision 1.19 2003/01/30 02:32:22 rgb ++ * ++ * Rename SAref table macro names for clarity. ++ * Transmit error code through to caller from callee for better diagnosis of problems. ++ * Convert IPsecSAref_t from signed to unsigned to fix apparent SAref exhaustion bug. ++ * ++ * Revision 1.18 2002/10/12 23:11:53 dhr ++ * ++ * [KenB + DHR] more 64-bit cleanup ++ * ++ * Revision 1.17 2002/10/07 18:31:43 rgb ++ * Move field width sanity checks to ipsec_sa.c ++ * ++ * Revision 1.16 2002/09/20 15:41:02 rgb ++ * Re-wrote most of the SAref code to eliminate Entry pointers. ++ * Added SAref code compiler directive switch. ++ * Added a saref test function for testing macros. ++ * Switch from pfkey_alloc_ipsec_sa() to ipsec_sa_alloc(). ++ * Split ipsec_sadb_cleanup from new funciton ipsec_sadb_free to avoid problem ++ * of freeing newly created structures when clearing the reftable upon startup ++ * to start from a known state. ++ * Place all ipsec sadb globals into one struct. ++ * Rework saref freelist. ++ * Added memory allocation debugging. ++ * ++ * Revision 1.15 2002/09/20 05:01:44 rgb ++ * Update copyright date. ++ * ++ * Revision 1.14 2002/08/13 19:01:25 mcr ++ * patches from kenb to permit compilation of FreeSWAN on ia64. ++ * des library patched to use proper DES_LONG type for ia64. ++ * ++ * Revision 1.13 2002/07/29 03:06:20 mcr ++ * get rid of variable not used warnings. ++ * ++ * Revision 1.12 2002/07/26 08:48:31 rgb ++ * Added SA ref table code. ++ * ++ * Revision 1.11 2002/06/04 16:48:49 rgb ++ * Tidied up pointer code for processor independance. ++ * ++ * Revision 1.10 2002/05/23 07:16:17 rgb ++ * Added ipsec_sa_put() for releasing an ipsec_sa refcount. ++ * Pointer clean-up. ++ * Added refcount code. ++ * Convert "usecount" to "refcount" to remove ambiguity. 
++ * ++ * Revision 1.9 2002/05/14 02:34:49 rgb ++ * Converted reference from ipsec_sa_put to ipsec_sa_add to avoid confusion ++ * with "put" usage in the kernel. ++ * Change all references to tdb, TDB or Tunnel Descriptor Block to ips, ++ * ipsec_sa or ipsec_sa. ++ * Added some preliminary refcount code. ++ * ++ * Revision 1.8 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.7 2002/04/24 07:36:30 mcr ++ * Moved from ./klips/net/ipsec/ipsec_sa.c,v ++ * ++ * Revision 1.6 2002/04/20 00:12:25 rgb ++ * Added esp IV CBC attack fix, disabled. ++ * ++ * Revision 1.5 2002/01/29 17:17:56 mcr ++ * moved include of ipsec_param.h to after include of linux/kernel.h ++ * otherwise, it seems that some option that is set in ipsec_param.h ++ * screws up something subtle in the include path to kernel.h, and ++ * it complains on the snprintf() prototype. ++ * ++ * Revision 1.4 2002/01/29 04:00:52 mcr ++ * more excise of kversions.h header. ++ * ++ * Revision 1.3 2002/01/29 02:13:18 mcr ++ * introduction of ipsec_kversion.h means that include of ++ * ipsec_param.h must preceed any decisions about what files to ++ * include to deal with differences in kernel source. ++ * ++ * Revision 1.2 2001/11/26 09:16:15 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.1.2.2 2001/10/22 21:05:41 mcr ++ * removed phony prototype for des_set_key. ++ * ++ * Revision 1.1.2.1 2001/09/25 02:24:57 mcr ++ * struct tdb -> struct ipsec_sa. ++ * sa(tdb) manipulation functions renamed and moved to ipsec_sa.c ++ * ipsec_xform.c removed. header file still contains useful things. ++ * ++ * ++ * ++ * CLONED from ipsec_xform.c: ++ * Revision 1.53 2001/09/08 21:13:34 rgb ++ * Added pfkey ident extension support for ISAKMPd. (NetCelo) ++ * ++ * Revision 1.52 2001/06/14 19:35:11 rgb ++ * Update copyright date. ++ * ++ * Revision 1.51 2001/05/30 08:14:03 rgb ++ * Removed vestiges of esp-null transforms. 
++ * ++ * Revision 1.50 2001/05/03 19:43:18 rgb ++ * Initialise error return variable. ++ * Update SENDERR macro. ++ * Fix sign of error return code for ipsec_tdbcleanup(). ++ * Use more appropriate return code for ipsec_tdbwipe(). ++ * ++ * Revision 1.49 2001/04/19 18:56:17 rgb ++ * Fixed tdb table locking comments. ++ * ++ * Revision 1.48 2001/02/27 22:24:55 rgb ++ * Re-formatting debug output (line-splitting, joining, 1arg/line). ++ * Check for satoa() return codes. ++ * ++ * Revision 1.47 2000/11/06 04:32:08 rgb ++ * Ditched spin_lock_irqsave in favour of spin_lock_bh. ++ * ++ * Revision 1.46 2000/09/20 16:21:57 rgb ++ * Cleaned up ident string alloc/free. ++ * ++ * Revision 1.45 2000/09/08 19:16:51 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * Removed all references to CONFIG_IPSEC_PFKEYv2. ++ * ++ * Revision 1.44 2000/08/30 05:29:04 rgb ++ * Compiler-define out no longer used tdb_init() in ipsec_xform.c. ++ * ++ * Revision 1.43 2000/08/18 21:30:41 rgb ++ * Purged all tdb_spi, tdb_proto and tdb_dst macros. They are unclear. ++ * ++ * Revision 1.42 2000/08/01 14:51:51 rgb ++ * Removed _all_ remaining traces of DES. ++ * ++ * Revision 1.41 2000/07/28 14:58:31 rgb ++ * Changed kfree_s to kfree, eliminating extra arg to fix 2.4.0-test5. ++ * ++ * Revision 1.40 2000/06/28 05:50:11 rgb ++ * Actually set iv_bits. ++ * ++ * Revision 1.39 2000/05/10 23:11:09 rgb ++ * Added netlink debugging output. ++ * Added a cast to quiet down the ntohl bug. ++ * ++ * Revision 1.38 2000/05/10 19:18:42 rgb ++ * Cast output of ntohl so that the broken prototype doesn't make our ++ * compile noisy. ++ * ++ * Revision 1.37 2000/03/16 14:04:59 rgb ++ * Hardwired CONFIG_IPSEC_PFKEYv2 on. ++ * ++ * Revision 1.36 2000/01/26 10:11:28 rgb ++ * Fixed spacing in error text causing run-in words. ++ * ++ * Revision 1.35 2000/01/21 06:17:16 rgb ++ * Tidied up compiler directive indentation for readability. 
++ * Added ictx,octx vars for simplification.(kravietz) ++ * Added macros for HMAC padding magic numbers.(kravietz) ++ * Fixed missing key length reporting bug. ++ * Fixed bug in tdbwipe to return immediately on NULL tdbp passed in. ++ * ++ * Revision 1.34 1999/12/08 00:04:19 rgb ++ * Fixed SA direction overwriting bug for netlink users. ++ * ++ * Revision 1.33 1999/12/01 22:16:44 rgb ++ * Minor formatting changes in ESP MD5 initialisation. ++ * ++ * Revision 1.32 1999/11/25 09:06:36 rgb ++ * Fixed error return messages, should be returning negative numbers. ++ * Implemented SENDERR macro for propagating error codes. ++ * Added debug message and separate error code for algorithms not compiled ++ * in. ++ * ++ * Revision 1.31 1999/11/23 23:06:26 rgb ++ * Sort out pfkey and freeswan headers, putting them in a library path. ++ * ++ * Revision 1.30 1999/11/18 04:09:20 rgb ++ * Replaced all kernel version macros to shorter, readable form. ++ * ++ * Revision 1.29 1999/11/17 15:53:40 rgb ++ * Changed all occurrences of #include "../../../lib/freeswan.h" ++ * to #include which works due to -Ilibfreeswan in the ++ * klips/net/ipsec/Makefile. ++ * ++ * Revision 1.28 1999/10/18 20:04:01 rgb ++ * Clean-out unused cruft. ++ * ++ * Revision 1.27 1999/10/03 19:01:03 rgb ++ * Spinlock support for 2.3.xx and 2.0.xx kernels. ++ * ++ * Revision 1.26 1999/10/01 16:22:24 rgb ++ * Switch from assignment init. to functional init. of spinlocks. ++ * ++ * Revision 1.25 1999/10/01 15:44:54 rgb ++ * Move spinlock header include to 2.1> scope. ++ * ++ * Revision 1.24 1999/10/01 00:03:46 rgb ++ * Added tdb structure locking. ++ * Minor formatting changes. ++ * Add function to initialize tdb hash table. ++ * ++ * Revision 1.23 1999/05/25 22:42:12 rgb ++ * Add deltdbchain() debugging. ++ * ++ * Revision 1.22 1999/05/25 21:24:31 rgb ++ * Add debugging statements to deltdbchain(). ++ * ++ * Revision 1.21 1999/05/25 03:51:48 rgb ++ * Refix error return code. 
++ * ++ * Revision 1.20 1999/05/25 03:34:07 rgb ++ * Fix error return for flush. ++ * ++ * Revision 1.19 1999/05/09 03:25:37 rgb ++ * Fix bug introduced by 2.2 quick-and-dirty patch. ++ * ++ * Revision 1.18 1999/05/05 22:02:32 rgb ++ * Add a quick and dirty port to 2.2 kernels by Marc Boucher . ++ * ++ * Revision 1.17 1999/04/29 15:20:16 rgb ++ * Change gettdb parameter to a pointer to reduce stack loading and ++ * facilitate parameter sanity checking. ++ * Add sanity checking for null pointer arguments. ++ * Add debugging instrumentation. ++ * Add function deltdbchain() which will take care of unlinking, ++ * zeroing and deleting a chain of tdbs. ++ * Add a parameter to tdbcleanup to be able to delete a class of SAs. ++ * tdbwipe now actually zeroes the tdb as well as any of its pointed ++ * structures. ++ * ++ * Revision 1.16 1999/04/16 15:36:29 rgb ++ * Fix cut-and-paste error causing a memory leak in IPIP TDB freeing. ++ * ++ * Revision 1.15 1999/04/11 00:29:01 henry ++ * GPL boilerplate ++ * ++ * Revision 1.14 1999/04/06 04:54:28 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.13 1999/02/19 18:23:01 rgb ++ * Nix debug off compile warning. ++ * ++ * Revision 1.12 1999/02/17 16:52:16 rgb ++ * Consolidate satoa()s for space and speed efficiency. ++ * Convert DEBUG_IPSEC to KLIPS_PRINT ++ * Clean out unused cruft. ++ * Ditch NET_IPIP dependancy. ++ * Loop for 3des key setting. ++ * ++ * Revision 1.11 1999/01/26 02:09:05 rgb ++ * Remove ah/esp/IPIP switching on include files. ++ * Removed CONFIG_IPSEC_ALGO_SWITCH macro. ++ * Removed dead code. ++ * Clean up debug code when switched off. ++ * Remove references to INET_GET_PROTOCOL. ++ * Added code exclusion macros to reduce code from unused algorithms. ++ * ++ * Revision 1.10 1999/01/22 06:28:55 rgb ++ * Cruft clean-out. ++ * Put random IV generation in kernel. ++ * Added algorithm switch code. ++ * Enhanced debugging. ++ * 64-bit clean-up. 
++ * ++ * Revision 1.9 1998/11/30 13:22:55 rgb ++ * Rationalised all the klips kernel file headers. They are much shorter ++ * now and won't conflict under RH5.2. ++ * ++ * Revision 1.8 1998/11/25 04:59:06 rgb ++ * Add conditionals for no IPIP tunnel code. ++ * Delete commented out code. ++ * ++ * Revision 1.7 1998/10/31 06:50:41 rgb ++ * Convert xform ASCII names to no spaces. ++ * Fixed up comments in #endif directives. ++ * ++ * Revision 1.6 1998/10/19 14:44:28 rgb ++ * Added inclusion of freeswan.h. ++ * sa_id structure implemented and used: now includes protocol. ++ * ++ * Revision 1.5 1998/10/09 04:32:19 rgb ++ * Added 'klips_debug' prefix to all klips printk debug statements. ++ * ++ * Revision 1.4 1998/08/12 00:11:31 rgb ++ * Added new xform functions to the xform table. ++ * Fixed minor debug output spelling error. ++ * ++ * Revision 1.3 1998/07/09 17:45:31 rgb ++ * Clarify algorithm not available message. ++ * ++ * Revision 1.2 1998/06/23 03:00:51 rgb ++ * Check for presence of IPIP protocol if it is setup one way (we don't ++ * know what has been set up the other way and can only assume it will be ++ * symmetrical with the exception of keys). ++ * ++ * Revision 1.1 1998/06/18 21:27:51 henry ++ * move sources from klips/src to klips/net/ipsec, to keep stupid ++ * kernel-build scripts happier in the presence of symlinks ++ * ++ * Revision 1.3 1998/06/11 05:54:59 rgb ++ * Added transform version string pointer to xformsw initialisations. ++ * ++ * Revision 1.2 1998/04/21 21:28:57 rgb ++ * Rearrange debug switches to change on the fly debug output from user ++ * space. Only kernel changes checked in at this time. radij.c was also ++ * changed to temporarily remove buggy debugging code in rj_delete causing ++ * an OOPS and hence, netlink device open errors. 
++ * ++ * Revision 1.1 1998/04/09 03:06:13 henry ++ * sources moved up from linux/net/ipsec ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:02 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.5 1997/06/03 04:24:48 ji ++ * Added ESP-3DES-MD5-96 ++ * ++ * Revision 0.4 1997/01/15 01:28:15 ji ++ * Added new transforms. ++ * ++ * Revision 0.3 1996/11/20 14:39:04 ji ++ * Minor cleanups. ++ * Rationalized debugging code. ++ * ++ * Revision 0.2 1996/11/02 00:18:33 ji ++ * First limited release. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_sha1.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,219 @@ ++/* ++ * RCSID $Id: ipsec_sha1.c,v 1.9 2004-04-06 02:49:26 mcr Exp $ ++ */ ++ ++/* ++ * The rest of the code is derived from sha1.c by Steve Reid, which is ++ * public domain. ++ * Minor cosmetic changes to accomodate it in the Linux kernel by ji. ++ */ ++ ++#include ++#include ++ ++#include "openswan/ipsec_sha1.h" ++ ++#if defined(rol) ++#undef rol ++#endif ++ ++#define SHA1HANDSOFF ++ ++#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits)))) ++ ++/* blk0() and blk() perform the initial expand. 
*/ ++/* I got the idea of expanding during the round function from SSLeay */ ++#ifdef __LITTLE_ENDIAN ++#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \ ++ |(rol(block->l[i],8)&0x00FF00FF)) ++#else ++#define blk0(i) block->l[i] ++#endif ++#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \ ++ ^block->l[(i+2)&15]^block->l[i&15],1)) ++ ++/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ ++#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30); ++#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30); ++#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30); ++#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30); ++#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30); ++ ++ ++/* Hash a single 512-bit block. This is the core of the algorithm. */ ++ ++void SHA1Transform(__u32 state[5], __u8 buffer[64]) ++{ ++__u32 a, b, c, d, e; ++typedef union { ++ unsigned char c[64]; ++ __u32 l[16]; ++} CHAR64LONG16; ++CHAR64LONG16* block; ++#ifdef SHA1HANDSOFF ++static unsigned char workspace[64]; ++ block = (CHAR64LONG16*)workspace; ++ memcpy(block, buffer, 64); ++#else ++ block = (CHAR64LONG16*)buffer; ++#endif ++ /* Copy context->state[] to working vars */ ++ a = state[0]; ++ b = state[1]; ++ c = state[2]; ++ d = state[3]; ++ e = state[4]; ++ /* 4 rounds of 20 operations each. Loop unrolled. 
*/ ++ R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); ++ R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); ++ R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11); ++ R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15); ++ R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); ++ R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); ++ R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); ++ R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); ++ R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); ++ R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); ++ R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); ++ R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); ++ R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); ++ R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); ++ R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); ++ R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); ++ R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); ++ R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); ++ R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); ++ R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); ++ /* Add the working vars back into context.state[] */ ++ state[0] += a; ++ state[1] += b; ++ state[2] += c; ++ state[3] += d; ++ state[4] += e; ++ /* Wipe variables */ ++ a = b = c = d = e = 0; ++} ++ ++ ++/* SHA1Init - Initialize new context */ ++ ++void SHA1Init(void *vcontext) ++{ ++ SHA1_CTX* context = vcontext; ++ ++ /* SHA1 initialization constants */ ++ context->state[0] = 0x67452301; ++ context->state[1] = 0xEFCDAB89; ++ context->state[2] = 0x98BADCFE; ++ context->state[3] = 
0x10325476; ++ context->state[4] = 0xC3D2E1F0; ++ context->count[0] = context->count[1] = 0; ++} ++ ++ ++/* Run your data through this. */ ++ ++void SHA1Update(void *vcontext, unsigned char* data, __u32 len) ++{ ++ SHA1_CTX* context = vcontext; ++ __u32 i, j; ++ ++ j = context->count[0]; ++ if ((context->count[0] += len << 3) < j) ++ context->count[1]++; ++ context->count[1] += (len>>29); ++ j = (j >> 3) & 63; ++ if ((j + len) > 63) { ++ memcpy(&context->buffer[j], data, (i = 64-j)); ++ SHA1Transform(context->state, context->buffer); ++ for ( ; i + 63 < len; i += 64) { ++ SHA1Transform(context->state, &data[i]); ++ } ++ j = 0; ++ } ++ else i = 0; ++ memcpy(&context->buffer[j], &data[i], len - i); ++} ++ ++ ++/* Add padding and return the message digest. */ ++ ++void SHA1Final(unsigned char digest[20], void *vcontext) ++{ ++ __u32 i, j; ++ unsigned char finalcount[8]; ++ SHA1_CTX* context = vcontext; ++ ++ for (i = 0; i < 8; i++) { ++ finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)] ++ >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */ ++ } ++ SHA1Update(context, (unsigned char *)"\200", 1); ++ while ((context->count[0] & 504) != 448) { ++ SHA1Update(context, (unsigned char *)"\0", 1); ++ } ++ SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */ ++ for (i = 0; i < 20; i++) { ++ digest[i] = (unsigned char) ++ ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255); ++ } ++ /* Wipe variables */ ++ i = j = 0; ++ memset(context->buffer, 0, 64); ++ memset(context->state, 0, 20); ++ memset(context->count, 0, 8); ++ memset(&finalcount, 0, 8); ++#ifdef SHA1HANDSOFF /* make SHA1Transform overwrite its own static vars */ ++ SHA1Transform(context->state, context->buffer); ++#endif ++} ++ ++ ++/* ++ * $Log: ipsec_sha1.c,v $ ++ * Revision 1.9 2004-04-06 02:49:26 mcr ++ * pullup of algo code from alg-branch. 
++ * ++ * Revision 1.8 2002/09/10 01:45:14 mcr ++ * changed type of MD5_CTX and SHA1_CTX to void * so that ++ * the function prototypes would match, and could be placed ++ * into a pointer to a function. ++ * ++ * Revision 1.7 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.6 2002/04/24 07:36:30 mcr ++ * Moved from ./klips/net/ipsec/ipsec_sha1.c,v ++ * ++ * Revision 1.5 1999/12/13 13:59:13 rgb ++ * Quick fix to argument size to Update bugs. ++ * ++ * Revision 1.4 1999/04/11 00:29:00 henry ++ * GPL boilerplate ++ * ++ * Revision 1.3 1999/04/06 04:54:27 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.2 1999/01/22 06:55:50 rgb ++ * 64-bit clean-up. ++ * ++ * Revision 1.1 1998/06/18 21:27:50 henry ++ * move sources from klips/src to klips/net/ipsec, to keep stupid ++ * kernel-build scripts happier in the presence of symlinks ++ * ++ * Revision 1.2 1998/04/23 20:54:04 rgb ++ * Fixed md5 and sha1 include file nesting issues, to be cleaned up when ++ * verified. ++ * ++ * Revision 1.1 1998/04/09 03:06:11 henry ++ * sources moved up from linux/net/ipsec ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:05 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.4 1997/01/15 01:28:15 ji ++ * New transform ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_snprintf.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,135 @@ ++/* ++ * @(#) ipsec_snprintf() function ++ * ++ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs ++ * 2001 Michael Richardson ++ * Copyright (C) 2005 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . 
++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * Split out from ipsec_proc.c. ++ */ ++ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++#define __NO_VERSION__ ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_kversion.h" ++#include "openswan/ipsec_param.h" ++ ++#include ++ ++#include "openswan/radij.h" ++ ++#include "openswan/ipsec_life.h" ++#include "openswan/ipsec_stats.h" ++#include "openswan/ipsec_sa.h" ++ ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_tunnel.h" ++#include "openswan/ipsec_xmit.h" ++ ++#include "openswan/ipsec_rcv.h" ++#include "openswan/ipsec_ah.h" ++#include "openswan/ipsec_esp.h" ++#include "openswan/ipsec_kern24.h" ++ ++#ifdef CONFIG_KLIPS_IPCOMP ++#include "openswan/ipcomp.h" ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ ++#include "openswan/ipsec_proto.h" ++ ++#include ++#include ++ ++/* ipsec_snprintf: like snprintf except ++ * - size is signed and a negative value is treated as if it were 0 ++ * - the returned result is never negative -- ++ * an error generates a "?" or null output (depending on space). ++ * (Our callers are too lazy to check for an error return.) ++ * ++ * @param buf String buffer ++ * @param size Size of the string ++ * @param fmt printf string ++ * @param ... Variables to be displayed in fmt ++ * @return int Return code ++ */ ++int ipsec_snprintf(char *buf, ssize_t size, const char *fmt, ...) ++{ ++ va_list args; ++ int i; ++ size_t possize = size < 0? 
0 : size; ++ va_start(args, fmt); ++ i = vsnprintf(buf,possize,fmt,args); ++ va_end(args); ++ if (i < 0) { ++ /* create empty output in place of error */ ++ i = 0; ++ if (size > 0) { ++ *buf = '\0'; ++ } ++ } ++ return i; ++} ++ ++ ++void ipsec_dmp_block(char *s, caddr_t bb, int len) ++{ ++ int i; ++ unsigned char *b = bb; ++ ++ printk(KERN_INFO "klips_dmp: " ++ "at %s, len=%d:\n", s, len); ++ ++ for(i = 0; i < len; i++ /*, c++*/) { ++ if(!(i % 16)) { ++ printk(KERN_INFO ++ "klips_debug: @%03x:", ++ i); ++ } ++ printk(" %02x", b[i]); ++ if(!((i + 1) % 16)) { ++ printk("\n"); ++ } ++ } ++ if(i % 16) { ++ printk("\n"); ++ } ++} ++ ++/* ++ * ++ * $Log: ipsec_snprintf.c,v $ ++ * Revision 1.3.2.1 2006-10-06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.3 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. ++ * ++ * Revision 1.2 2005/04/15 00:32:01 mcr ++ * added ipsec_dmp_block routine. ++ * ++ * ++ * Local Variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_tunnel.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,2893 @@ ++/* ++ * IPSEC Tunneling code. Heavily based on drivers/net/new_tunnel.c ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * for more details. ++ */ ++ ++char ipsec_tunnel_c_version[] = "RCSID $Id: ipsec_tunnel.c,v 1.232.2.7 2007-09-18 18:26:18 paul Exp $"; ++ ++#define __NO_VERSION__ ++#include ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif /* for CONFIG_IP_FORWARD */ ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include ++#include ++#include ++ ++#include /* struct device, struct net_device_stats, dev_queue_xmit() and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include ++#include ++ ++#include ++ ++#ifdef NET_21 ++# include ++# define ip_chk_addr inet_addr_type ++# define IS_MYADDR RTN_LOCAL ++# include ++# undef dev_kfree_skb ++# define dev_kfree_skb(a,b) kfree_skb(a) ++# define PHYSDEV_TYPE ++#endif /* NET_21 */ ++ ++#include /* icmp_send() */ ++#include ++#ifdef NETDEV_23 ++# include ++#endif /* NETDEV_23 */ ++ ++#include ++#include ++ ++#include "openswan/ipsec_kversion.h" ++#include "openswan/radij.h" ++#include "openswan/ipsec_life.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_eroute.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_sa.h" ++#include "openswan/ipsec_tunnel.h" ++#include "openswan/ipsec_xmit.h" ++#include "openswan/ipsec_ipe4.h" ++#include "openswan/ipsec_ah.h" ++#include "openswan/ipsec_esp.h" ++#include "openswan/ipsec_kern24.h" ++ ++#include ++#include ++ ++#include "openswan/ipsec_proto.h" ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++#include ++#endif ++ ++static __u32 zeroes[64]; ++ ++#ifdef CONFIG_KLIPS_DEBUG ++int debug_tunnel = 0; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++DEBUG_NO_STATIC int ++ipsec_tunnel_open(struct net_device *dev) ++{ ++ struct ipsecpriv *prv = dev->priv; 
++ ++ /* ++ * Can't open until attached. ++ */ ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_open: " ++ "dev = %s, prv->dev = %s\n", ++ dev->name, prv->dev?prv->dev->name:"NONE"); ++ ++ if (prv->dev == NULL) ++ return -ENODEV; ++ ++ KLIPS_INC_USE; ++ return 0; ++} ++ ++DEBUG_NO_STATIC int ++ipsec_tunnel_close(struct net_device *dev) ++{ ++ KLIPS_DEC_USE; ++ return 0; ++} ++ ++#ifdef NETDEV_23 ++static inline int ipsec_tunnel_xmit2(struct sk_buff *skb) ++{ ++#ifdef NETDEV_25 /* 2.6 kernels */ ++ return dst_output(skb); ++#else ++ return ip_send(skb); ++#endif ++} ++#endif /* NETDEV_23 */ ++ ++enum ipsec_xmit_value ++ipsec_tunnel_strip_hard_header(struct ipsec_xmit_state *ixs) ++{ ++ /* ixs->physdev->hard_header_len is unreliable and should not be used */ ++ ixs->hard_header_len = (unsigned char *)(ixs->iph) - ixs->skb->data; ++ ++ if(ixs->hard_header_len < 0) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_error:ipsec_xmit_strip_hard_header: " ++ "Negative hard_header_len (%d)?!\n", ixs->hard_header_len); ++ ixs->stats->tx_dropped++; ++ return IPSEC_XMIT_BADHHLEN; ++ } ++ ++ /* while ixs->physdev->hard_header_len is unreliable and ++ * should not be trusted, it accurate and required for ATM, GRE and ++ * some other interfaces to work. Thanks to Willy Tarreau ++ * . 
++ */ ++ if(ixs->hard_header_len == 0) { /* no hard header present */ ++ ixs->hard_header_stripped = 1; ++ ixs->hard_header_len = ixs->physdev->hard_header_len; ++ } ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if (debug_tunnel & DB_TN_XMIT) { ++ int i; ++ char c; ++ ++ printk(KERN_INFO "klips_debug:ipsec_xmit_strip_hard_header: " ++ ">>> skb->len=%ld hard_header_len:%d", ++ (unsigned long int)ixs->skb->len, ixs->hard_header_len); ++ c = ' '; ++ for (i=0; i < ixs->hard_header_len; i++) { ++ printk("%c%02x", c, ixs->skb->data[i]); ++ c = ':'; ++ } ++ printk(" \n"); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ KLIPS_IP_PRINT(debug_tunnel & DB_TN_XMIT, ixs->iph); ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_strip_hard_header: " ++ "Original head,tailroom: %d,%d\n", ++ skb_headroom(ixs->skb), skb_tailroom(ixs->skb)); ++ ++ return IPSEC_XMIT_OK; ++} ++ ++enum ipsec_xmit_value ++ipsec_tunnel_SAlookup(struct ipsec_xmit_state *ixs) ++{ ++ unsigned int bypass; ++ ++ bypass = FALSE; ++ ++ /* ++ * First things first -- look us up in the erouting tables. ++ */ ++ ixs->matcher.sen_len = sizeof (struct sockaddr_encap); ++ ixs->matcher.sen_family = AF_ENCAP; ++ ixs->matcher.sen_type = SENT_IP4; ++ ixs->matcher.sen_ip_src.s_addr = ixs->iph->saddr; ++ ixs->matcher.sen_ip_dst.s_addr = ixs->iph->daddr; ++ ixs->matcher.sen_proto = ixs->iph->protocol; ++ ipsec_extract_ports(ixs->iph, &ixs->matcher); ++ ++ /* ++ * The spinlock is to prevent any other process from accessing or deleting ++ * the eroute while we are using and updating it. 
++ */ ++ spin_lock(&eroute_lock); ++ ++ ixs->eroute = ipsec_findroute(&ixs->matcher); ++ ++ if(ixs->iph->protocol == IPPROTO_UDP) { ++ struct udphdr *t = NULL; ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:udp port check: " ++ "fragoff: %d len: %d>%ld \n", ++ ntohs(ixs->iph->frag_off) & IP_OFFSET, ++ (ixs->skb->len - ixs->hard_header_len), ++ (unsigned long int) ((ixs->iph->ihl << 2) + sizeof(struct udphdr))); ++ ++ if((ntohs(ixs->iph->frag_off) & IP_OFFSET) == 0 && ++ ((ixs->skb->len - ixs->hard_header_len) >= ++ ((ixs->iph->ihl << 2) + sizeof(struct udphdr)))) ++ { ++ t =((struct udphdr*)((caddr_t)ixs->iph+(ixs->iph->ihl<<2))); ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:udp port in packet: " ++ "port %d -> %d\n", ++ ntohs(t->source), ntohs(t->dest)); ++ } ++ ++ ixs->sport=0; ixs->dport=0; ++ ++ if(ixs->skb->sk) { ++#ifdef NET_26 ++#ifdef HAVE_INET_SK_SPORT ++ ixs->sport = ntohs(inet_sk(ixs->skb->sk)->sport); ++ ixs->dport = ntohs(inet_sk(ixs->skb->sk)->dport); ++#else ++ struct udp_sock *us; ++ ++ us = (struct udp_sock *)ixs->skb->sk; ++ ++ ixs->sport = ntohs(us->inet.sport); ++ ixs->dport = ntohs(us->inet.dport); ++#endif ++#else ++ ixs->sport = ntohs(ixs->skb->sk->sport); ++ ixs->dport = ntohs(ixs->skb->sk->dport); ++#endif ++ ++ } ++ ++ if(t != NULL) { ++ if(ixs->sport == 0) { ++ ixs->sport = ntohs(t->source); ++ } ++ if(ixs->dport == 0) { ++ ixs->dport = ntohs(t->dest); ++ } ++ } ++ } ++ ++ /* ++ * practically identical to above, but let's be careful about ++ * tcp vs udp headers ++ */ ++ if(ixs->iph->protocol == IPPROTO_TCP) { ++ struct tcphdr *t = NULL; ++ ++ if((ntohs(ixs->iph->frag_off) & IP_OFFSET) == 0 && ++ ((ixs->skb->len - ixs->hard_header_len) >= ++ ((ixs->iph->ihl << 2) + sizeof(struct tcphdr)))) { ++ t =((struct tcphdr*)((caddr_t)ixs->iph+(ixs->iph->ihl<<2))); ++ } ++ ++ ixs->sport=0; ixs->dport=0; ++ ++ if(ixs->skb->sk) { ++#ifdef NET_26 ++#ifdef HAVE_INET_SK_SPORT ++ ixs->sport = ntohs(inet_sk(ixs->skb->sk)->sport); 
++ ixs->dport = ntohs(inet_sk(ixs->skb->sk)->dport); ++#else ++ struct tcp_tw_bucket *tw; ++ ++ tw = (struct tcp_tw_bucket *)ixs->skb->sk; ++ ++ ixs->sport = ntohs(tw->tw_sport); ++ ixs->dport = ntohs(tw->tw_dport); ++#endif ++#else ++ ixs->sport = ntohs(ixs->skb->sk->sport); ++ ixs->dport = ntohs(ixs->skb->sk->dport); ++#endif ++ } ++ ++ if(t != NULL) { ++ if(ixs->sport == 0) { ++ ixs->sport = ntohs(t->source); ++ } ++ if(ixs->dport == 0) { ++ ixs->dport = ntohs(t->dest); ++ } ++ } ++ } ++ ++ /* default to a %drop eroute */ ++ ixs->outgoing_said.proto = IPPROTO_INT; ++ ixs->outgoing_said.spi = htonl(SPI_DROP); ++ ixs->outgoing_said.dst.u.v4.sin_addr.s_addr = INADDR_ANY; ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_SAlookup: " ++ "checking for local udp/500 IKE packet " ++ "saddr=%x, er=0p%p, daddr=%x, er_dst=%x, proto=%d sport=%d dport=%d\n", ++ ntohl((unsigned int)ixs->iph->saddr), ++ ixs->eroute, ++ ntohl((unsigned int)ixs->iph->daddr), ++ ixs->eroute ? ntohl((unsigned int)ixs->eroute->er_said.dst.u.v4.sin_addr.s_addr) : 0, ++ ixs->iph->protocol, ++ ixs->sport, ++ ixs->dport); ++ ++ /* ++ * cheat for now...are we udp/500? If so, let it through ++ * without interference since it is most likely an IKE packet. ++ */ ++ ++ if (ip_chk_addr((unsigned long)ixs->iph->saddr) == IS_MYADDR ++ && (ixs->eroute==NULL ++ || ixs->iph->daddr == ixs->eroute->er_said.dst.u.v4.sin_addr.s_addr ++ || INADDR_ANY == ixs->eroute->er_said.dst.u.v4.sin_addr.s_addr) ++ && (ixs->iph->protocol == IPPROTO_UDP && ++ (ixs->sport == 500 || ixs->sport == 4500))) { ++ /* Whatever the eroute, this is an IKE message ++ * from us (i.e. not being forwarded). ++ * Furthermore, if there is a tunnel eroute, ++ * the destination is the peer for this eroute. ++ * So %pass the packet: modify the default %drop. 
++ */ ++ ++ ixs->outgoing_said.spi = htonl(SPI_PASS); ++ if(!(ixs->skb->sk) && ((ntohs(ixs->iph->frag_off) & IP_MF) != 0)) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_SAlookup: " ++ "local UDP/500 (probably IKE) passthrough: base fragment, rest of fragments will probably get filtered.\n"); ++ } ++ bypass = TRUE; ++ } ++ ++#ifdef KLIPS_EXCEPT_DNS53 ++ /* ++ * ++ * if we are udp/53 or tcp/53, also let it through a %trap or %hold, ++ * since it is DNS, but *also* follow the %trap. ++ * ++ * we do not do this for tunnels, only %trap's and %hold's. ++ * ++ */ ++ ++ if (ip_chk_addr((unsigned long)ixs->iph->saddr) == IS_MYADDR ++ && (ixs->eroute==NULL ++ || ixs->iph->daddr == ixs->eroute->er_said.dst.u.v4.sin_addr.s_addr ++ || INADDR_ANY == ixs->eroute->er_said.dst.u.v4.sin_addr.s_addr) ++ && ((ixs->iph->protocol == IPPROTO_UDP ++ || ixs->iph->protocol == IPPROTO_TCP) ++ && ixs->dport == 53)) { ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_SAlookup: " ++ "possible DNS packet\n"); ++ ++ if(ixs->eroute) ++ { ++ if(ixs->eroute->er_said.spi == htonl(SPI_TRAP) ++ || ixs->eroute->er_said.spi == htonl(SPI_HOLD)) ++ { ++ ixs->outgoing_said.spi = htonl(SPI_PASSTRAP); ++ bypass = TRUE; ++ } ++ } ++ else ++ { ++ ixs->outgoing_said.spi = htonl(SPI_PASSTRAP); ++ bypass = TRUE; ++ } ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_SAlookup: " ++ "bypass = %d\n", bypass); ++ ++ if(bypass ++ && !(ixs->skb->sk) ++ && ((ntohs(ixs->iph->frag_off) & IP_MF) != 0)) ++ { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_SAlookup: " ++ "local port 53 (probably DNS) passthrough:" ++ "base fragment, rest of fragments will " ++ "probably get filtered.\n"); ++ } ++ } ++#endif ++ ++ if (bypass==FALSE && ixs->eroute) { ++ ixs->eroute->er_count++; ++ ixs->eroute->er_lasttime = jiffies/HZ; ++ if(ixs->eroute->er_said.proto==IPPROTO_INT ++ && ixs->eroute->er_said.spi==htonl(SPI_HOLD)) ++ { ++ 
KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_SAlookup: " ++ "shunt SA of HOLD: skb stored in HOLD.\n"); ++ if(ixs->eroute->er_last != NULL) { ++ kfree_skb(ixs->eroute->er_last); ++ } ++ ixs->eroute->er_last = ixs->skb; ++ ixs->skb = NULL; ++ ixs->stats->tx_dropped++; ++ spin_unlock(&eroute_lock); ++ return IPSEC_XMIT_STOLEN; ++ } ++ ixs->outgoing_said = ixs->eroute->er_said; ++ ixs->eroute_pid = ixs->eroute->er_pid; ++ ++ /* Copy of the ident for the TRAP/TRAPSUBNET eroutes */ ++ if(ixs->outgoing_said.proto==IPPROTO_INT ++ && (ixs->outgoing_said.spi==htonl(SPI_TRAP) ++ || (ixs->outgoing_said.spi==htonl(SPI_TRAPSUBNET)))) { ++ int len; ++ ++ ixs->ips.ips_ident_s.type = ixs->eroute->er_ident_s.type; ++ ixs->ips.ips_ident_s.id = ixs->eroute->er_ident_s.id; ++ ixs->ips.ips_ident_s.len = ixs->eroute->er_ident_s.len; ++ if (ixs->ips.ips_ident_s.len) ++ { ++ len = ixs->ips.ips_ident_s.len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident); ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_SAlookup: " ++ "allocating %d bytes for ident_s shunt SA of HOLD: skb stored in HOLD.\n", ++ len); ++ if ((ixs->ips.ips_ident_s.data = kmalloc(len, GFP_ATOMIC)) == NULL) { ++ printk(KERN_WARNING "klips_debug:ipsec_xmit_SAlookup: " ++ "Failed, tried to allocate %d bytes for source ident.\n", ++ len); ++ ixs->stats->tx_dropped++; ++ spin_unlock(&eroute_lock); ++ return IPSEC_XMIT_ERRMEMALLOC; ++ } ++ memcpy(ixs->ips.ips_ident_s.data, ixs->eroute->er_ident_s.data, len); ++ } ++ ixs->ips.ips_ident_d.type = ixs->eroute->er_ident_d.type; ++ ixs->ips.ips_ident_d.id = ixs->eroute->er_ident_d.id; ++ ixs->ips.ips_ident_d.len = ixs->eroute->er_ident_d.len; ++ if (ixs->ips.ips_ident_d.len) ++ { ++ len = ixs->ips.ips_ident_d.len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident); ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_SAlookup: " ++ "allocating %d bytes for ident_d shunt SA of HOLD: skb stored in HOLD.\n", ++ len); ++ if 
((ixs->ips.ips_ident_d.data = kmalloc(len, GFP_ATOMIC)) == NULL) { ++ printk(KERN_WARNING "klips_debug:ipsec_xmit_SAlookup: " ++ "Failed, tried to allocate %d bytes for dest ident.\n", ++ len); ++ ixs->stats->tx_dropped++; ++ spin_unlock(&eroute_lock); ++ return IPSEC_XMIT_ERRMEMALLOC; ++ } ++ memcpy(ixs->ips.ips_ident_d.data, ixs->eroute->er_ident_d.data, len); ++ } ++ } ++ } ++ ++ spin_unlock(&eroute_lock); ++ return IPSEC_XMIT_OK; ++} ++ ++ ++enum ipsec_xmit_value ++ipsec_tunnel_restore_hard_header(struct ipsec_xmit_state*ixs) ++{ ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_restore_hard_header: " ++ "After recursive xforms -- head,tailroom: %d,%d\n", ++ skb_headroom(ixs->skb), ++ skb_tailroom(ixs->skb)); ++ ++ if(ixs->saved_header) { ++ if(skb_headroom(ixs->skb) < ixs->hard_header_len) { ++ printk(KERN_WARNING ++ "klips_error:ipsec_xmit_restore_hard_header: " ++ "tried to skb_push hhlen=%d, %d available. This should never happen, please report.\n", ++ ixs->hard_header_len, ++ skb_headroom(ixs->skb)); ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_PUSHPULLERR; ++ ++ } ++ skb_push(ixs->skb, ixs->hard_header_len); ++ { ++ int i; ++ for (i = 0; i < ixs->hard_header_len; i++) { ++ ixs->skb->data[i] = ixs->saved_header[i]; ++ } ++ } ++ } ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ if (ixs->natt_type && ixs->natt_head) { ++ struct iphdr *ipp = ip_hdr(ixs->skb); ++ struct udphdr *udp; ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "encapsuling packet into UDP (NAT-Traversal) (%d %d)\n", ++ ixs->natt_type, ixs->natt_head); ++ ++ ixs->iphlen = ipp->ihl << 2; ++ ipp->tot_len = ++ htons(ntohs(ipp->tot_len) + ixs->natt_head); ++ if(skb_tailroom(ixs->skb) < ixs->natt_head) { ++ printk(KERN_WARNING "klips_error:ipsec_tunnel_start_xmit: " ++ "tried to skb_put %d, %d available. 
" ++ "This should never happen, please report.\n", ++ ixs->natt_head, ++ skb_tailroom(ixs->skb)); ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_ESPUDP; ++ } ++ skb_put(ixs->skb, ixs->natt_head); ++ ++ udp = (struct udphdr *)((char *)ipp + ixs->iphlen); ++ ++ /* move ESP hdr after UDP hdr */ ++ memmove((void *)((char *)udp + ixs->natt_head), ++ (void *)(udp), ++ ntohs(ipp->tot_len) - ixs->iphlen - ixs->natt_head); ++ ++ /* clear UDP & Non-IKE Markers (if any) */ ++ memset(udp, 0, ixs->natt_head); ++ ++ /* fill UDP with usefull informations ;-) */ ++ udp->source = htons(ixs->natt_sport); ++ udp->dest = htons(ixs->natt_dport); ++ udp->len = htons(ntohs(ipp->tot_len) - ixs->iphlen); ++ ++ /* set protocol */ ++ ipp->protocol = IPPROTO_UDP; ++ ++ /* fix IP checksum */ ++ ipp->check = 0; ++ ipp->check = ip_fast_csum((unsigned char *)ipp, ipp->ihl); ++ } ++#endif ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_restore_hard_header: " ++ "With hard_header, final head,tailroom: %d,%d\n", ++ skb_headroom(ixs->skb), ++ skb_tailroom(ixs->skb)); ++ ++ return IPSEC_XMIT_OK; ++} ++ ++enum ipsec_xmit_value ++ipsec_tunnel_send(struct ipsec_xmit_state*ixs) ++{ ++#ifdef NETDEV_25 ++ struct flowi fl; ++#endif ++ ++#ifdef NET_21 /* 2.2 and 2.4 kernels */ ++ /* new route/dst cache code from James Morris */ ++ ixs->skb->dev = ixs->physdev; ++#ifdef NETDEV_25 ++ memset (&fl, 0x0, sizeof (struct flowi)); ++ fl.oif = ixs->physdev->iflink; ++ fl.nl_u.ip4_u.daddr = ip_hdr(ixs->skb)->daddr; ++ fl.nl_u.ip4_u.saddr = ixs->pass ? 0 : ip_hdr(ixs->skb)->saddr; ++ fl.nl_u.ip4_u.tos = RT_TOS(ip_hdr(ixs->skb)->tos); ++ fl.proto = ip_hdr(ixs->skb)->protocol; ++ if ((ixs->error = ip_route_output_key(&ixs->route, &fl))) { ++#else ++ /*skb_orphan(ixs->skb);*/ ++ if((ixs->error = ip_route_output(&ixs->route, ++ ixs->skb->nh.iph->daddr, ++ ixs->pass ? 0 : ip_hdr(ixs->skb)->saddr, ++ RT_TOS(ip_hdr(ixs->skb)->tos), ++ /* mcr->rgb: should this be 0 instead? 
*/ ++ ixs->physdev->iflink))) { ++#endif ++ ixs->stats->tx_errors++; ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_send: " ++ "ip_route_output failed with error code %d, rt->u.dst.dev=%s, dropped\n", ++ ixs->error, ++ ixs->route->u.dst.dev->name); ++ return IPSEC_XMIT_ROUTEERR; ++ } ++ if(ixs->dev == ixs->route->u.dst.dev) { ++ ip_rt_put(ixs->route); ++ /* This is recursion, drop it. */ ++ ixs->stats->tx_errors++; ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_send: " ++ "suspect recursion, dev=rt->u.dst.dev=%s, dropped\n", ++ ixs->dev->name); ++ return IPSEC_XMIT_RECURSDETECT; ++ } ++ dst_release(ixs->skb->dst); ++ ixs->skb->dst = &ixs->route->u.dst; ++ ixs->stats->tx_bytes += ixs->skb->len; ++ if(ixs->skb->len < skb_network_header(ixs->skb) - ixs->skb->data) { ++ ixs->stats->tx_errors++; ++ printk(KERN_WARNING ++ "klips_error:ipsec_xmit_send: " ++ "tried to __skb_pull nh-data=%ld, %d available. This should never happen, please report.\n", ++ (unsigned long)(skb_network_header(ixs->skb) - ixs->skb->data), ++ ixs->skb->len); ++ return IPSEC_XMIT_PUSHPULLERR; ++ } ++ __skb_pull(ixs->skb, skb_network_header(ixs->skb) - ixs->skb->data); ++#ifdef SKB_RESET_NFCT ++ if(!ixs->pass) { ++ nf_conntrack_put(ixs->skb->nfct); ++ ixs->skb->nfct = NULL; ++ } ++#if defined(CONFIG_NETFILTER_DEBUG) && defined(HAVE_SKB_NF_DEBUG) ++ ixs->skb->nf_debug = 0; ++#endif /* CONFIG_NETFILTER_DEBUG */ ++#endif /* SKB_RESET_NFCT */ ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_send: " ++ "...done, calling ip_send() on device:%s\n", ++ ixs->skb->dev ? 
ixs->skb->dev->name : "NULL"); ++ KLIPS_IP_PRINT(debug_tunnel & DB_TN_XMIT, ip_hdr(ixs->skb)); ++#ifdef NETDEV_23 /* 2.4 kernels */ ++ { ++ int err; ++ ++ err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, ixs->skb, NULL, ixs->route->u.dst.dev, ++ ipsec_tunnel_xmit2); ++ if(err != NET_XMIT_SUCCESS && err != NET_XMIT_CN) { ++ if(net_ratelimit()) ++ printk(KERN_ERR ++ "klips_error:ipsec_xmit_send: " ++ "ip_send() failed, err=%d\n", ++ -err); ++ ixs->stats->tx_errors++; ++ ixs->stats->tx_aborted_errors++; ++ ixs->skb = NULL; ++ return IPSEC_XMIT_IPSENDFAILURE; ++ } ++ } ++#else /* NETDEV_23 */ /* 2.2 kernels */ ++ ip_send(ixs->skb); ++#endif /* NETDEV_23 */ ++#else /* NET_21 */ /* 2.0 kernels */ ++ ixs->skb->arp = 1; ++ /* ISDN/ASYNC PPP from Matjaz Godec. */ ++ /* skb->protocol = htons(ETH_P_IP); */ ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_send: " ++ "...done, calling dev_queue_xmit() or ip_fragment().\n"); ++ IP_SEND(ixs->skb, ixs->physdev); ++#endif /* NET_21 */ ++ ixs->stats->tx_packets++; ++ ++ ixs->skb = NULL; ++ ++ return IPSEC_XMIT_OK; ++} ++ ++void ++ipsec_tunnel_cleanup(struct ipsec_xmit_state*ixs) ++{ ++#if defined(HAS_NETIF_QUEUE) || defined (HAVE_NETIF_QUEUE) ++ netif_wake_queue(ixs->dev); ++#else /* defined(HAS_NETIF_QUEUE) || defined (HAVE_NETIF_QUEUE) */ ++ ixs->dev->tbusy = 0; ++#endif /* defined(HAS_NETIF_QUEUE) || defined (HAVE_NETIF_QUEUE) */ ++ if(ixs->saved_header) { ++ kfree(ixs->saved_header); ++ } ++ if(ixs->skb) { ++ dev_kfree_skb(ixs->skb, FREE_WRITE); ++ } ++ if(ixs->oskb) { ++ dev_kfree_skb(ixs->oskb, FREE_WRITE); ++ } ++ if (ixs->ips.ips_ident_s.data) { ++ kfree(ixs->ips.ips_ident_s.data); ++ } ++ if (ixs->ips.ips_ident_d.data) { ++ kfree(ixs->ips.ips_ident_d.data); ++ } ++} ++ ++/* ++ * This function assumes it is being called from dev_queue_xmit() ++ * and that skb is filled properly by that function. 
++ */ ++int ++ipsec_tunnel_start_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ struct ipsec_xmit_state ixs_mem; ++ struct ipsec_xmit_state *ixs = &ixs_mem; ++ enum ipsec_xmit_value stat; ++ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ ixs->natt_type = 0, ixs->natt_head = 0; ++ ixs->natt_sport = 0, ixs->natt_dport = 0; ++#endif ++ ++ memset((caddr_t)ixs, 0, sizeof(*ixs)); ++ ixs->oskb = NULL; ++ ixs->saved_header = NULL; /* saved copy of the hard header */ ++ ixs->route = NULL; ++ memset((caddr_t)&(ixs->ips), 0, sizeof(ixs->ips)); ++ ixs->dev = dev; ++ ixs->skb = skb; ++ ++ stat = ipsec_xmit_sanity_check_dev(ixs); ++ if(stat != IPSEC_XMIT_OK) { ++ goto cleanup; ++ } ++ ++ stat = ipsec_xmit_sanity_check_skb(ixs); ++ if(stat != IPSEC_XMIT_OK) { ++ goto cleanup; ++ } ++ ++ stat = ipsec_tunnel_strip_hard_header(ixs); ++ if(stat != IPSEC_XMIT_OK) { ++ goto cleanup; ++ } ++ ++ stat = ipsec_tunnel_SAlookup(ixs); ++ if(stat != IPSEC_XMIT_OK) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_tunnel_start_xmit: SAlookup failed: %d\n", ++ stat); ++ goto cleanup; ++ } ++ ++ ixs->innersrc = ixs->iph->saddr; ++ /* start encapsulation loop here XXX */ ++ do { ++ stat = ipsec_xmit_encap_bundle(ixs); ++ if(stat != IPSEC_XMIT_OK) { ++ if(stat == IPSEC_XMIT_PASS) { ++ goto bypass; ++ } ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_tunnel_start_xmit: encap_bundle failed: %d\n", ++ stat); ++ goto cleanup; ++ } ++ ++ ixs->matcher.sen_ip_src.s_addr = ixs->iph->saddr; ++ ixs->matcher.sen_ip_dst.s_addr = ixs->iph->daddr; ++ ixs->matcher.sen_proto = ixs->iph->protocol; ++ ipsec_extract_ports(ixs->iph, &ixs->matcher); ++ ++ spin_lock(&eroute_lock); ++ ixs->eroute = ipsec_findroute(&ixs->matcher); ++ if(ixs->eroute) { ++ ixs->outgoing_said = ixs->eroute->er_said; ++ ixs->eroute_pid = ixs->eroute->er_pid; ++ ixs->eroute->er_count++; ++ ixs->eroute->er_lasttime = jiffies/HZ; ++ } ++ spin_unlock(&eroute_lock); ++ ++ KLIPS_PRINT((debug_tunnel & DB_TN_XMIT) 
&& ++ /* ((ixs->orgdst != ixs->newdst) || (ixs->orgsrc != ixs->newsrc)) */ ++ (ixs->orgedst != ixs->outgoing_said.dst.u.v4.sin_addr.s_addr) && ++ ixs->outgoing_said.dst.u.v4.sin_addr.s_addr && ++ ixs->eroute, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "We are recursing here.\n"); ++ ++ } while(/*((ixs->orgdst != ixs->newdst) || (ixs->orgsrc != ixs->newsrc))*/ ++ (ixs->orgedst != ixs->outgoing_said.dst.u.v4.sin_addr.s_addr) && ++ ixs->outgoing_said.dst.u.v4.sin_addr.s_addr && ++ ixs->eroute); ++ ++ stat = ipsec_tunnel_restore_hard_header(ixs); ++ if(stat != IPSEC_XMIT_OK) { ++ goto cleanup; ++ } ++ ++ bypass: ++ stat = ipsec_tunnel_send(ixs); ++ ++ cleanup: ++ ipsec_tunnel_cleanup(ixs); ++ ++ return 0; ++} ++ ++DEBUG_NO_STATIC struct net_device_stats * ++ipsec_tunnel_get_stats(struct net_device *dev) ++{ ++ return &(((struct ipsecpriv *)(dev->priv))->mystats); ++} ++ ++/* ++ * Revectored calls. ++ * For each of these calls, a field exists in our private structure. ++ */ ++ ++DEBUG_NO_STATIC int ++ipsec_tunnel_hard_header(struct sk_buff *skb, struct net_device *dev, ++ unsigned short type, void *daddr, void *saddr, unsigned len) ++{ ++ struct ipsecpriv *prv = dev->priv; ++ struct net_device *tmp; ++ int ret; ++ struct net_device_stats *stats; /* This device's statistics */ ++ ++ if(skb == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_hard_header: " ++ "no skb...\n"); ++ return -ENODATA; ++ } ++ ++ if(dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_hard_header: " ++ "no device...\n"); ++ return -ENODEV; ++ } ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_hard_header: " ++ "skb->dev=%s dev=%s.\n", ++ skb->dev ? skb->dev->name : "NULL", ++ dev->name); ++ ++ if(prv == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_hard_header: " ++ "no private space associated with dev=%s\n", ++ dev->name ? 
dev->name : "NULL"); ++ return -ENODEV; ++ } ++ ++ stats = (struct net_device_stats *) &(prv->mystats); ++ ++ if(prv->dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_hard_header: " ++ "no physical device associated with dev=%s\n", ++ dev->name ? dev->name : "NULL"); ++ stats->tx_dropped++; ++ return -ENODEV; ++ } ++ ++ /* check if we have to send a IPv6 packet. It might be a Router ++ Solicitation, where the building of the packet happens in ++ reverse order: ++ 1. ll hdr, ++ 2. IPv6 hdr, ++ 3. ICMPv6 hdr ++ -> skb->nh.raw is still uninitialized when this function is ++ called!! If this is no IPv6 packet, we can print debugging ++ messages, otherwise we skip all debugging messages and just ++ build the ll header */ ++ if(type != ETH_P_IPV6) { ++ /* execute this only, if we don't have to build the ++ header for a IPv6 packet */ ++ if(!prv->hard_header) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_hard_header: " ++ "physical device has been detached, packet dropped 0p%p->0p%p len=%d type=%d dev=%s->NULL ", ++ saddr, ++ daddr, ++ len, ++ type, ++ dev->name); ++#ifdef NET_21 ++ KLIPS_PRINTMORE(debug_tunnel & DB_TN_REVEC, ++ "ip=%08x->%08x\n", ++ (__u32)ntohl(ip_hdr(skb)->saddr), ++ (__u32)ntohl(ip_hdr(skb)->daddr) ); ++#else /* NET_21 */ ++ KLIPS_PRINTMORE(debug_tunnel & DB_TN_REVEC, ++ "ip=%08x->%08x\n", ++ (__u32)ntohl(skb->ip_hdr->saddr), ++ (__u32)ntohl(skb->ip_hdr->daddr) ); ++#endif /* NET_21 */ ++ stats->tx_dropped++; ++ return -ENODEV; ++ } ++ ++#define da ((struct net_device *)(prv->dev))->dev_addr ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_hard_header: " ++ "Revectored 0p%p->0p%p len=%d type=%d dev=%s->%s dev_addr=%02x:%02x:%02x:%02x:%02x:%02x ", ++ saddr, ++ daddr, ++ len, ++ type, ++ dev->name, ++ prv->dev->name, ++ da[0], da[1], da[2], da[3], da[4], da[5]); ++#ifdef NET_21 ++ KLIPS_PRINTMORE(debug_tunnel & DB_TN_REVEC, ++ "ip=%08x->%08x\n", ++ 
(__u32)ntohl(ip_hdr(skb)->saddr), ++ (__u32)ntohl(ip_hdr(skb)->daddr) ); ++#else /* NET_21 */ ++ KLIPS_PRINTMORE(debug_tunnel & DB_TN_REVEC, ++ "ip=%08x->%08x\n", ++ (__u32)ntohl(skb->ip_hdr->saddr), ++ (__u32)ntohl(skb->ip_hdr->daddr) ); ++#endif /* NET_21 */ ++ } else { ++ KLIPS_PRINT(debug_tunnel, ++ "klips_debug:ipsec_tunnel_hard_header: " ++ "is IPv6 packet, skip debugging messages, only revector and build linklocal header.\n"); ++ } ++ tmp = skb->dev; ++ skb->dev = prv->dev; ++ ret = prv->hard_header(skb, prv->dev, type, (void *)daddr, (void *)saddr, len); ++ skb->dev = tmp; ++ return ret; ++} ++ ++DEBUG_NO_STATIC int ++#ifdef NET_21 ++ipsec_tunnel_rebuild_header(struct sk_buff *skb) ++#else /* NET_21 */ ++ipsec_tunnel_rebuild_header(void *buff, struct net_device *dev, ++ unsigned long raddr, struct sk_buff *skb) ++#endif /* NET_21 */ ++{ ++ struct ipsecpriv *prv = skb->dev->priv; ++ struct net_device *tmp; ++ int ret; ++ struct net_device_stats *stats; /* This device's statistics */ ++ ++ if(skb->dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_rebuild_header: " ++ "no device..."); ++ return -ENODEV; ++ } ++ ++ if(prv == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_rebuild_header: " ++ "no private space associated with dev=%s", ++ skb->dev->name ? skb->dev->name : "NULL"); ++ return -ENODEV; ++ } ++ ++ stats = (struct net_device_stats *) &(prv->mystats); ++ ++ if(prv->dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_rebuild_header: " ++ "no physical device associated with dev=%s", ++ skb->dev->name ? 
skb->dev->name : "NULL"); ++ stats->tx_dropped++; ++ return -ENODEV; ++ } ++ ++ if(!prv->rebuild_header) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_rebuild_header: " ++ "physical device has been detached, packet dropped skb->dev=%s->NULL ", ++ skb->dev->name); ++#ifdef NET_21 ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "ip=%08x->%08x\n", ++ (__u32)ntohl(ip_hdr(skb)->saddr), ++ (__u32)ntohl(ip_hdr(skb)->daddr) ); ++#else /* NET_21 */ ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "ip=%08x->%08x\n", ++ (__u32)ntohl(skb->ip_hdr->saddr), ++ (__u32)ntohl(skb->ip_hdr->daddr) ); ++#endif /* NET_21 */ ++ stats->tx_dropped++; ++ return -ENODEV; ++ } ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel: " ++ "Revectored rebuild_header dev=%s->%s ", ++ skb->dev->name, prv->dev->name); ++#ifdef NET_21 ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "ip=%08x->%08x\n", ++ (__u32)ntohl(ip_hdr(skb)->saddr), ++ (__u32)ntohl(ip_hdr(skb)->daddr) ); ++#else /* NET_21 */ ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "ip=%08x->%08x\n", ++ (__u32)ntohl(skb->ip_hdr->saddr), ++ (__u32)ntohl(skb->ip_hdr->daddr) ); ++#endif /* NET_21 */ ++ tmp = skb->dev; ++ skb->dev = prv->dev; ++ ++#ifdef NET_21 ++ ret = prv->rebuild_header(skb); ++#else /* NET_21 */ ++ ret = prv->rebuild_header(buff, prv->dev, raddr, skb); ++#endif /* NET_21 */ ++ skb->dev = tmp; ++ return ret; ++} ++ ++DEBUG_NO_STATIC int ++ipsec_tunnel_set_mac_address(struct net_device *dev, void *addr) ++{ ++ struct ipsecpriv *prv = dev->priv; ++ ++ struct net_device_stats *stats; /* This device's statistics */ ++ ++ if(dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_set_mac_address: " ++ "no device..."); ++ return -ENODEV; ++ } ++ ++ if(prv == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_set_mac_address: " ++ "no private space associated with dev=%s", ++ dev->name ? 
dev->name : "NULL"); ++ return -ENODEV; ++ } ++ ++ stats = (struct net_device_stats *) &(prv->mystats); ++ ++ if(prv->dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_set_mac_address: " ++ "no physical device associated with dev=%s", ++ dev->name ? dev->name : "NULL"); ++ stats->tx_dropped++; ++ return -ENODEV; ++ } ++ ++ if(!prv->set_mac_address) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_set_mac_address: " ++ "physical device has been detached, cannot set - skb->dev=%s->NULL\n", ++ dev->name); ++ return -ENODEV; ++ } ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_set_mac_address: " ++ "Revectored dev=%s->%s addr=0p%p\n", ++ dev->name, prv->dev->name, addr); ++ return prv->set_mac_address(prv->dev, addr); ++ ++} ++ ++#ifndef NET_21 ++DEBUG_NO_STATIC void ++ipsec_tunnel_cache_bind(struct hh_cache **hhp, struct net_device *dev, ++ unsigned short htype, __u32 daddr) ++{ ++ struct ipsecpriv *prv = dev->priv; ++ ++ struct net_device_stats *stats; /* This device's statistics */ ++ ++ if(dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_cache_bind: " ++ "no device..."); ++ return; ++ } ++ ++ if(prv == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_cache_bind: " ++ "no private space associated with dev=%s", ++ dev->name ? dev->name : "NULL"); ++ return; ++ } ++ ++ stats = (struct net_device_stats *) &(prv->mystats); ++ ++ if(prv->dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_cache_bind: " ++ "no physical device associated with dev=%s", ++ dev->name ? 
dev->name : "NULL"); ++ stats->tx_dropped++; ++ return; ++ } ++ ++ if(!prv->header_cache_bind) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_cache_bind: " ++ "physical device has been detached, cannot set - skb->dev=%s->NULL\n", ++ dev->name); ++ stats->tx_dropped++; ++ return; ++ } ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_cache_bind: " ++ "Revectored \n"); ++ prv->header_cache_bind(hhp, prv->dev, htype, daddr); ++ return; ++} ++#endif /* !NET_21 */ ++ ++ ++DEBUG_NO_STATIC void ++ipsec_tunnel_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr) ++{ ++ struct ipsecpriv *prv = dev->priv; ++ ++ struct net_device_stats *stats; /* This device's statistics */ ++ ++ if(dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_cache_update: " ++ "no device..."); ++ return; ++ } ++ ++ if(prv == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_cache_update: " ++ "no private space associated with dev=%s", ++ dev->name ? dev->name : "NULL"); ++ return; ++ } ++ ++ stats = (struct net_device_stats *) &(prv->mystats); ++ ++ if(prv->dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_cache_update: " ++ "no physical device associated with dev=%s", ++ dev->name ? 
dev->name : "NULL"); ++ stats->tx_dropped++; ++ return; ++ } ++ ++ if(!prv->header_cache_update) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_cache_update: " ++ "physical device has been detached, cannot set - skb->dev=%s->NULL\n", ++ dev->name); ++ return; ++ } ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel: " ++ "Revectored cache_update\n"); ++ prv->header_cache_update(hh, prv->dev, haddr); ++ return; ++} ++ ++#ifdef NET_21 ++DEBUG_NO_STATIC int ++ipsec_tunnel_neigh_setup(struct neighbour *n) ++{ ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_neigh_setup:\n"); ++ ++ if (n->nud_state == NUD_NONE) { ++ n->ops = &arp_broken_ops; ++ n->output = n->ops->output; ++ } ++ return 0; ++} ++ ++DEBUG_NO_STATIC int ++ipsec_tunnel_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) ++{ ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_neigh_setup_dev: " ++ "setting up %s\n", ++ dev ? dev->name : "NULL"); ++ ++ if (p->tbl->family == AF_INET) { ++ p->neigh_setup = ipsec_tunnel_neigh_setup; ++ p->ucast_probes = 0; ++ p->mcast_probes = 0; ++ } ++ return 0; ++} ++#endif /* NET_21 */ ++ ++/* ++ * We call the attach routine to attach another device. ++ */ ++ ++DEBUG_NO_STATIC int ++ipsec_tunnel_attach(struct net_device *dev, struct net_device *physdev) ++{ ++ int i; ++ struct ipsecpriv *prv = dev->priv; ++ ++ if(dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_attach: " ++ "no device..."); ++ return -ENODEV; ++ } ++ ++ if(prv == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_attach: " ++ "no private space associated with dev=%s", ++ dev->name ? 
dev->name : "NULL"); ++ return -ENODATA; ++ } ++ ++ prv->dev = physdev; ++ prv->hard_start_xmit = physdev->hard_start_xmit; ++ prv->get_stats = physdev->get_stats; ++ ++ if (physdev->hard_header) { ++ prv->hard_header = physdev->hard_header; ++ dev->hard_header = ipsec_tunnel_hard_header; ++ } else ++ dev->hard_header = NULL; ++ ++ if (physdev->rebuild_header) { ++ prv->rebuild_header = physdev->rebuild_header; ++ dev->rebuild_header = ipsec_tunnel_rebuild_header; ++ } else ++ dev->rebuild_header = NULL; ++ ++ if (physdev->set_mac_address) { ++ prv->set_mac_address = physdev->set_mac_address; ++ dev->set_mac_address = ipsec_tunnel_set_mac_address; ++ } else ++ dev->set_mac_address = NULL; ++ ++#ifndef NET_21 ++ if (physdev->header_cache_bind) { ++ prv->header_cache_bind = physdev->header_cache_bind; ++ dev->header_cache_bind = ipsec_tunnel_cache_bind; ++ } else ++ dev->header_cache_bind = NULL; ++#endif /* !NET_21 */ ++ ++ if (physdev->header_cache_update) { ++ prv->header_cache_update = physdev->header_cache_update; ++ dev->header_cache_update = ipsec_tunnel_cache_update; ++ } else ++ dev->header_cache_update = NULL; ++ ++ dev->hard_header_len = physdev->hard_header_len; ++ ++#ifdef NET_21 ++/* prv->neigh_setup = physdev->neigh_setup; */ ++ dev->neigh_setup = ipsec_tunnel_neigh_setup_dev; ++#endif /* NET_21 */ ++ dev->mtu = 16260; /* 0xfff0; */ /* dev->mtu; */ ++ prv->mtu = physdev->mtu; ++ ++#ifdef PHYSDEV_TYPE ++ dev->type = physdev->type; /* ARPHRD_TUNNEL; */ ++#endif /* PHYSDEV_TYPE */ ++ ++ dev->addr_len = physdev->addr_len; ++ for (i=0; iaddr_len; i++) { ++ dev->dev_addr[i] = physdev->dev_addr[i]; ++ } ++#ifdef CONFIG_KLIPS_DEBUG ++ if(debug_tunnel & DB_TN_INIT) { ++ printk(KERN_INFO "klips_debug:ipsec_tunnel_attach: " ++ "physical device %s being attached has HW address: %2x", ++ physdev->name, physdev->dev_addr[0]); ++ for (i=1; i < physdev->addr_len; i++) { ++ printk(":%02x", physdev->dev_addr[i]); ++ } ++ printk("\n"); ++ } ++#endif /* CONFIG_KLIPS_DEBUG 
*/ ++ ++ return 0; ++} ++ ++/* ++ * We call the detach routine to detach the ipsec tunnel from another device. ++ */ ++ ++DEBUG_NO_STATIC int ++ipsec_tunnel_detach(struct net_device *dev) ++{ ++ int i; ++ struct ipsecpriv *prv = dev->priv; ++ ++ if(dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_detach: " ++ "no device..."); ++ return -ENODEV; ++ } ++ ++ if(prv == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC, ++ "klips_debug:ipsec_tunnel_detach: " ++ "no private space associated with dev=%s", ++ dev->name ? dev->name : "NULL"); ++ return -ENODATA; ++ } ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_detach: " ++ "physical device %s being detached from virtual device %s\n", ++ prv->dev ? prv->dev->name : "NULL", ++ dev->name); ++ ++ ipsec_dev_put(prv->dev); ++ prv->dev = NULL; ++ prv->hard_start_xmit = NULL; ++ prv->get_stats = NULL; ++ ++ prv->hard_header = NULL; ++#ifdef DETACH_AND_DOWN ++ dev->hard_header = NULL; ++#endif /* DETACH_AND_DOWN */ ++ ++ prv->rebuild_header = NULL; ++#ifdef DETACH_AND_DOWN ++ dev->rebuild_header = NULL; ++#endif /* DETACH_AND_DOWN */ ++ ++ prv->set_mac_address = NULL; ++#ifdef DETACH_AND_DOWN ++ dev->set_mac_address = NULL; ++#endif /* DETACH_AND_DOWN */ ++ ++#ifndef NET_21 ++ prv->header_cache_bind = NULL; ++#ifdef DETACH_AND_DOWN ++ dev->header_cache_bind = NULL; ++#endif /* DETACH_AND_DOWN */ ++#endif /* !NET_21 */ ++ ++ prv->header_cache_update = NULL; ++#ifdef DETACH_AND_DOWN ++ dev->header_cache_update = NULL; ++#endif /* DETACH_AND_DOWN */ ++ ++#ifdef NET_21 ++/* prv->neigh_setup = NULL; */ ++#ifdef DETACH_AND_DOWN ++ dev->neigh_setup = NULL; ++#endif /* DETACH_AND_DOWN */ ++#endif /* NET_21 */ ++ dev->hard_header_len = 0; ++#ifdef DETACH_AND_DOWN ++ dev->mtu = 0; ++#endif /* DETACH_AND_DOWN */ ++ prv->mtu = 0; ++ for (i=0; idev_addr[i] = 0; ++ } ++ dev->addr_len = 0; ++#ifdef PHYSDEV_TYPE ++ dev->type = ARPHRD_VOID; /* ARPHRD_TUNNEL; */ ++#endif /* 
PHYSDEV_TYPE */ ++ ++ return 0; ++} ++ ++/* ++ * We call the clear routine to detach all ipsec tunnels from other devices. ++ */ ++DEBUG_NO_STATIC int ++ipsec_tunnel_clear(void) ++{ ++ int i; ++ struct net_device *ipsecdev = NULL, *prvdev; ++ struct ipsecpriv *prv; ++ int ret; ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_clear: .\n"); ++ ++ for(i = 0; i < IPSEC_NUM_IF; i++) { ++ ipsecdev = ipsecdevices[i]; ++ if(ipsecdev != NULL) { ++ if((prv = (struct ipsecpriv *)(ipsecdev->priv))) { ++ prvdev = (struct net_device *)(prv->dev); ++ if(prvdev) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_clear: " ++ "physical device for device %s is %s\n", ++ ipsecdev->name, prvdev->name); ++ if((ret = ipsec_tunnel_detach(ipsecdev))) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_clear: " ++ "error %d detatching device %s from device %s.\n", ++ ret, ipsecdev->name, prvdev->name); ++ return ret; ++ } ++ } ++ } ++ } ++ } ++ return 0; ++} ++ ++DEBUG_NO_STATIC int ++ipsec_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) ++{ ++ struct ipsectunnelconf *cf = (struct ipsectunnelconf *)&ifr->ifr_data; ++ struct ipsecpriv *prv = dev->priv; ++ struct net_device *them; /* physical device */ ++#ifdef CONFIG_IP_ALIAS ++ char *colon; ++ char realphysname[IFNAMSIZ]; ++#endif /* CONFIG_IP_ALIAS */ ++ ++ if(dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_ioctl: " ++ "device not supplied.\n"); ++ return -ENODEV; ++ } ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_ioctl: " ++ "tncfg service call #%d for dev=%s\n", ++ cmd, ++ dev->name ? dev->name : "NULL"); ++ switch (cmd) { ++ /* attach a virtual ipsec? 
device to a physical device */ ++ case IPSEC_SET_DEV: ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_ioctl: " ++ "calling ipsec_tunnel_attatch...\n"); ++#ifdef CONFIG_IP_ALIAS ++ /* If this is an IP alias interface, get its real physical name */ ++ strncpy(realphysname, cf->cf_name, IFNAMSIZ); ++ realphysname[IFNAMSIZ-1] = 0; ++ colon = strchr(realphysname, ':'); ++ if (colon) *colon = 0; ++ them = ipsec_dev_get(realphysname); ++#else /* CONFIG_IP_ALIAS */ ++ them = ipsec_dev_get(cf->cf_name); ++#endif /* CONFIG_IP_ALIAS */ ++ ++ if (them == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_ioctl: " ++ "physical device %s requested is null\n", ++ cf->cf_name); ++ return -ENXIO; ++ } ++ ++#if 0 ++ if (them->flags & IFF_UP) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_ioctl: " ++ "physical device %s requested is not up.\n", ++ cf->cf_name); ++ ipsec_dev_put(them); ++ return -ENXIO; ++ } ++#endif ++ ++ if (prv && prv->dev) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_ioctl: " ++ "virtual device is already connected to %s.\n", ++ prv->dev->name ? prv->dev->name : "NULL"); ++ ipsec_dev_put(them); ++ return -EBUSY; ++ } ++ return ipsec_tunnel_attach(dev, them); ++ ++ case IPSEC_DEL_DEV: ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_ioctl: " ++ "calling ipsec_tunnel_detatch.\n"); ++ if (! 
prv->dev) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_ioctl: " ++ "physical device not connected.\n"); ++ return -ENODEV; ++ } ++ return ipsec_tunnel_detach(dev); ++ ++ case IPSEC_CLR_DEV: ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_ioctl: " ++ "calling ipsec_tunnel_clear.\n"); ++ return ipsec_tunnel_clear(); ++ ++ default: ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_ioctl: " ++ "unknown command %d.\n", ++ cmd); ++ return -EOPNOTSUPP; ++ } ++} ++ ++struct net_device *ipsec_get_device(int inst) ++{ ++ struct net_device *ipsec_dev; ++ ++ ipsec_dev = NULL; ++ ++ if(inst < IPSEC_NUM_IF) { ++ ipsec_dev = ipsecdevices[inst]; ++ } ++ ++ return ipsec_dev; ++} ++ ++int ++ipsec_device_event(struct notifier_block *unused, unsigned long event, void *ptr) ++{ ++ struct net_device *dev = ptr; ++ struct net_device *ipsec_dev; ++ struct ipsecpriv *priv; ++ int i; ++ ++ if (dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_device_event: " ++ "dev=NULL for event type %ld.\n", ++ event); ++ return(NOTIFY_DONE); ++ } ++ ++ /* check for loopback devices */ ++ if (dev && (dev->flags & IFF_LOOPBACK)) { ++ return(NOTIFY_DONE); ++ } ++ ++ switch (event) { ++ case NETDEV_DOWN: ++ /* look very carefully at the scope of these compiler ++ directives before changing anything... 
-- RGB */ ++#ifdef NET_21 ++ case NETDEV_UNREGISTER: ++ switch (event) { ++ case NETDEV_DOWN: ++#endif /* NET_21 */ ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_device_event: " ++ "NETDEV_DOWN dev=%s flags=%x\n", ++ dev->name, ++ dev->flags); ++ if(strncmp(dev->name, "ipsec", strlen("ipsec")) == 0) { ++ printk(KERN_CRIT "IPSEC EVENT: KLIPS device %s shut down.\n", ++ dev->name); ++ } ++#ifdef NET_21 ++ break; ++ case NETDEV_UNREGISTER: ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_device_event: " ++ "NETDEV_UNREGISTER dev=%s flags=%x\n", ++ dev->name, ++ dev->flags); ++ break; ++ } ++#endif /* NET_21 */ ++ ++ /* find the attached physical device and detach it. */ ++ for(i = 0; i < IPSEC_NUM_IF; i++) { ++ ipsec_dev = ipsecdevices[i]; ++ ++ if(ipsec_dev) { ++ priv = (struct ipsecpriv *)(ipsec_dev->priv); ++ if(priv) { ++ ; ++ if(((struct net_device *)(priv->dev)) == dev) { ++ /* dev_close(ipsec_dev); */ ++ /* return */ ipsec_tunnel_detach(ipsec_dev); ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_device_event: " ++ "device '%s' has been detached.\n", ++ ipsec_dev->name); ++ break; ++ } ++ } else { ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_device_event: " ++ "device '%s' has no private data space!\n", ++ ipsec_dev->name); ++ } ++ } ++ } ++ break; ++ case NETDEV_UP: ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_device_event: " ++ "NETDEV_UP dev=%s\n", ++ dev->name); ++ break; ++#ifdef NET_21 ++ case NETDEV_REBOOT: ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_device_event: " ++ "NETDEV_REBOOT dev=%s\n", ++ dev->name); ++ break; ++ case NETDEV_CHANGE: ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_device_event: " ++ "NETDEV_CHANGE dev=%s flags=%x\n", ++ dev->name, ++ dev->flags); ++ break; ++ case NETDEV_REGISTER: ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_device_event: " ++ "NETDEV_REGISTER dev=%s\n", ++ dev->name); ++ 
break; ++ case NETDEV_CHANGEMTU: ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_device_event: " ++ "NETDEV_CHANGEMTU dev=%s to mtu=%d\n", ++ dev->name, ++ dev->mtu); ++ break; ++ case NETDEV_CHANGEADDR: ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_device_event: " ++ "NETDEV_CHANGEADDR dev=%s\n", ++ dev->name); ++ break; ++ case NETDEV_GOING_DOWN: ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_device_event: " ++ "NETDEV_GOING_DOWN dev=%s\n", ++ dev->name); ++ break; ++ case NETDEV_CHANGENAME: ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_device_event: " ++ "NETDEV_CHANGENAME dev=%s\n", ++ dev->name); ++ break; ++#endif /* NET_21 */ ++ default: ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_device_event: " ++ "event type %ld unrecognised for dev=%s\n", ++ event, ++ dev->name); ++ break; ++ } ++ return NOTIFY_DONE; ++} ++ ++/* ++ * Called when an ipsec tunnel device is initialized. ++ * The ipsec tunnel device structure is passed to us. ++ */ ++ ++int ++ipsec_tunnel_init(struct net_device *dev) ++{ ++ int i; ++ ++ KLIPS_PRINT(debug_tunnel, ++ "klips_debug:ipsec_tunnel_init: " ++ "allocating %lu bytes initialising device: %s\n", ++ (unsigned long) sizeof(struct ipsecpriv), ++ dev->name ? 
dev->name : "NULL"); ++ ++ /* Add our tunnel functions to the device */ ++ dev->open = ipsec_tunnel_open; ++ dev->stop = ipsec_tunnel_close; ++ dev->hard_start_xmit = ipsec_tunnel_start_xmit; ++ dev->get_stats = ipsec_tunnel_get_stats; ++ ++ dev->priv = kmalloc(sizeof(struct ipsecpriv), GFP_KERNEL); ++ if (dev->priv == NULL) ++ return -ENOMEM; ++ memset((caddr_t)(dev->priv), 0, sizeof(struct ipsecpriv)); ++ ++ for(i = 0; i < sizeof(zeroes); i++) { ++ ((__u8*)(zeroes))[i] = 0; ++ } ++ ++#ifndef NET_21 ++ /* Initialize the tunnel device structure */ ++ for (i = 0; i < DEV_NUMBUFFS; i++) ++ skb_queue_head_init(&dev->buffs[i]); ++#endif /* !NET_21 */ ++ ++ dev->set_multicast_list = NULL; ++ dev->do_ioctl = ipsec_tunnel_ioctl; ++ dev->hard_header = NULL; ++ dev->rebuild_header = NULL; ++ dev->set_mac_address = NULL; ++#ifndef NET_21 ++ dev->header_cache_bind = NULL; ++#endif /* !NET_21 */ ++ dev->header_cache_update= NULL; ++ ++#ifdef NET_21 ++/* prv->neigh_setup = NULL; */ ++ dev->neigh_setup = ipsec_tunnel_neigh_setup_dev; ++#endif /* NET_21 */ ++ dev->hard_header_len = 0; ++ dev->mtu = 0; ++ dev->addr_len = 0; ++ dev->type = ARPHRD_VOID; /* ARPHRD_TUNNEL; */ /* ARPHRD_ETHER; */ ++ dev->tx_queue_len = 10; /* Small queue */ ++ memset((caddr_t)(dev->broadcast),0xFF, ETH_ALEN); /* what if this is not attached to ethernet? */ ++ ++ /* New-style flags. */ ++ dev->flags = IFF_NOARP /* 0 */ /* Petr Novak */; ++ ++#if 0 ++#ifdef NET_21 ++ dev_init_buffers(dev); ++#else /* NET_21 */ ++ dev->family = AF_INET; ++ dev->pa_addr = 0; ++ dev->pa_brdaddr = 0; ++ dev->pa_mask = 0; ++ dev->pa_alen = 4; ++#endif /* NET_21 */ ++#endif ++ ++ /* We're done. Have I forgotten anything? 
*/ ++ return 0; ++} ++ ++/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ ++/* Module specific interface (but it links with the rest of IPSEC) */ ++/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ ++ ++int ++ipsec_tunnel_probe(struct net_device *dev) ++{ ++ ipsec_tunnel_init(dev); ++ return 0; ++} ++ ++struct net_device *ipsecdevices[IPSEC_NUM_IF]; ++ ++int ++ipsec_tunnel_init_devices(void) ++{ ++ int i; ++ char name[IFNAMSIZ]; ++ struct net_device *dev_ipsec; ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_init_devices: " ++ "creating and registering IPSEC_NUM_IF=%u devices, allocating %lu per device, IFNAMSIZ=%u.\n", ++ IPSEC_NUM_IF, ++ (unsigned long) (sizeof(struct net_device) + IFNAMSIZ), ++ IFNAMSIZ); ++ ++ for(i = 0; i < IPSEC_NUM_IF; i++) { ++ sprintf(name, IPSEC_DEV_FORMAT, i); ++ dev_ipsec = (struct net_device*)kmalloc(sizeof(struct net_device), GFP_KERNEL); ++ if (dev_ipsec == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_init_devices: " ++ "failed to allocate memory for device %s, quitting device init.\n", ++ name); ++ return -ENOMEM; ++ } ++ memset((caddr_t)dev_ipsec, 0, sizeof(struct net_device)); ++#ifdef NETDEV_23 ++ strncpy(dev_ipsec->name, name, sizeof(dev_ipsec->name)); ++#else /* NETDEV_23 */ ++ dev_ipsec->name = (char*)kmalloc(IFNAMSIZ, GFP_KERNEL); ++ if (dev_ipsec->name == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_init_devices: " ++ "failed to allocate memory for device %s name, quitting device init.\n", ++ name); ++ return -ENOMEM; ++ } ++ memset((caddr_t)dev_ipsec->name, 0, IFNAMSIZ); ++ strncpy(dev_ipsec->name, name, IFNAMSIZ); ++#endif /* NETDEV_23 */ ++#ifdef HAVE_DEV_NEXT ++ dev_ipsec->next = NULL; ++#endif ++ dev_ipsec->init = &ipsec_tunnel_probe; ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_init_devices: " ++ "registering device %s\n", ++ dev_ipsec->name); ++ ++ /* 
reference and hold the device reference */ ++ dev_hold(dev_ipsec); ++ ipsecdevices[i]=dev_ipsec; ++ ++ if (register_netdev(dev_ipsec) != 0) { ++ KLIPS_PRINT(1 || debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_init_devices: " ++ "registering device %s failed, quitting device init.\n", ++ dev_ipsec->name); ++ return -EIO; ++ } else { ++ KLIPS_PRINT(debug_tunnel & DB_TN_INIT, ++ "klips_debug:ipsec_tunnel_init_devices: " ++ "registering device %s succeeded, continuing...\n", ++ dev_ipsec->name); ++ } ++ } ++ return 0; ++} ++ ++/* void */ ++int ++ipsec_tunnel_cleanup_devices(void) ++{ ++ int error = 0; ++ int i; ++ struct net_device *dev_ipsec; ++ ++ for(i = 0; i < IPSEC_NUM_IF; i++) { ++ dev_ipsec = ipsecdevices[i]; ++ if(dev_ipsec == NULL) { ++ continue; ++ } ++ ++ /* release reference */ ++ ipsecdevices[i]=NULL; ++ ipsec_dev_put(dev_ipsec); ++ ++ KLIPS_PRINT(debug_tunnel, "Unregistering %s (refcnt=%d)\n", ++ dev_ipsec->name, ++ atomic_read(&dev_ipsec->refcnt)); ++ unregister_netdev(dev_ipsec); ++ KLIPS_PRINT(debug_tunnel, "Unregisted %s\n", dev_ipsec->name); ++#ifndef NETDEV_23 ++ kfree(dev_ipsec->name); ++ dev_ipsec->name=NULL; ++#endif /* !NETDEV_23 */ ++ kfree(dev_ipsec->priv); ++ dev_ipsec->priv=NULL; ++ } ++ return error; ++} ++ ++/* ++ * $Log: ipsec_tunnel.c,v $ ++ * Revision 1.232.2.7 2007-09-18 18:26:18 paul ++ * Fix mangled preprocessor line in HAVE_INET_SK_SPORT case. ++ * ++ * Revision 1.232.2.6 2007/09/05 02:56:10 paul ++ * Use the new ipsec_kversion macros by David to deal with 2.6.22 kernels. ++ * Fixes based on David McCullough patch. ++ * ++ * Revision 1.232.2.5 2006/10/06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. 
++ * ++ * Revision 1.232.2.4 2006/03/28 20:58:19 ken ++ * Fix for KLIPS on 2.6.16 - need to include now ++ * ++ * Revision 1.232.2.3 2006/02/15 05:14:12 paul ++ * 568: uninitialized struct in ipsec_tunnel.c coud break routing under 2.6 kernels ++ * ipsec_tunnel_send() calls the entry point function of routing subsystem ++ * (ip_route_output_key()) using a not fully initialized struct of type ++ * struct flowi. ++ * This will cause a failure in routing packets through an ipsec interface ++ * when patches for multipath routing from http://www.ssi.bg/~ja/ ++ * are applied. ++ * ++ * Revision 1.232.2.2 2005/11/22 04:11:52 ken ++ * Backport fixes for 2.6.14 kernels from HEAD ++ * ++ * Revision 1.232.2.1 2005/09/21 22:57:43 paul ++ * pulled up compile fix for 2.6.13 ++ * ++ * Revision 1.232 2005/06/04 16:06:06 mcr ++ * better patch for nat-t rcv-device code. ++ * ++ * Revision 1.231 2005/05/21 03:28:51 mcr ++ * make sure that port-500 hole is used for port-4500 as well. ++ * ++ * Revision 1.230 2005/05/11 01:42:04 mcr ++ * removal of debugging showed useless/wrong variables used. ++ * ++ * Revision 1.229 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. ++ * ++ * Revision 1.228 2005/01/26 00:50:35 mcr ++ * adjustment of confusion of CONFIG_IPSEC_NAT vs CONFIG_KLIPS_NAT, ++ * and make sure that NAT_TRAVERSAL is set as well to match ++ * userspace compiles of code. ++ * ++ * Revision 1.227 2004/12/10 21:16:08 ken ++ * 64bit fixes from Opteron port of KLIPS 2.6 ++ * ++ * Revision 1.226 2004/12/04 07:11:23 mcr ++ * fix for snmp SIOCPRIVATE use of snmpd. ++ * http://bugs.xelerance.com/view.php?id=144 ++ * ++ * Revision 1.225 2004/12/03 21:25:57 mcr ++ * compile time fixes for running on 2.6. ++ * still experimental. ++ * ++ * Revision 1.224 2004/08/14 03:28:24 mcr ++ * fixed log comment to remove warning about embedded comment. 
++ * ++ * Revision 1.223 2004/08/04 15:57:07 mcr ++ * moved des .h files to include/des/ * ++ * included 2.6 protocol specific things ++ * started at NAT-T support, but it will require a kernel patch. ++ * ++ * Revision 1.222 2004/08/03 18:19:08 mcr ++ * in 2.6, use "net_device" instead of #define device->net_device. ++ * this probably breaks 2.0 compiles. ++ * ++ * Revision 1.221 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.220 2004/04/06 02:49:26 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.219 2004/02/03 03:13:17 mcr ++ * minor edits for readability, and error reporting. ++ * ++ * Revision 1.218 2004/01/27 20:29:20 mcr ++ * fix for unregister_netdev() problem for underlying eth0. ++ * ++ * Revision 1.217 2003/12/10 01:14:27 mcr ++ * NAT-traversal patches to KLIPS. ++ * ++ * Revision 1.216 2003/12/04 23:01:17 mcr ++ * removed ipsec_netlink.h ++ * ++ * Revision 1.215 2003/12/04 16:35:16 ken ++ * Fix for ATM devices where physdev->hard_header_len *is* correct ++ * ++ * Revision 1.214 2003/11/25 23:52:37 mcr ++ * fix typo in patch - ixs-> needed. ++ * ++ * Revision 1.213 2003/11/24 18:25:49 mcr ++ * patch from willy@w.ods.org to fix problems with ATM interfaces. ++ * ++ * Revision 1.212 2003/10/31 02:27:55 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.211.2.2 2003/10/29 01:30:41 mcr ++ * elimited "struct sa_id". ++ * ++ * Revision 1.211.2.1 2003/09/21 13:59:56 mcr ++ * pre-liminary X.509 patch - does not yet pass tests. ++ * ++ * Revision 1.211 2003/09/10 16:46:30 mcr ++ * patches for 2.4 backport/2.6 existence. ++ * ++ * Revision 1.210 2003/07/31 22:47:16 mcr ++ * preliminary (untested by FS-team) 2.5 patches. ++ * ++ * Revision 1.209 2003/06/22 21:28:43 mcr ++ * inability to unload module was caused by calls to dev_get ++ * (ipsec_dev_get), to gather a device from a name. 
There is ++ * simply no reason to look the devices up - they should be kept ++ * in a nice array, ready for use. ++ * ++ * Revision 1.208 2003/06/22 21:25:07 mcr ++ * all staticly counted ipsecXXX device support removed. ++ * ++ * Revision 1.207 2003/04/02 20:15:37 mcr ++ * fix for PR#204 - do not clear connection tracking info if we ++ * the packet is being sent in the clear. ++ * ++ * Revision 1.206 2003/02/12 19:32:51 rgb ++ * Refactored file to: ++ * ipsec_xmit.c ++ * ipsec_xmit.h ++ * ipsec_mast.c ++ * ++ * Revision 1.205 2003/02/06 17:47:00 rgb ++ * ++ * Remove unused ipsec_tunnel_lock() and ipsec_tunnel_unlock() code. ++ * Refactor ipsec_tunnel_start_xmit() further into: ++ * ipsec_xmit_sanity_check_dev() ++ * ipsec_xmit_sanity_check_skb() ++ * ipsec_xmit_strip_hard_header() ++ * ipsec_xmit_restore_hard_header() ++ * ipsec_xmit_send() ++ * ipsec_xmit_cleanup() ++ * and start a skeletal ipsec_mast_start_xmit() . ++ * ++ * Revision 1.204 2003/02/06 06:43:46 rgb ++ * ++ * Refactor ipsec_tunnel_start_xmit, bringing out: ++ * ipsec_xmit_SAlookup ++ * ipsec_xmit_encap_once ++ * ipsec_xmit_encap_bundle ++ * ++ * Revision 1.203 2003/02/06 02:21:34 rgb ++ * ++ * Moved "struct auth_alg" from ipsec_rcv.c to ipsec_ah.h . ++ * Changed "struct ah" to "struct ahhdr" and "struct esp" to "struct esphdr". ++ * Removed "#ifdef INBOUND_POLICY_CHECK_eroute" dead code. ++ * ++ * Revision 1.202 2003/01/03 07:38:01 rgb ++ * ++ * Start to refactor ipsec_tunnel_start_xmit() by putting local variables ++ * into struct ipsec_xmit_state and renaming a few variables to give more ++ * unique or searchable names. ++ * ++ * Revision 1.201 2003/01/03 00:31:28 rgb ++ * ++ * Clean up memset usage, including fixing 2 places where keys were not ++ * properly wiped. ++ * ++ * Revision 1.200 2002/12/06 02:24:02 mcr ++ * patches for compiling against SUSE 8.1 kernels. Requires ++ * an additional -DSUSE_LINUX_2_4_19_IS_STUPID. 
++ * ++ * Revision 1.199 2002/10/12 23:11:53 dhr ++ * ++ * [KenB + DHR] more 64-bit cleanup ++ * ++ * Revision 1.198 2002/10/05 05:02:58 dhr ++ * ++ * C labels go on statements ++ * ++ * Revision 1.197 2002/09/20 05:01:50 rgb ++ * Added compiler directive to switch on IP options and fix IP options bug. ++ * Make ip->ihl treatment consistent using shifts rather than multiplications. ++ * Check for large enough packet before accessing udp header for IKE bypass. ++ * Added memory allocation debugging. ++ * Fixed potential memory allocation failure-induced oops. ++ * ++ * Revision 1.196 2002/07/24 18:44:54 rgb ++ * Type fiddling to tame ia64 compiler. ++ * ++ * Revision 1.195 2002/07/23 03:36:07 rgb ++ * Fixed 2.2 device initialisation hang. ++ * ++ * Revision 1.194 2002/05/27 21:40:34 rgb ++ * Set unused ipsec devices to ARPHRD_VOID to avoid confusing iproute2. ++ * Cleaned up intermediate step to dynamic device allocation. ++ * ++ * Revision 1.193 2002/05/27 19:31:36 rgb ++ * Convert to dynamic ipsec device allocation. ++ * Remove final vistiges of tdb references via IPSEC_KLIPS1_COMPAT. ++ * ++ * Revision 1.192 2002/05/23 07:14:28 rgb ++ * Added refcount code. ++ * Cleaned up %p variants to 0p%p for test suite cleanup. ++ * ++ * Revision 1.191 2002/05/14 02:34:37 rgb ++ * Change all references to tdb, TDB or Tunnel Descriptor Block to ips, ++ * ipsec_sa or ipsec_sa. ++ * ++ * Revision 1.190 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.189 2002/04/24 07:36:32 mcr ++ * Moved from ./klips/net/ipsec/ipsec_tunnel.c,v ++ * ++ * Revision 1.188 2002/04/20 00:12:25 rgb ++ * Added esp IV CBC attack fix, disabled. ++ * ++ * Revision 1.187 2002/03/23 19:55:17 rgb ++ * Fix for 2.2 local IKE fragmentation blackhole. Still won't work if ++ * iptraf or another pcap app is running. ++ * ++ * Revision 1.186 2002/03/19 03:26:22 rgb ++ * Applied DHR's tunnel patch to streamline IKE/specialSA processing. 
++ * ++ * Revision 1.185 2002/02/20 04:13:05 rgb ++ * Send back ICMP_PKT_FILTERED upon %reject. ++ * ++ * Revision 1.184 2002/01/29 17:17:56 mcr ++ * moved include of ipsec_param.h to after include of linux/kernel.h ++ * otherwise, it seems that some option that is set in ipsec_param.h ++ * screws up something subtle in the include path to kernel.h, and ++ * it complains on the snprintf() prototype. ++ * ++ * Revision 1.183 2002/01/29 04:00:53 mcr ++ * more excise of kversions.h header. ++ * ++ * Revision 1.182 2002/01/29 02:13:18 mcr ++ * introduction of ipsec_kversion.h means that include of ++ * ipsec_param.h must preceed any decisions about what files to ++ * include to deal with differences in kernel source. ++ * ++ * Revision 1.181 2002/01/07 20:00:33 rgb ++ * Added IKE destination port debugging. ++ * ++ * Revision 1.180 2001/12/21 21:49:54 rgb ++ * Fixed bug as a result of moving IKE bypass above %trap/%hold code. ++ * ++ * Revision 1.179 2001/12/19 21:08:14 rgb ++ * Added transport protocol ports to ipsec_print_ip(). ++ * Update eroute info for non-SA targets. ++ * Added obey DF code disabled. ++ * Fixed formatting bugs in ipsec_tunnel_hard_header(). ++ * ++ * Revision 1.178 2001/12/05 09:36:10 rgb ++ * Moved the UDP/500 IKE check just above the %hold/%trap checks to avoid ++ * IKE packets being stolen by the %hold (and returned to the sending KMd ++ * in an ACQUIRE, ironically ;-). ++ * ++ * Revision 1.177 2001/11/26 09:23:50 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.170.2.1 2001/09/25 02:28:27 mcr ++ * struct tdb -> struct ipsec_sa. ++ * lifetime checks moved to common routines. ++ * cleaned up includes. ++ * ++ * Revision 1.170.2.2 2001/10/22 21:08:01 mcr ++ * include des.h, removed phony prototypes and fixed calling ++ * conventions to match real prototypes. ++ * ++ * Revision 1.176 2001/11/09 18:32:31 rgb ++ * Added Hans Schultz' fragmented UDP/500 IKE socket port selector. 
++ * ++ * Revision 1.175 2001/11/06 20:47:00 rgb ++ * Added Eric Espie's TRAPSUBNET fix, minus spin-lock-bh dabbling. ++ * ++ * Revision 1.174 2001/11/06 19:50:43 rgb ++ * Moved IP_SEND, ICMP_SEND, DEV_QUEUE_XMIT macros to ipsec_tunnel.h for ++ * use also by pfkey_v2_parser.c ++ * ++ * Revision 1.173 2001/10/29 21:53:44 henry ++ * tone down the device-down message slightly, until we can make it smarter ++ * ++ * Revision 1.172 2001/10/26 04:59:37 rgb ++ * Added a critical level syslog message if an ipsec device goes down. ++ * ++ * Revision 1.171 2001/10/18 04:45:21 rgb ++ * 2.4.9 kernel deprecates linux/malloc.h in favour of linux/slab.h, ++ * lib/freeswan.h version macros moved to lib/kversions.h. ++ * Other compiler directive cleanups. ++ * ++ * Revision 1.170 2001/09/25 00:09:50 rgb ++ * Added NetCelo's TRAPSUBNET code to convert a new type TRAPSUBNET into a ++ * HOLD. ++ * ++ * Revision 1.169 2001/09/15 16:24:05 rgb ++ * Re-inject first and last HOLD packet when an eroute REPLACE is done. ++ * ++ * Revision 1.168 2001/09/14 16:58:37 rgb ++ * Added support for storing the first and last packets through a HOLD. ++ * ++ * Revision 1.167 2001/09/08 21:13:33 rgb ++ * Added pfkey ident extension support for ISAKMPd. (NetCelo) ++ * ++ * Revision 1.166 2001/08/27 19:47:59 rgb ++ * Clear tdb before usage. ++ * Added comment: clear IF before calling routing? ++ * ++ * Revision 1.165 2001/07/03 01:23:53 rgb ++ * Send back ICMP iff DF set, !ICMP, offset==0, sysctl_icmp, iph->tot_len > ++ * emtu, and don't drop. ++ * ++ * Revision 1.164 2001/06/14 19:35:10 rgb ++ * Update copyright date. ++ * ++ * Revision 1.163 2001/06/06 20:28:51 rgb ++ * Added sanity checks for NULL skbs and devices. ++ * Added more debugging output to various functions. ++ * Removed redundant dev->priv argument to ipsec_tunnel_{at,de}tach(). ++ * Renamed ipsec_tunnel_attach() virtual and physical device arguments. ++ * Corrected neigh_setup() device function assignment. 
++ * Keep valid pointers to ipsec_tunnel_*() on detach. ++ * Set dev->type to the originally-initiallised value. ++ * ++ * Revision 1.162 2001/06/01 07:28:04 rgb ++ * Added sanity checks for detached devices. Don't down virtual devices ++ * to prevent packets going out in the clear if the detached device comes ++ * back up. ++ * ++ * Revision 1.161 2001/05/30 08:14:52 rgb ++ * Removed vestiges of esp-null transforms. ++ * NetDev Notifier instrumentation to track down disappearing devices. ++ * ++ * Revision 1.160 2001/05/29 05:15:12 rgb ++ * Added SS' PMTU patch which notifies sender if packet doesn't fit ++ * physical MTU (if it wasn't ICMP) and then drops it. ++ * ++ * Revision 1.159 2001/05/27 06:12:12 rgb ++ * Added structures for pid, packet count and last access time to eroute. ++ * Added packet count to beginning of /proc/net/ipsec_eroute. ++ * ++ * Revision 1.158 2001/05/24 05:39:33 rgb ++ * Applied source zeroing to 2.2 ip_route_output() call as well to enable ++ * PASS eroutes for opportunism. ++ * ++ * Revision 1.157 2001/05/23 22:35:28 rgb ++ * 2.4 source override simplification. ++ * ++ * Revision 1.156 2001/05/23 21:41:31 rgb ++ * Added error return code printing on ip_route_output(). ++ * ++ * Revision 1.155 2001/05/23 05:09:13 rgb ++ * Fixed incorrect ip_route_output() failure message. ++ * ++ * Revision 1.154 2001/05/21 14:53:31 rgb ++ * Added debug statement for case when ip_route_output() fails, causing ++ * packet to be dropped, but log looked ok. ++ * ++ * Revision 1.153 2001/05/19 02:37:54 rgb ++ * Fixed missing comment termination. ++ * ++ * Revision 1.152 2001/05/19 02:35:50 rgb ++ * Debug code optimisation for non-debug speed. ++ * Kernel version compiler define comments. ++ * 2.2 and 2.4 kernel ip_send device and ip debug output added. ++ * ++ * Revision 1.151 2001/05/18 16:17:35 rgb ++ * Changed reference from "magic" to "shunt" SAs. 
++ * ++ * Revision 1.150 2001/05/18 16:12:19 rgb ++ * Changed UDP/500 bypass test from 3 nested ifs to one anded if. ++ * ++ * Revision 1.149 2001/05/16 04:39:33 rgb ++ * Add default == eroute.dest to IKE bypass conditions for magic eroutes. ++ * ++ * Revision 1.148 2001/05/05 03:31:41 rgb ++ * IP frag debugging updates and enhancements. ++ * ++ * Revision 1.147 2001/05/03 19:41:40 rgb ++ * Added SS' skb_cow fix for 2.4.4. ++ * ++ * Revision 1.146 2001/04/30 19:28:16 rgb ++ * Update for 2.4.4. ip_select_ident() now has 3 args. ++ * ++ * Revision 1.145 2001/04/23 14:56:10 rgb ++ * Added spin_lock() check to prevent double-locking for multiple ++ * transforms and hence kernel lock-ups with SMP kernels. ++ * ++ * Revision 1.144 2001/04/21 23:04:45 rgb ++ * Define out skb->used for 2.4 kernels. ++ * Check if soft expire has already been sent before sending another to ++ * prevent ACQUIRE flooding. ++ * ++ * Revision 1.143 2001/03/16 07:37:21 rgb ++ * Added comments to all #endifs. ++ * ++ * Revision 1.142 2001/02/28 05:03:27 rgb ++ * Clean up and rationalise startup messages. ++ * ++ * Revision 1.141 2001/02/27 22:24:54 rgb ++ * Re-formatting debug output (line-splitting, joining, 1arg/line). ++ * Check for satoa() return codes. ++ * ++ * Revision 1.140 2001/02/27 06:40:12 rgb ++ * Fixed TRAP->HOLD eroute byte order. ++ * ++ * Revision 1.139 2001/02/26 20:38:59 rgb ++ * Added compiler defines for 2.4.x-specific code. ++ * ++ * Revision 1.138 2001/02/26 19:57:27 rgb ++ * Implement magic SAs %drop, %reject, %trap, %hold, %pass as part ++ * of the new SPD and to support opportunistic. ++ * Drop sysctl_ipsec_{no_eroute_pass,opportunistic}, replaced by magic SAs. ++ * ++ * Revision 1.137 2001/02/19 22:29:49 rgb ++ * Fixes for presence of active ipv6 segments which share ipsec physical ++ * device (gg). ++ * ++ * Revision 1.136 2001/01/29 22:30:38 rgb ++ * Fixed minor acquire debug printing bug. 
++ * ++ * Revision 1.135 2001/01/29 22:19:45 rgb ++ * Zero source address for 2.4 bypass route lookup. ++ * ++ * Revision 1.134 2001/01/23 20:19:49 rgb ++ * 2.4 fix to remove removed is_clone member. ++ * ++ * Revision 1.133 2000/12/09 22:08:35 rgb ++ * Fix NET_23 bug, should be NETDEV_23. ++ * ++ * Revision 1.132 2000/12/01 06:54:50 rgb ++ * Fix for new 2.4 IP TTL default variable name. ++ * ++ * Revision 1.131 2000/11/09 20:52:15 rgb ++ * More spinlock shuffling, locking earlier and unlocking later in rcv to ++ * include ipcomp and prevent races, renaming some tdb variables that got ++ * forgotten, moving some unlocks to include tdbs and adding a missing ++ * unlock. Thanks to Svenning for some of these. ++ * ++ * Revision 1.130 2000/11/09 20:11:22 rgb ++ * Minor shuffles to fix non-standard kernel config option selection. ++ * ++ * Revision 1.129 2000/11/06 04:32:49 rgb ++ * Clean up debug printing. ++ * Copy skb->protocol for all kernel versions. ++ * Ditched spin_lock_irqsave in favour of spin_lock. ++ * Disabled TTL decrement, done in ip_forward. ++ * Added debug printing before pfkey_acquire(). ++ * Fixed printk-deltdbchain-spin_lock races (Svenning). ++ * Use defaultTTL for 2.1+ kernels. ++ * Add Svenning's adaptive content compression. ++ * Fix up debug display arguments. ++ * ++ * Revision 1.128 2000/09/28 00:58:57 rgb ++ * Moved the IKE passthrough check after the eroute lookup so we can pass ++ * IKE through intermediate tunnels. ++ * ++ * Revision 1.127 2000/09/22 17:52:11 rgb ++ * Fixed misleading ipcomp debug output. ++ * ++ * Revision 1.126 2000/09/22 04:22:56 rgb ++ * Fixed dumb spi->cpi conversion error. ++ * ++ * Revision 1.125 2000/09/21 04:34:48 rgb ++ * A few debug-specific things should be hidden under ++ * CONFIG_IPSEC_DEBUG.(MB) ++ * Improved ip_send() error handling.(MB) ++ * ++ * Revision 1.124 2000/09/21 03:40:58 rgb ++ * Added more debugging to try and track down the cpi outward copy problem. 
++ * ++ * Revision 1.123 2000/09/19 07:08:49 rgb ++ * Added debugging to outgoing compression report. ++ * ++ * Revision 1.122 2000/09/18 19:21:26 henry ++ * RGB-supplied fix for RH5.2 problem ++ * ++ * Revision 1.121 2000/09/17 21:05:09 rgb ++ * Added tdb to skb_compress call to write in cpi. ++ * ++ * Revision 1.120 2000/09/17 16:57:16 rgb ++ * Added Svenning's patch to remove restriction of ipcomp to innermost ++ * transform. ++ * ++ * Revision 1.119 2000/09/15 11:37:01 rgb ++ * Merge in heavily modified Svenning Soerensen's ++ * IPCOMP zlib deflate code. ++ * ++ * Revision 1.118 2000/09/15 04:57:16 rgb ++ * Moved debug output after sanity check. ++ * Added tos copy sysctl. ++ * ++ * Revision 1.117 2000/09/12 03:22:51 rgb ++ * Converted ipsec_icmp, no_eroute_pass, opportunistic and #if0 debugs to ++ * sysctl. ++ * ++ * Revision 1.116 2000/09/08 19:18:19 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * Added outgoing opportunistic hook, ifdef'ed out. ++ * ++ * Revision 1.115 2000/08/30 05:27:29 rgb ++ * Removed all the rest of the references to tdb_spi, tdb_proto, tdb_dst. ++ * Kill remainder of tdb_xform, tdb_xdata, xformsw. ++ * ++ * Revision 1.114 2000/08/28 18:15:46 rgb ++ * Added MB's nf-debug reset patch. ++ * ++ * Revision 1.113 2000/08/27 02:26:40 rgb ++ * Send all no-eroute-bypass, pluto-bypass and passthrough packets through ++ * fragmentation machinery for 2.0, 2.2 and 2.4 kernels. ++ * ++ * Revision 1.112 2000/08/20 21:37:33 rgb ++ * Activated pfkey_expire() calls. ++ * Added a hard/soft expiry parameter to pfkey_expire(). (Momchil) ++ * Re-arranged the order of soft and hard expiry to conform to RFC2367. ++ * Clean up references to CONFIG_IPSEC_PFKEYv2. ++ * ++ * Revision 1.111 2000/08/01 14:51:51 rgb ++ * Removed _all_ remaining traces of DES. ++ * ++ * Revision 1.110 2000/07/28 14:58:31 rgb ++ * Changed kfree_s to kfree, eliminating extra arg to fix 2.4.0-test5. 
++ * ++ * Revision 1.109 2000/07/28 13:50:54 rgb ++ * Changed enet_statistics to net_device_stats and added back compatibility ++ * for pre-2.1.19. ++ * ++ * Revision 1.108 2000/05/16 03:03:11 rgb ++ * Updates for 2.3.99pre8 from MB. ++ * ++ * Revision 1.107 2000/05/10 23:08:21 rgb ++ * Print a debug warning about bogus packets received by the outgoing ++ * processing machinery only when klipsdebug is not set to none. ++ * Comment out the device initialisation informational messages. ++ * ++ * Revision 1.106 2000/05/10 19:17:14 rgb ++ * Define an IP_SEND macro, intending to have all packet passthroughs ++ * use fragmentation. This didn't quite work, but is a step in the ++ * right direction. ++ * Added buffer allocation debugging statements. ++ * Added configure option to shut off no eroute passthrough. ++ * Only check usetime against soft and hard limits if the tdb has been ++ * used. ++ * Cast output of ntohl so that the broken prototype doesn't make our ++ * compile noisy. ++ * ++ * Revision 1.105 2000/03/22 16:15:37 rgb ++ * Fixed renaming of dev_get (MB). ++ * ++ * Revision 1.104 2000/03/16 14:04:15 rgb ++ * Indented headers for readability. ++ * Fixed debug scope to enable compilation with debug off. ++ * Added macros for ip_chk_addr and IS_MYADDR for identifying self. ++ * ++ * Revision 1.103 2000/03/16 07:11:07 rgb ++ * Hardcode PF_KEYv2 support. ++ * Fixed bug which allowed UDP/500 packet from another machine ++ * through in the clear. ++ * Added disabled skb->protocol fix for ISDN/ASYNC PPP from Matjaz Godec. ++ * ++ * Revision 1.102 2000/03/14 12:26:59 rgb ++ * Added skb->nfct support for clearing netfilter conntrack bits (MB). ++ * ++ * Revision 1.101 2000/02/14 21:05:22 rgb ++ * Added MB's netif_queue fix for kernels 2.3.43+. ++ * ++ * Revision 1.100 2000/01/26 10:04:57 rgb ++ * Fixed noisy 2.0 printk arguments. ++ * ++ * Revision 1.99 2000/01/21 06:16:25 rgb ++ * Added sanity checks on skb_push(), skb_pull() to prevent panics. 
++ * Switched to AF_ENCAP macro. ++ * Shortened debug output per packet and re-arranging debug_tunnel ++ * bitmap flags, while retaining necessary information to avoid ++ * trampling the kernel print ring buffer. ++ * Reformatted recursion switch code. ++ * Changed all references to tdb_proto to tdb_said.proto for clarity. ++ * ++ * Revision 1.98 2000/01/13 08:09:31 rgb ++ * Shuffled debug_tunnel switches to focus output. ++ * Fixed outgoing recursion bug, limiting to recursing only if the remote ++ * SG changes and if it is valid, ie. not passthrough. ++ * Clarified a number of debug messages. ++ * ++ * Revision 1.97 2000/01/10 16:37:16 rgb ++ * MB support for new ip_select_ident() upon disappearance of ++ * ip_id_count in 2.3.36+. ++ * ++ * Revision 1.96 1999/12/31 14:59:08 rgb ++ * MB fix to use new skb_copy_expand in kernel 2.3.35. ++ * ++ * Revision 1.95 1999/12/29 21:15:44 rgb ++ * Fix tncfg to aliased device bug. ++ * ++ * Revision 1.94 1999/12/22 04:26:06 rgb ++ * Converted all 'static' functions to 'DEBUG_NO_STATIC' to enable ++ * debugging by providing external labels to all functions with debugging ++ * turned on. ++ * ++ * Revision 1.93 1999/12/13 13:30:14 rgb ++ * Changed MTU reports and HW address reporting back to debug only. ++ * ++ * Revision 1.92 1999/12/07 18:57:56 rgb ++ * Fix PFKEY symbol compile error (SADB_*) without pfkey enabled. ++ * ++ * Revision 1.91 1999/12/01 22:15:36 rgb ++ * Add checks for LARVAL and DEAD SAs. ++ * Change state of SA from MATURE to DYING when a soft lifetime is ++ * reached and print debug warning. ++ * ++ * Revision 1.90 1999/11/23 23:04:04 rgb ++ * Use provided macro ADDRTOA_BUF instead of hardcoded value. ++ * Sort out pfkey and freeswan headers, putting them in a library path. ++ * ++ * Revision 1.89 1999/11/18 18:50:59 rgb ++ * Changed all device registrations for static linking to ++ * dynamic to reduce the number and size of patches. 
++ * ++ * Revision 1.88 1999/11/18 04:09:19 rgb ++ * Replaced all kernel version macros to shorter, readable form. ++ * ++ * Revision 1.87 1999/11/17 15:53:40 rgb ++ * Changed all occurrences of #include "../../../lib/freeswan.h" ++ * to #include which works due to -Ilibfreeswan in the ++ * klips/net/ipsec/Makefile. ++ * ++ * Revision 1.86 1999/10/16 18:25:37 rgb ++ * Moved SA lifetime expiry checks before packet processing. ++ * Expire SA on replay counter rollover. ++ * ++ * Revision 1.85 1999/10/16 04:24:31 rgb ++ * Add stats for time since last packet. ++ * ++ * Revision 1.84 1999/10/16 00:30:47 rgb ++ * Added SA lifetime counting. ++ * ++ * Revision 1.83 1999/10/15 22:15:57 rgb ++ * Clean out cruft. ++ * Add debugging. ++ * ++ * Revision 1.82 1999/10/08 18:26:19 rgb ++ * Fix 2.0.3x outgoing fragmented packet memory leak. ++ * ++ * Revision 1.81 1999/10/05 02:38:54 rgb ++ * Lower the default mtu of virtual devices to 16260. ++ * ++ * Revision 1.80 1999/10/03 18:56:41 rgb ++ * Spinlock support for 2.3.xx. ++ * Don't forget to undo spinlocks on error! ++ * Check for valid eroute before copying the structure. ++ * ++ * Revision 1.79 1999/10/01 15:44:53 rgb ++ * Move spinlock header include to 2.1> scope. ++ * ++ * Revision 1.78 1999/10/01 00:02:43 rgb ++ * Added tdb structure locking. ++ * Added eroute structure locking. ++ * ++ * Revision 1.77 1999/09/30 02:52:29 rgb ++ * Add Marc Boucher's Copy-On-Write code (same as ipsec_rcv.c). ++ * ++ * Revision 1.76 1999/09/25 19:31:27 rgb ++ * Refine MSS hack to affect SYN, but not SYN+ACK packets. ++ * ++ * Revision 1.75 1999/09/24 22:52:38 rgb ++ * Fix two things broken in 2.0.38 by trying to fix network notifiers. ++ * ++ * Revision 1.74 1999/09/24 00:30:37 rgb ++ * Add test for changed source as well as destination to check for ++ * recursion. ++ * ++ * Revision 1.73 1999/09/23 20:52:24 rgb ++ * Add James Morris' MSS hack patch, disabled. 
++ * ++ * Revision 1.72 1999/09/23 20:22:40 rgb ++ * Enable, tidy and fix network notifier code. ++ * ++ * Revision 1.71 1999/09/23 18:09:05 rgb ++ * Clean up 2.2.x fragmenting traces. ++ * Disable dev->type switching, forcing ARPHRD_TUNNEL. ++ * ++ * Revision 1.70 1999/09/22 14:14:24 rgb ++ * Add sanity checks for revectored calls to prevent calling a downed I/F. ++ * ++ * Revision 1.69 1999/09/21 15:00:57 rgb ++ * Add Marc Boucher's packet size check. ++ * Flesh out network device notifier code. ++ * ++ * Revision 1.68 1999/09/18 11:39:57 rgb ++ * Start to add (disabled) netdevice notifier code. ++ * ++ * Revision 1.67 1999/09/17 23:44:40 rgb ++ * Add a comment warning potential code hackers to stay away from mac.raw. ++ * ++ * Revision 1.66 1999/09/17 18:04:02 rgb ++ * Add fix for unpredictable hard_header_len for ISDN folks (thanks MB). ++ * Ditch TTL decrement in 2.2 (MB). ++ * ++ * Revision 1.65 1999/09/15 23:15:35 henry ++ * Marc Boucher's PPP fixes ++ * ++ * Revision 1.64 1999/09/07 13:40:53 rgb ++ * Ditch unreliable references to skb->mac.raw. ++ * ++ * Revision 1.63 1999/08/28 11:33:09 rgb ++ * Check for null skb->mac pointer. ++ * ++ * Revision 1.62 1999/08/28 02:02:30 rgb ++ * Add Marc Boucher's fix for properly dealing with skb->sk. ++ * ++ * Revision 1.61 1999/08/27 05:23:05 rgb ++ * Clean up skb->data/raw/nh/h manipulation. ++ * Add Marc Boucher's mods to aid tcpdump. ++ * Add sanity checks to skb->raw/nh/h pointer copies in skb_copy_expand. ++ * Re-order hard_header stripping -- might be able to remove it... ++ * ++ * Revision 1.60 1999/08/26 20:01:02 rgb ++ * Tidy up compiler directives and macros. ++ * Re-enable ICMP for tunnels where inner_dst != outer_dst. ++ * Remove unnecessary skb->dev = physdev assignment affecting 2.2.x. ++ * ++ * Revision 1.59 1999/08/25 15:44:41 rgb ++ * Clean up from 2.2.x instrumenting for compilation under 2.0.36. ++ * ++ * Revision 1.58 1999/08/25 15:00:54 rgb ++ * Add dst cache code for 2.2.xx. 
++ * Add sanity check for skb packet header pointers. ++ * Add/modify debugging instrumentation to *_start_xmit, *_hard_header and ++ * *_rebuild_header. ++ * Add neigh_* cache code. ++ * Change dev->type back to ARPHRD_TUNNEL. ++ * ++ * Revision 1.57 1999/08/17 21:50:23 rgb ++ * Fixed minor debug output bugs. ++ * Regrouped error recovery exit code. ++ * Added compiler directives to remove unwanted code and symbols. ++ * Shut off ICMP messages: to be refined to only send ICMP to remote systems. ++ * Add debugging code for output function addresses. ++ * Fix minor bug in (possibly unused) header_cache_bind function. ++ * Add device neighbour caching code. ++ * Change dev->type from ARPHRD_TUNNEL to physdev->type. ++ * ++ * Revision 1.56 1999/08/03 17:22:56 rgb ++ * Debug output clarification using KERN_* macros. Other inactive changes ++ * added. ++ * ++ * Revision 1.55 1999/08/03 16:58:46 rgb ++ * Fix skb_copy_expand size bug. Was getting incorrect size. ++ * ++ * Revision 1.54 1999/07/14 19:32:38 rgb ++ * Fix oversize packet crash and ssh stalling in 2.2.x kernels. ++ * ++ * Revision 1.53 1999/06/10 15:44:02 rgb ++ * Minor reformatting and clean-up. ++ * ++ * Revision 1.52 1999/05/09 03:25:36 rgb ++ * Fix bug introduced by 2.2 quick-and-dirty patch. ++ * ++ * Revision 1.51 1999/05/08 21:24:59 rgb ++ * Add casting to silence the 2.2.x compile. ++ * ++ * Revision 1.50 1999/05/05 22:02:32 rgb ++ * Add a quick and dirty port to 2.2 kernels by Marc Boucher . ++ * ++ * Revision 1.49 1999/04/29 15:18:52 rgb ++ * Change gettdb parameter to a pointer to reduce stack loading and ++ * facilitate parameter sanity checking. ++ * Fix undetected bug that might have tried to access a null pointer. ++ * Eliminate unnessessary usage of tdb_xform member to further switch ++ * away from the transform switch to the algorithm switch. ++ * Add return values to init and cleanup functions. 
++ * ++ * Revision 1.48 1999/04/16 15:38:00 rgb ++ * Minor rearrangement of freeing code to avoid memory leaks with impossible or ++ * rare situations. ++ * ++ * Revision 1.47 1999/04/15 15:37:25 rgb ++ * Forward check changes from POST1_00 branch. ++ * ++ * Revision 1.32.2.4 1999/04/13 21:00:18 rgb ++ * Ditch 'things I wish I had known before...'. ++ * ++ * Revision 1.32.2.3 1999/04/13 20:34:38 rgb ++ * Free skb after fragmentation. ++ * Use stats more effectively. ++ * Add I/F to mtu notch-down reporting. ++ * ++ * Revision 1.32.2.2 1999/04/02 04:26:14 rgb ++ * Backcheck from HEAD, pre1.0. ++ * ++ * Revision 1.46 1999/04/11 00:29:00 henry ++ * GPL boilerplate ++ * ++ * Revision 1.45 1999/04/07 15:42:01 rgb ++ * Fix mtu/ping bug AGAIN! ++ * ++ * Revision 1.44 1999/04/06 04:54:27 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.43 1999/04/04 03:57:07 rgb ++ * ip_fragment() doesn't free the supplied skb. Freed. ++ * ++ * Revision 1.42 1999/04/01 23:27:15 rgb ++ * Preload size of virtual mtu. ++ * ++ * Revision 1.41 1999/04/01 09:31:23 rgb ++ * Invert meaning of ICMP PMTUD config option and clarify. ++ * Code clean-up. ++ * ++ * Revision 1.40 1999/04/01 04:37:17 rgb ++ * SSH stalling bug fix. ++ * ++ * Revision 1.39 1999/03/31 23:44:28 rgb ++ * Don't send ICMP on DF and frag_off. ++ * ++ * Revision 1.38 1999/03/31 15:20:10 rgb ++ * Quiet down debugging. ++ * ++ * Revision 1.37 1999/03/31 08:30:31 rgb ++ * Add switch to shut off ICMP PMTUD packets. ++ * ++ * Revision 1.36 1999/03/31 05:44:47 rgb ++ * Keep PMTU reduction private. ++ * ++ * Revision 1.35 1999/03/27 15:13:02 rgb ++ * PMTU/fragmentation bug fix. ++ * ++ * Revision 1.34 1999/03/17 21:19:26 rgb ++ * Fix kmalloc nonatomic bug. ++ * ++ * Revision 1.33 1999/03/17 15:38:42 rgb ++ * Code clean-up. ++ * ESP_NULL IV bug fix. ++ * ++ * Revision 1.32 1999/03/01 20:44:25 rgb ++ * Code clean-up. ++ * Memory leak bug fix. 
++ * ++ * Revision 1.31 1999/02/27 00:02:09 rgb ++ * Tune to report the MTU reduction once, rather than after every recursion ++ * through the encapsulating code, preventing tcp stream stalling. ++ * ++ * Revision 1.30 1999/02/24 20:21:01 rgb ++ * Reformat debug printk's. ++ * Fix recursive encapsulation, dynamic MTU bugs and add debugging code. ++ * Clean-up. ++ * ++ * Revision 1.29 1999/02/22 17:08:14 rgb ++ * Fix recursive encapsulation code. ++ * ++ * Revision 1.28 1999/02/19 18:27:02 rgb ++ * Improve DF, fragmentation and PMTU behaviour and add dynamic MTU discovery. ++ * ++ * Revision 1.27 1999/02/17 16:51:37 rgb ++ * Clean out unused cruft. ++ * Temporarily tone down volume of debug output. ++ * Temporarily shut off fragment rejection. ++ * Disabled temporary failed recursive encapsulation loop. ++ * ++ * Revision 1.26 1999/02/12 21:21:26 rgb ++ * Move KLIPS_PRINT to ipsec_netlink.h for accessibility. ++ * ++ * Revision 1.25 1999/02/11 19:38:27 rgb ++ * More clean-up. ++ * Add sanity checking for skb_copy_expand() to prevent kernel panics on ++ * skb_put() values out of range. ++ * Fix head/tailroom calculation causing skb_put() out-of-range values. ++ * Fix return values to prevent 'nonatomic alloc_skb' warnings. ++ * Allocate new skb iff needed. ++ * Added more debug statements. ++ * Make headroom depend on structure, not hard-coded values. ++ * ++ * Revision 1.24 1999/02/10 23:20:33 rgb ++ * Shut up annoying 'statement has no effect' compiler warnings with ++ * debugging compiled out. ++ * ++ * Revision 1.23 1999/02/10 22:36:30 rgb ++ * Clean-up obsolete, unused and messy code. ++ * Converted most IPSEC_DEBUG statements to KLIPS_PRINT macros. ++ * Rename ipsec_tunnel_do_xmit to ipsec_tunnel_start_xmit and eliminated ++ * original ipsec_tunnel_start_xmit. ++ * Send all packet with different inner and outer destinations directly to ++ * the attached physical device, rather than back through ip_forward, ++ * preventing disappearing routes problems. 
++ * Do sanity checking before investing too much CPU in allocating new ++ * structures. ++ * Fail on IP header options: We cannot process them yet. ++ * Add some helpful comments. ++ * Use virtual device for parameters instead of physical device. ++ * ++ * Revision 1.22 1999/02/10 03:03:02 rgb ++ * Duh. Fixed the TTL bug: forgot to update the checksum. ++ * ++ * Revision 1.21 1999/02/09 23:17:53 rgb ++ * Add structure members to ipsec_print_ip debug function. ++ * Temporarily fix TTL bug preventing tunnel mode from functioning. ++ * ++ * Revision 1.20 1999/02/09 00:14:25 rgb ++ * Add KLIPSPRINT macro. (Not used yet, though.) ++ * Delete old ip_tunnel code (BADCODE). ++ * Decrement TTL in outgoing packet. ++ * Set TTL on new IPIP_TUNNEL to default, not existing packet TTL. ++ * Delete ethernet only feature and fix hard-coded hard_header_len. ++ * ++ * Revision 1.19 1999/01/29 17:56:22 rgb ++ * 64-bit re-fix submitted by Peter Onion. ++ * ++ * Revision 1.18 1999/01/28 22:43:24 rgb ++ * Fixed bug in ipsec_print_ip that caused an OOPS, found by P.Onion. ++ * ++ * Revision 1.17 1999/01/26 02:08:16 rgb ++ * Removed CONFIG_IPSEC_ALGO_SWITCH macro. ++ * Removed dead code. ++ * ++ * Revision 1.16 1999/01/22 06:25:26 rgb ++ * Cruft clean-out. ++ * Added algorithm switch code. ++ * 64-bit clean-up. ++ * Passthrough on IPIP protocol, spi 0x0 fix. ++ * Enhanced debugging. ++ * ++ * Revision 1.15 1998/12/01 13:22:04 rgb ++ * Added support for debug printing of version info. ++ * ++ * Revision 1.14 1998/11/30 13:22:55 rgb ++ * Rationalised all the klips kernel file headers. They are much shorter ++ * now and won't conflict under RH5.2. ++ * ++ * Revision 1.13 1998/11/17 21:13:52 rgb ++ * Put IKE port bypass debug output in user-switched debug statements. ++ * ++ * Revision 1.12 1998/11/13 13:20:25 rgb ++ * Fixed ntohs bug in udp/500 hole for IKE. ++ * ++ * Revision 1.11 1998/11/10 08:01:19 rgb ++ * Kill tcp/500 hole, keep udp/500 hole. 
++ * ++ * Revision 1.10 1998/11/09 21:29:26 rgb ++ * If no eroute is found, discard packet and incr. tx_error. ++ * ++ * Revision 1.9 1998/10/31 06:50:00 rgb ++ * Add tcp/udp/500 bypass. ++ * Fixed up comments in #endif directives. ++ * ++ * Revision 1.8 1998/10/27 00:34:31 rgb ++ * Reformat debug output of IP headers. ++ * Newlines added before calls to ipsec_print_ip. ++ * ++ * Revision 1.7 1998/10/19 14:44:28 rgb ++ * Added inclusion of freeswan.h. ++ * sa_id structure implemented and used: now includes protocol. ++ * ++ * Revision 1.6 1998/10/09 04:31:35 rgb ++ * Added 'klips_debug' prefix to all klips printk debug statements. ++ * ++ * Revision 1.5 1998/08/28 03:09:51 rgb ++ * Prevent kernel log spam with default route through ipsec. ++ * ++ * Revision 1.4 1998/08/05 22:23:09 rgb ++ * Change setdev return code to ENXIO for a non-existant physical device. ++ * ++ * Revision 1.3 1998/07/29 20:41:11 rgb ++ * Add ipsec_tunnel_clear to clear all tunnel attachments. ++ * ++ * Revision 1.2 1998/06/25 20:00:33 rgb ++ * Clean up #endif comments. ++ * Rename dev_ipsec to dev_ipsec0 for consistency. ++ * Document ipsec device fields. ++ * Make ipsec_tunnel_probe visible from rest of kernel for static linking. ++ * Get debugging report for *every* ipsec device initialisation. ++ * Comment out redundant code. ++ * ++ * Revision 1.1 1998/06/18 21:27:50 henry ++ * move sources from klips/src to klips/net/ipsec, to keep stupid ++ * kernel-build scripts happier in the presence of symlinks ++ * ++ * Revision 1.8 1998/06/14 23:49:40 rgb ++ * Clarify version reporting on module loading. ++ * ++ * Revision 1.7 1998/05/27 23:19:20 rgb ++ * Added version reporting. ++ * ++ * Revision 1.6 1998/05/18 21:56:23 rgb ++ * Clean up for numerical consistency of output and cleaning up debug code. ++ * ++ * Revision 1.5 1998/05/12 02:44:23 rgb ++ * Clarifying 'no e-route to host' message. 
++ * ++ * Revision 1.4 1998/04/30 15:34:35 rgb ++ * Enclosed most remaining debugging statements in #ifdef's to make it quieter. ++ * ++ * Revision 1.3 1998/04/21 21:28:54 rgb ++ * Rearrange debug switches to change on the fly debug output from user ++ * space. Only kernel changes checked in at this time. radij.c was also ++ * changed to temporarily remove buggy debugging code in rj_delete causing ++ * an OOPS and hence, netlink device open errors. ++ * ++ * Revision 1.2 1998/04/12 22:03:24 rgb ++ * Updated ESP-3DES-HMAC-MD5-96, ++ * ESP-DES-HMAC-MD5-96, ++ * AH-HMAC-MD5-96, ++ * AH-HMAC-SHA1-96 since Henry started freeswan cvs repository ++ * from old standards (RFC182[5-9] to new (as of March 1998) drafts. ++ * ++ * Fixed eroute references in /proc/net/ipsec*. ++ * ++ * Started to patch module unloading memory leaks in ipsec_netlink and ++ * radij tree unloading. ++ * ++ * Revision 1.1 1998/04/09 03:06:12 henry ++ * sources moved up from linux/net/ipsec ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:04 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.5 1997/06/03 04:24:48 ji ++ * Added transport mode. ++ * Changed the way routing is done. ++ * Lots of bug fixes. ++ * ++ * Revision 0.4 1997/01/15 01:28:15 ji ++ * No changes. ++ * ++ * Revision 0.3 1996/11/20 14:39:04 ji ++ * Minor cleanups. ++ * Rationalized debugging code. ++ * ++ * Revision 0.2 1996/11/02 00:18:33 ji ++ * First limited release. ++ * ++ * Local Variables: ++ * c-style: linux ++ * End: ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_xform.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,360 @@ ++/* ++ * Common routines for IPSEC transformations. ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs. 
++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: ipsec_xform.c,v 1.65.2.1 2006-10-06 21:39:26 paul Exp $ ++ */ ++ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++#include /* printk() */ ++ ++#include "freeswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include ++#include /* get_random_bytes() */ ++#include ++#ifdef SPINLOCK ++# ifdef SPINLOCK_23 ++# include /* *lock* */ ++# else /* SPINLOCK_23 */ ++# include /* *lock* */ ++# endif /* SPINLOCK_23 */ ++#endif /* SPINLOCK */ ++ ++#include ++ ++#include "freeswan/radij.h" ++#include "freeswan/ipsec_encap.h" ++#include "freeswan/ipsec_radij.h" ++#include "freeswan/ipsec_xform.h" ++#include "freeswan/ipsec_ipe4.h" ++#include "freeswan/ipsec_ah.h" ++#include "freeswan/ipsec_esp.h" ++ ++#include ++#include ++ ++#ifdef CONFIG_KLIPS_DEBUG ++int debug_xform = 0; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++#ifdef SPINLOCK ++spinlock_t tdb_lock = SPIN_LOCK_UNLOCKED; ++#else /* SPINLOCK */ ++spinlock_t tdb_lock; ++#endif /* SPINLOCK */ ++ ++/* ++ * $Log: ipsec_xform.c,v $ ++ * Revision 1.65.2.1 2006-10-06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. 
This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.65 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. ++ * ++ * Revision 1.64 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.63 2003/10/31 02:27:55 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.62.30.1 2003/10/29 01:30:41 mcr ++ * elimited "struct sa_id". ++ * ++ * Revision 1.62 2002/05/14 02:34:21 rgb ++ * Delete stale code. ++ * ++ * Revision 1.61 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.60 2002/04/24 07:36:33 mcr ++ * Moved from ./klips/net/ipsec/ipsec_xform.c,v ++ * ++ * Revision 1.59 2002/03/29 15:01:36 rgb ++ * Delete decommissioned code. ++ * ++ * Revision 1.58 2002/01/29 17:17:57 mcr ++ * moved include of ipsec_param.h to after include of linux/kernel.h ++ * otherwise, it seems that some option that is set in ipsec_param.h ++ * screws up something subtle in the include path to kernel.h, and ++ * it complains on the snprintf() prototype. ++ * ++ * Revision 1.57 2002/01/29 04:00:53 mcr ++ * more excise of kversions.h header. ++ * ++ * Revision 1.56 2001/11/27 05:17:22 mcr ++ * turn off the worst of the per-packet debugging. ++ * ++ * Revision 1.55 2001/11/26 09:23:50 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.54 2001/10/18 04:45:21 rgb ++ * 2.4.9 kernel deprecates linux/malloc.h in favour of linux/slab.h, ++ * lib/freeswan.h version macros moved to lib/kversions.h. ++ * Other compiler directive cleanups. ++ * ++ * Revision 1.53 2001/09/08 21:13:34 rgb ++ * Added pfkey ident extension support for ISAKMPd. (NetCelo) ++ * ++ * Revision 1.52 2001/06/14 19:35:11 rgb ++ * Update copyright date. ++ * ++ * Revision 1.51 2001/05/30 08:14:03 rgb ++ * Removed vestiges of esp-null transforms. 
++ * ++ * Revision 1.50 2001/05/03 19:43:18 rgb ++ * Initialise error return variable. ++ * Update SENDERR macro. ++ * Fix sign of error return code for ipsec_tdbcleanup(). ++ * Use more appropriate return code for ipsec_tdbwipe(). ++ * ++ * Revision 1.49 2001/04/19 18:56:17 rgb ++ * Fixed tdb table locking comments. ++ * ++ * Revision 1.48 2001/02/27 22:24:55 rgb ++ * Re-formatting debug output (line-splitting, joining, 1arg/line). ++ * Check for satoa() return codes. ++ * ++ * Revision 1.47 2000/11/06 04:32:08 rgb ++ * Ditched spin_lock_irqsave in favour of spin_lock_bh. ++ * ++ * Revision 1.46 2000/09/20 16:21:57 rgb ++ * Cleaned up ident string alloc/free. ++ * ++ * Revision 1.45 2000/09/08 19:16:51 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * Removed all references to CONFIG_IPSEC_PFKEYv2. ++ * ++ * Revision 1.44 2000/08/30 05:29:04 rgb ++ * Compiler-define out no longer used tdb_init() in ipsec_xform.c. ++ * ++ * Revision 1.43 2000/08/18 21:30:41 rgb ++ * Purged all tdb_spi, tdb_proto and tdb_dst macros. They are unclear. ++ * ++ * Revision 1.42 2000/08/01 14:51:51 rgb ++ * Removed _all_ remaining traces of DES. ++ * ++ * Revision 1.41 2000/07/28 14:58:31 rgb ++ * Changed kfree_s to kfree, eliminating extra arg to fix 2.4.0-test5. ++ * ++ * Revision 1.40 2000/06/28 05:50:11 rgb ++ * Actually set iv_bits. ++ * ++ * Revision 1.39 2000/05/10 23:11:09 rgb ++ * Added netlink debugging output. ++ * Added a cast to quiet down the ntohl bug. ++ * ++ * Revision 1.38 2000/05/10 19:18:42 rgb ++ * Cast output of ntohl so that the broken prototype doesn't make our ++ * compile noisy. ++ * ++ * Revision 1.37 2000/03/16 14:04:59 rgb ++ * Hardwired CONFIG_IPSEC_PFKEYv2 on. ++ * ++ * Revision 1.36 2000/01/26 10:11:28 rgb ++ * Fixed spacing in error text causing run-in words. ++ * ++ * Revision 1.35 2000/01/21 06:17:16 rgb ++ * Tidied up compiler directive indentation for readability. 
++ * Added ictx,octx vars for simplification.(kravietz) ++ * Added macros for HMAC padding magic numbers.(kravietz) ++ * Fixed missing key length reporting bug. ++ * Fixed bug in tdbwipe to return immediately on NULL tdbp passed in. ++ * ++ * Revision 1.34 1999/12/08 00:04:19 rgb ++ * Fixed SA direction overwriting bug for netlink users. ++ * ++ * Revision 1.33 1999/12/01 22:16:44 rgb ++ * Minor formatting changes in ESP MD5 initialisation. ++ * ++ * Revision 1.32 1999/11/25 09:06:36 rgb ++ * Fixed error return messages, should be returning negative numbers. ++ * Implemented SENDERR macro for propagating error codes. ++ * Added debug message and separate error code for algorithms not compiled ++ * in. ++ * ++ * Revision 1.31 1999/11/23 23:06:26 rgb ++ * Sort out pfkey and freeswan headers, putting them in a library path. ++ * ++ * Revision 1.30 1999/11/18 04:09:20 rgb ++ * Replaced all kernel version macros to shorter, readable form. ++ * ++ * Revision 1.29 1999/11/17 15:53:40 rgb ++ * Changed all occurrences of #include "../../../lib/freeswan.h" ++ * to #include which works due to -Ilibfreeswan in the ++ * klips/net/ipsec/Makefile. ++ * ++ * Revision 1.28 1999/10/18 20:04:01 rgb ++ * Clean-out unused cruft. ++ * ++ * Revision 1.27 1999/10/03 19:01:03 rgb ++ * Spinlock support for 2.3.xx and 2.0.xx kernels. ++ * ++ * Revision 1.26 1999/10/01 16:22:24 rgb ++ * Switch from assignment init. to functional init. of spinlocks. ++ * ++ * Revision 1.25 1999/10/01 15:44:54 rgb ++ * Move spinlock header include to 2.1> scope. ++ * ++ * Revision 1.24 1999/10/01 00:03:46 rgb ++ * Added tdb structure locking. ++ * Minor formatting changes. ++ * Add function to initialize tdb hash table. ++ * ++ * Revision 1.23 1999/05/25 22:42:12 rgb ++ * Add deltdbchain() debugging. ++ * ++ * Revision 1.22 1999/05/25 21:24:31 rgb ++ * Add debugging statements to deltdbchain(). ++ * ++ * Revision 1.21 1999/05/25 03:51:48 rgb ++ * Refix error return code. 
++ * ++ * Revision 1.20 1999/05/25 03:34:07 rgb ++ * Fix error return for flush. ++ * ++ * Revision 1.19 1999/05/09 03:25:37 rgb ++ * Fix bug introduced by 2.2 quick-and-dirty patch. ++ * ++ * Revision 1.18 1999/05/05 22:02:32 rgb ++ * Add a quick and dirty port to 2.2 kernels by Marc Boucher . ++ * ++ * Revision 1.17 1999/04/29 15:20:16 rgb ++ * Change gettdb parameter to a pointer to reduce stack loading and ++ * facilitate parameter sanity checking. ++ * Add sanity checking for null pointer arguments. ++ * Add debugging instrumentation. ++ * Add function deltdbchain() which will take care of unlinking, ++ * zeroing and deleting a chain of tdbs. ++ * Add a parameter to tdbcleanup to be able to delete a class of SAs. ++ * tdbwipe now actually zeroes the tdb as well as any of its pointed ++ * structures. ++ * ++ * Revision 1.16 1999/04/16 15:36:29 rgb ++ * Fix cut-and-paste error causing a memory leak in IPIP TDB freeing. ++ * ++ * Revision 1.15 1999/04/11 00:29:01 henry ++ * GPL boilerplate ++ * ++ * Revision 1.14 1999/04/06 04:54:28 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.13 1999/02/19 18:23:01 rgb ++ * Nix debug off compile warning. ++ * ++ * Revision 1.12 1999/02/17 16:52:16 rgb ++ * Consolidate satoa()s for space and speed efficiency. ++ * Convert DEBUG_IPSEC to KLIPS_PRINT ++ * Clean out unused cruft. ++ * Ditch NET_IPIP dependancy. ++ * Loop for 3des key setting. ++ * ++ * Revision 1.11 1999/01/26 02:09:05 rgb ++ * Remove ah/esp/IPIP switching on include files. ++ * Removed CONFIG_IPSEC_ALGO_SWITCH macro. ++ * Removed dead code. ++ * Clean up debug code when switched off. ++ * Remove references to INET_GET_PROTOCOL. ++ * Added code exclusion macros to reduce code from unused algorithms. ++ * ++ * Revision 1.10 1999/01/22 06:28:55 rgb ++ * Cruft clean-out. ++ * Put random IV generation in kernel. ++ * Added algorithm switch code. ++ * Enhanced debugging. ++ * 64-bit clean-up. 
++ * ++ * Revision 1.9 1998/11/30 13:22:55 rgb ++ * Rationalised all the klips kernel file headers. They are much shorter ++ * now and won't conflict under RH5.2. ++ * ++ * Revision 1.8 1998/11/25 04:59:06 rgb ++ * Add conditionals for no IPIP tunnel code. ++ * Delete commented out code. ++ * ++ * Revision 1.7 1998/10/31 06:50:41 rgb ++ * Convert xform ASCII names to no spaces. ++ * Fixed up comments in #endif directives. ++ * ++ * Revision 1.6 1998/10/19 14:44:28 rgb ++ * Added inclusion of freeswan.h. ++ * sa_id structure implemented and used: now includes protocol. ++ * ++ * Revision 1.5 1998/10/09 04:32:19 rgb ++ * Added 'klips_debug' prefix to all klips printk debug statements. ++ * ++ * Revision 1.4 1998/08/12 00:11:31 rgb ++ * Added new xform functions to the xform table. ++ * Fixed minor debug output spelling error. ++ * ++ * Revision 1.3 1998/07/09 17:45:31 rgb ++ * Clarify algorithm not available message. ++ * ++ * Revision 1.2 1998/06/23 03:00:51 rgb ++ * Check for presence of IPIP protocol if it is setup one way (we don't ++ * know what has been set up the other way and can only assume it will be ++ * symmetrical with the exception of keys). ++ * ++ * Revision 1.1 1998/06/18 21:27:51 henry ++ * move sources from klips/src to klips/net/ipsec, to keep stupid ++ * kernel-build scripts happier in the presence of symlinks ++ * ++ * Revision 1.3 1998/06/11 05:54:59 rgb ++ * Added transform version string pointer to xformsw initialisations. ++ * ++ * Revision 1.2 1998/04/21 21:28:57 rgb ++ * Rearrange debug switches to change on the fly debug output from user ++ * space. Only kernel changes checked in at this time. radij.c was also ++ * changed to temporarily remove buggy debugging code in rj_delete causing ++ * an OOPS and hence, netlink device open errors. 
++ * ++ * Revision 1.1 1998/04/09 03:06:13 henry ++ * sources moved up from linux/net/ipsec ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:02 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.5 1997/06/03 04:24:48 ji ++ * Added ESP-3DES-MD5-96 ++ * ++ * Revision 0.4 1997/01/15 01:28:15 ji ++ * Added new transforms. ++ * ++ * Revision 0.3 1996/11/20 14:39:04 ji ++ * Minor cleanups. ++ * Rationalized debugging code. ++ * ++ * Revision 0.2 1996/11/02 00:18:33 ji ++ * First limited release. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ipsec_xmit.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,1877 @@ ++/* ++ * IPSEC Transmit code. ++ * Copyright (C) 1996, 1997 John Ioannidis. ++ * Copyright (C) 1998-2003 Richard Guy Briggs. ++ * Copyright (C) 2004-2005 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ */ ++ ++char ipsec_xmit_c_version[] = "RCSID $Id: ipsec_xmit.c,v 1.20.2.13 2007-10-30 21:38:56 paul Exp $"; ++ ++#define __NO_VERSION__ ++#include ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif /* for CONFIG_IP_FORWARD */ ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, struct net_device_stats, dev_queue_xmit() and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include /* struct tcphdr */ ++#include /* struct udphdr */ ++#include ++#include ++#include ++#include ++#ifdef NET_21 ++# define MSS_HACK_ /* experimental */ ++# include ++# include ++# define proto_priv cb ++#endif /* NET_21 */ ++ ++#include /* icmp_send() */ ++#include ++#ifdef NETDEV_23 ++# include ++#endif /* NETDEV_23 */ ++ ++#include ++#ifdef MSS_HACK ++# include /* TCP options */ ++#endif /* MSS_HACK */ ++ ++#include "openswan/radij.h" ++#include "openswan/ipsec_life.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_eroute.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_xmit.h" ++#include "openswan/ipsec_sa.h" ++#include "openswan/ipsec_tunnel.h" ++#include "openswan/ipsec_ipe4.h" ++#include "openswan/ipsec_ah.h" ++#include "openswan/ipsec_esp.h" ++ ++#ifdef CONFIG_KLIPS_IPCOMP ++#include "openswan/ipcomp.h" ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ ++#include ++#include ++ ++#include "openswan/ipsec_proto.h" ++#include "openswan/ipsec_alg.h" ++ ++ ++/* ++ * Stupid kernel API differences in APIs. Not only do some ++ * kernels not have ip_select_ident, but some have differing APIs, ++ * and SuSE has one with one parameter, but no way of checking to ++ * see what is really what. 
++ */ ++ ++#ifdef SUSE_LINUX_2_4_19_IS_STUPID ++#define KLIPS_IP_SELECT_IDENT(iph, skb) ip_select_ident(iph) ++#else ++ ++/* simplest case, nothing */ ++#if !defined(IP_SELECT_IDENT) ++#define KLIPS_IP_SELECT_IDENT(iph, skb) do { iph->id = htons(ip_id_count++); } while(0) ++#endif ++ ++/* kernels > 2.3.37-ish */ ++#if defined(IP_SELECT_IDENT) && !defined(IP_SELECT_IDENT_NEW) ++#define KLIPS_IP_SELECT_IDENT(iph, skb) ip_select_ident(iph, skb->dst) ++#endif ++ ++/* kernels > 2.4.2 */ ++#if defined(IP_SELECT_IDENT) && defined(IP_SELECT_IDENT_NEW) ++#define KLIPS_IP_SELECT_IDENT(iph, skb) ip_select_ident(iph, skb->dst, NULL) ++#endif ++ ++#endif /* SUSE_LINUX_2_4_19_IS_STUPID */ ++ ++ ++ ++#if defined(CONFIG_KLIPS_AH) ++static __u32 zeroes[64]; ++#endif ++ ++#ifdef CONFIG_KLIPS_DEBUG ++int sysctl_ipsec_debug_verbose = 0; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++int ipsec_xmit_trap_count = 0; ++int ipsec_xmit_trap_sendcount = 0; ++ ++int sysctl_ipsec_icmp = 0; ++int sysctl_ipsec_tos = 0; ++ ++#ifdef CONFIG_KLIPS_DEBUG ++#define dmp(_x,_y,_z) if(debug_tunnel) ipsec_dmp_block(_x,_y,_z) ++#else /* CONFIG_KLIPS_DEBUG */ ++#define dmp(_x, _y, _z) ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ ++#if !defined(SKB_COPY_EXPAND) || defined(KLIPS_UNIT_TESTS) ++/* ++ * This is mostly skbuff.c:skb_copy(). 
++ */ ++struct sk_buff * ++skb_copy_expand(const struct sk_buff *skb, int headroom, ++ int tailroom, int priority) ++{ ++ struct sk_buff *n; ++ unsigned long offset; ++ ++ /* ++ * Do sanity checking ++ */ ++ if((headroom < 0) || (tailroom < 0) || ((headroom+tailroom) < 0)) { ++ printk(KERN_WARNING ++ "klips_error:skb_copy_expand: " ++ "Illegal negative head,tailroom %d,%d\n", ++ headroom, ++ tailroom); ++ return NULL; ++ } ++ /* ++ * Allocate the copy buffer ++ */ ++ ++#ifndef NET_21 ++ IS_SKB(skb); ++#endif /* !NET_21 */ ++ ++ ++ n=alloc_skb(skb->end - skb->head + headroom + tailroom, priority); ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:skb_copy_expand: " ++ "allocating %d bytes, head=0p%p data=0p%p tail=0p%p end=0p%p end-head=%d tail-data=%d\n", ++ skb->end - skb->head + headroom + tailroom, ++ skb->head, ++ skb->data, ++ skb->tail, ++ skb->end, ++ skb->end - skb->head, ++ skb->tail - skb->data); ++ ++ if(n==NULL) ++ return NULL; ++ ++ /* ++ * Shift between the two data areas in bytes ++ */ ++ ++ /* Set the data pointer */ ++ skb_reserve(n,skb->data-skb->head+headroom); ++ /* Set the tail pointer and length */ ++ if(skb_tailroom(n) < skb->len) { ++ printk(KERN_WARNING "klips_error:skb_copy_expand: " ++ "tried to skb_put %ld, %d available. 
This should never happen, please report.\n", ++ (unsigned long int)skb->len, ++ skb_tailroom(n)); ++ ipsec_kfree_skb(n); ++ return NULL; ++ } ++ skb_put(n,skb->len); ++ ++ offset=n->head + headroom - skb->head; ++ ++ /* Copy the bytes */ ++ memcpy(n->head + headroom, skb->head,skb->end-skb->head); ++#ifdef NET_21 ++ n->csum=skb->csum; ++ n->priority=skb->priority; ++ n->dst=dst_clone(skb->dst); ++ if(skb->nh.raw) ++ n->nh.raw=skb->nh.raw+offset; ++#ifndef NETDEV_23 ++ n->is_clone=0; ++#endif /* NETDEV_23 */ ++ atomic_set(&n->users, 1); ++ n->destructor = NULL; ++#ifdef HAVE_SOCK_SECURITY ++ n->security=skb->security; ++#endif ++#else /* NET_21 */ ++ n->link3=NULL; ++ n->when=skb->when; ++ if(skb->ip_hdr) ++ n->ip_hdr=(struct iphdr *)(((char *)skb->ip_hdr)+offset); ++ n->saddr=skb->saddr; ++ n->daddr=skb->daddr; ++ n->raddr=skb->raddr; ++ n->seq=skb->seq; ++ n->end_seq=skb->end_seq; ++ n->ack_seq=skb->ack_seq; ++ n->acked=skb->acked; ++ n->free=1; ++ n->arp=skb->arp; ++ n->tries=0; ++ n->lock=0; ++ n->users=0; ++#endif /* NET_21 */ ++ n->protocol=skb->protocol; ++ n->list=NULL; ++ n->sk=NULL; ++ n->dev=skb->dev; ++ if(skb->h.raw) ++ n->h.raw=skb->h.raw+offset; ++ if(skb->mac.raw) ++ n->mac.raw=skb->mac.raw+offset; ++ memcpy(n->proto_priv, skb->proto_priv, sizeof(skb->proto_priv)); ++#ifndef NETDEV_23 ++ n->used=skb->used; ++#endif /* !NETDEV_23 */ ++ n->pkt_type=skb->pkt_type; ++ n->stamp=skb->stamp; ++ ++#ifndef NET_21 ++ IS_SKB(n); ++#endif /* !NET_21 */ ++ return n; ++} ++#endif /* !SKB_COPY_EXPAND */ ++ ++#ifdef CONFIG_KLIPS_DEBUG ++void ++ipsec_print_ip(struct iphdr *ip) ++{ ++ char buf[ADDRTOA_BUF]; ++ ++ printk(KERN_INFO "klips_debug: IP:"); ++ printk(" ihl:%d", ip->ihl << 2); ++ printk(" ver:%d", ip->version); ++ printk(" tos:%d", ip->tos); ++ printk(" tlen:%d", ntohs(ip->tot_len)); ++ printk(" id:%d", ntohs(ip->id)); ++ printk(" %s%s%sfrag_off:%d", ++ ip->frag_off & __constant_htons(IP_CE) ? "CE " : "", ++ ip->frag_off & __constant_htons(IP_DF) ? 
"DF " : "", ++ ip->frag_off & __constant_htons(IP_MF) ? "MF " : "", ++ (ntohs(ip->frag_off) & IP_OFFSET) << 3); ++ printk(" ttl:%d", ip->ttl); ++ printk(" proto:%d", ip->protocol); ++ if(ip->protocol == IPPROTO_UDP) ++ printk(" (UDP)"); ++ if(ip->protocol == IPPROTO_TCP) ++ printk(" (TCP)"); ++ if(ip->protocol == IPPROTO_ICMP) ++ printk(" (ICMP)"); ++ if(ip->protocol == IPPROTO_ESP) ++ printk(" (ESP)"); ++ if(ip->protocol == IPPROTO_AH) ++ printk(" (AH)"); ++ if(ip->protocol == IPPROTO_COMP) ++ printk(" (COMP)"); ++ printk(" chk:%d", ntohs(ip->check)); ++ addrtoa(*((struct in_addr*)(&ip->saddr)), 0, buf, sizeof(buf)); ++ printk(" saddr:%s", buf); ++ if(ip->protocol == IPPROTO_UDP) ++ printk(":%d", ++ ntohs(((struct udphdr*)((caddr_t)ip + (ip->ihl << 2)))->source)); ++ if(ip->protocol == IPPROTO_TCP) ++ printk(":%d", ++ ntohs(((struct tcphdr*)((caddr_t)ip + (ip->ihl << 2)))->source)); ++ addrtoa(*((struct in_addr*)(&ip->daddr)), 0, buf, sizeof(buf)); ++ printk(" daddr:%s", buf); ++ if(ip->protocol == IPPROTO_UDP) ++ printk(":%d", ++ ntohs(((struct udphdr*)((caddr_t)ip + (ip->ihl << 2)))->dest)); ++ if(ip->protocol == IPPROTO_TCP) ++ printk(":%d", ++ ntohs(((struct tcphdr*)((caddr_t)ip + (ip->ihl << 2)))->dest)); ++ if(ip->protocol == IPPROTO_ICMP) ++ printk(" type:code=%d:%d", ++ ((struct icmphdr*)((caddr_t)ip + (ip->ihl << 2)))->type, ++ ((struct icmphdr*)((caddr_t)ip + (ip->ihl << 2)))->code); ++ printk("\n"); ++ ++ if(sysctl_ipsec_debug_verbose) { ++ __u8 *c; ++ int len = ntohs(ip->tot_len) - ip->ihl*4; ++ ++ c = ((__u8*)ip) + ip->ihl*4; ++ ipsec_dmp_block("ip_print", c, len); ++ } ++} ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++#ifdef MSS_HACK ++/* ++ * Issues: ++ * 1) Fragments arriving in the tunnel should probably be rejected. ++ * 2) How does this affect syncookies, mss_cache, dst cache ? ++ * 3) Path MTU discovery handling needs to be reviewed. 
For example, ++ * if we receive an ICMP 'packet too big' message from an intermediate ++ * router specifying it's next hop MTU, our stack may process this and ++ * adjust the MSS without taking our AH/ESP overheads into account. ++ */ ++ ++ ++/* ++ * Recaclulate checksum using differences between changed datum, ++ * borrowed from netfilter. ++ */ ++DEBUG_NO_STATIC u_int16_t ++ipsec_fast_csum(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) ++{ ++ u_int32_t diffs[] = { oldvalinv, newval }; ++ return csum_fold(csum_partial((char *)diffs, sizeof(diffs), ++ oldcheck^0xFFFF)); ++} ++ ++/* ++ * Determine effective MSS. ++ * ++ * Note that we assume that there is always an MSS option for our own ++ * SYN segments, which is mentioned in tcp_syn_build_options(), kernel 2.2.x. ++ * This could change, and we should probably parse TCP options instead. ++ * ++ */ ++DEBUG_NO_STATIC u_int8_t ++ipsec_adjust_mss(struct sk_buff *skb, struct tcphdr *tcph, u_int16_t mtu) ++{ ++ u_int16_t oldmss, newmss; ++ u_int32_t *mssp; ++ struct sock *sk = skb->sk; ++ ++ newmss = tcp_sync_mss(sk, mtu); ++ printk(KERN_INFO "klips: setting mss to %u\n", newmss); ++ mssp = (u_int32_t *)tcph + sizeof(struct tcphdr) / sizeof(u_int32_t); ++ oldmss = ntohl(*mssp) & 0x0000FFFF; ++ *mssp = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | newmss); ++ tcph->check = ipsec_fast_csum(htons(~oldmss), ++ htons(newmss), tcph->check); ++ return 1; ++} ++#endif /* MSS_HACK */ ++ ++/* ++ * Sanity checks ++ */ ++enum ipsec_xmit_value ++ipsec_xmit_sanity_check_dev(struct ipsec_xmit_state *ixs) ++{ ++ ++ if (ixs->dev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_error:ipsec_xmit_sanity_check_dev: " ++ "No device associated with skb!\n" ); ++ return IPSEC_XMIT_NODEV; ++ } ++ ++ ixs->prv = ixs->dev->priv; ++ if (ixs->prv == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_error:ipsec_xmit_sanity_check_dev: " ++ "Device has no private structure!\n" ); ++ return IPSEC_XMIT_NOPRIVDEV; ++ 
} ++ ++ ixs->physdev = ixs->prv->dev; ++ if (ixs->physdev == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_error:ipsec_xmit_sanity_check_dev: " ++ "Device is not attached to physical device!\n" ); ++ return IPSEC_XMIT_NOPHYSDEV; ++ } ++ ++ ixs->physmtu = ixs->physdev->mtu; ++ ixs->cur_mtu = ixs->physdev->mtu; ++ ixs->stats = (struct net_device_stats *) &(ixs->prv->mystats); ++ ++ return IPSEC_XMIT_OK; ++} ++ ++enum ipsec_xmit_value ++ipsec_xmit_sanity_check_skb(struct ipsec_xmit_state *ixs) ++{ ++ /* ++ * Return if there is nothing to do. (Does this ever happen?) XXX ++ */ ++ if (ixs->skb == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_error:ipsec_xmit_sanity_check_skb: " ++ "Nothing to do!\n" ); ++ return IPSEC_XMIT_NOSKB; ++ } ++ ++ /* if skb was cloned (most likely due to a packet sniffer such as ++ tcpdump being momentarily attached to the interface), make ++ a copy of our own to modify */ ++ if(skb_cloned(ixs->skb)) { ++ if ++#ifdef SKB_COW_NEW ++ (skb_cow(ixs->skb, skb_headroom(ixs->skb)) != 0) ++#else /* SKB_COW_NEW */ ++ ((ixs->skb = skb_cow(ixs->skb, skb_headroom(ixs->skb))) == NULL) ++#endif /* SKB_COW_NEW */ ++ { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_error:ipsec_xmit_sanity_check_skb: " ++ "skb_cow failed to allocate buffer, dropping.\n" ); ++ ixs->stats->tx_dropped++; ++ return IPSEC_XMIT_ERRSKBALLOC; ++ } ++ } ++ ++ ixs->iph = ip_hdr(ixs->skb); ++ ++ /* sanity check for IP version as we can't handle IPv6 right now */ ++ if (ixs->iph->version != 4) { ++ KLIPS_PRINT(debug_tunnel, ++ "klips_debug:ipsec_xmit_sanity_check_skb: " ++ "found IP Version %d but cannot process other IP versions than v4.\n", ++ ixs->iph->version); /* XXX */ ++ ixs->stats->tx_dropped++; ++ return IPSEC_XMIT_NOIPV6; ++ } ++ ++#if IPSEC_DISALLOW_IPOPTIONS ++ if ((ixs->iph->ihl << 2) != sizeof (struct iphdr)) { ++ KLIPS_PRINT(debug_tunnel, ++ "klips_debug:ipsec_xmit_sanity_check_skb: " ++ "cannot process IP header options yet. 
May be mal-formed packet.\n"); /* XXX */ ++ ixs->stats->tx_dropped++; ++ return IPSEC_XMIT_NOIPOPTIONS; ++ } ++#endif /* IPSEC_DISALLOW_IPOPTIONS */ ++ ++#ifndef NET_21 ++ if (ixs->iph->ttl <= 0) { ++ /* Tell the sender its packet died... */ ++ ICMP_SEND(ixs->skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0, ixs->physdev); ++ ++ KLIPS_PRINT(debug_tunnel, "klips_debug:ipsec_xmit_sanity_check_skb: " ++ "TTL=0, too many hops!\n"); ++ ixs->stats->tx_dropped++; ++ return IPSEC_XMIT_TTLEXPIRED; ++ } ++#endif /* !NET_21 */ ++ ++ return IPSEC_XMIT_OK; ++} ++ ++enum ipsec_xmit_value ++ipsec_xmit_encap_once(struct ipsec_xmit_state *ixs) ++{ ++#ifdef CONFIG_KLIPS_ESP ++ struct esphdr *espp; ++ unsigned char *idat, *pad; ++ int authlen = 0, padlen = 0, i; ++#endif /* !CONFIG_KLIPS_ESP */ ++#ifdef CONFIG_KLIPS_AH ++ struct iphdr ipo; ++ struct ahhdr *ahp; ++#endif /* CONFIG_KLIPS_AH */ ++#if defined(CONFIG_KLIPS_AUTH_HMAC_MD5) || defined(CONFIG_KLIPS_AUTH_HMAC_SHA1) ++ union { ++#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 ++ MD5_CTX md5; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++ SHA1_CTX sha1; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ ++ } tctx; ++ __u8 hash[AH_AMAX]; ++#endif /* defined(CONFIG_KLIPS_AUTH_HMAC_MD5) || defined(CONFIG_KLIPS_AUTH_HMACn_SHA1) */ ++ int headroom = 0, tailroom = 0, ilen = 0, len = 0; ++ unsigned char *dat; ++ int blocksize = 8; /* XXX: should be inside ixs --jjo */ ++ struct ipsec_alg_enc *ixt_e = NULL; ++ struct ipsec_alg_auth *ixt_a = NULL; ++ ++ ixs->iphlen = ixs->iph->ihl << 2; ++ ixs->pyldsz = ntohs(ixs->iph->tot_len) - ixs->iphlen; ++ ixs->sa_len = KLIPS_SATOT(debug_tunnel, &ixs->ipsp->ips_said, 0, ixs->sa_txt, SATOT_BUF); ++ KLIPS_PRINT(debug_tunnel & DB_TN_OXFS, ++ "klips_debug:ipsec_xmit_encap_once: " ++ "calling output for <%s%s%s>, SA:%s\n", ++ IPS_XFORM_NAME(ixs->ipsp), ++ ixs->sa_len ? 
ixs->sa_txt : " (error)"); ++ ++ switch(ixs->ipsp->ips_said.proto) { ++#ifdef CONFIG_KLIPS_AH ++ case IPPROTO_AH: ++ headroom += sizeof(struct ahhdr); ++ break; ++#endif /* CONFIG_KLIPS_AH */ ++ ++#ifdef CONFIG_KLIPS_ESP ++ case IPPROTO_ESP: ++ ixt_e=ixs->ipsp->ips_alg_enc; ++ if (ixt_e) { ++ blocksize = ixt_e->ixt_common.ixt_blocksize; ++ headroom += ESP_HEADER_LEN + ixt_e->ixt_common.ixt_support.ias_ivlen/8; ++ } else { ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_ESP_BADALG; ++ } ++ ++ ixt_a=ixs->ipsp->ips_alg_auth; ++ if (ixt_a) { ++ tailroom += AHHMAC_HASHLEN; ++ authlen = AHHMAC_HASHLEN; ++ } else ++ switch(ixs->ipsp->ips_authalg) { ++#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 ++ case AH_MD5: ++ authlen = AHHMAC_HASHLEN; ++ break; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++ case AH_SHA: ++ authlen = AHHMAC_HASHLEN; ++ break; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ ++ case AH_NONE: ++ break; ++ default: ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_ESP_BADALG; ++ } ++ tailroom += blocksize != 1 ? ++ ((blocksize - ((ixs->pyldsz + 2) % blocksize)) % blocksize) + 2 : ++ ((4 - ((ixs->pyldsz + 2) % 4)) % 4) + 2; ++ tailroom += authlen; ++ break; ++#endif /* CONFIG_KLIPS_ESP */ ++ ++#ifdef CONFIG_KLIPS_IPIP ++ case IPPROTO_IPIP: ++ headroom += sizeof(struct iphdr); ++ ixs->iphlen = sizeof(struct iphdr); ++ break; ++#endif /* !CONFIG_KLIPS_IPIP */ ++ ++#ifdef CONFIG_KLIPS_IPCOMP ++ case IPPROTO_COMP: ++ break; ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ ++ default: ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_BADPROTO; ++ } ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_once: " ++ "pushing %d bytes, putting %d, proto %d.\n", ++ headroom, tailroom, ixs->ipsp->ips_said.proto); ++ if(skb_headroom(ixs->skb) < headroom) { ++ printk(KERN_WARNING ++ "klips_error:ipsec_xmit_encap_once: " ++ "tried to skb_push headroom=%d, %d available. 
This should never happen, please report.\n", ++ headroom, skb_headroom(ixs->skb)); ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_ESP_PUSHPULLERR; ++ } ++ ++ dat = skb_push(ixs->skb, headroom); ++ ilen = ixs->skb->len - tailroom; ++ if(skb_tailroom(ixs->skb) < tailroom) { ++ printk(KERN_WARNING ++ "klips_error:ipsec_xmit_encap_once: " ++ "tried to skb_put %d, %d available. This should never happen, please report.\n", ++ tailroom, skb_tailroom(ixs->skb)); ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_ESP_PUSHPULLERR; ++ } ++ skb_put(ixs->skb, tailroom); ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_once: " ++ "head,tailroom: %d,%d before xform.\n", ++ skb_headroom(ixs->skb), skb_tailroom(ixs->skb)); ++ len = ixs->skb->len; ++ if(len > 0xfff0) { ++ printk(KERN_WARNING "klips_error:ipsec_xmit_encap_once: " ++ "tot_len (%d) > 65520. This should never happen, please report.\n", ++ len); ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_BADLEN; ++ } ++ memmove((void *)dat, (void *)(dat + headroom), ixs->iphlen); ++ ixs->iph = (struct iphdr *)dat; ++ ixs->iph->tot_len = htons(ixs->skb->len); ++ ++ switch(ixs->ipsp->ips_said.proto) { ++#ifdef CONFIG_KLIPS_ESP ++ case IPPROTO_ESP: ++ espp = (struct esphdr *)(dat + ixs->iphlen); ++ espp->esp_spi = ixs->ipsp->ips_said.spi; ++ espp->esp_rpl = htonl(++(ixs->ipsp->ips_replaywin_lastseq)); ++ ++ if (!ixt_e) { ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_ESP_BADALG; ++ } ++ ++ idat = dat + ixs->iphlen + headroom; ++ ilen = len - (ixs->iphlen + headroom + authlen); ++ ++ /* Self-describing padding */ ++ pad = &dat[len - tailroom]; ++ padlen = tailroom - 2 - authlen; ++ for (i = 0; i < padlen; i++) { ++ pad[i] = i + 1; ++ } ++ dat[len - authlen - 2] = padlen; ++ ++ dat[len - authlen - 1] = ixs->iph->protocol; ++ ixs->iph->protocol = IPPROTO_ESP; ++#ifdef CONFIG_KLIPS_DEBUG ++ if(debug_tunnel & DB_TN_ENCAP) { ++ dmp("pre-encrypt", dat, len); ++ } ++#endif ++ ++ /* ++ * Do all operations here: ++ 
* copy IV->ESP, encrypt, update ips IV ++ * ++ */ ++ { ++ int ret; ++ memcpy(espp->esp_iv, ++ ixs->ipsp->ips_iv, ++ ixs->ipsp->ips_iv_size); ++ ret=ipsec_alg_esp_encrypt(ixs->ipsp, ++ idat, ilen, espp->esp_iv, ++ IPSEC_ALG_ENCRYPT); ++ ++ prng_bytes(&ipsec_prng, ++ (char *)ixs->ipsp->ips_iv, ++ ixs->ipsp->ips_iv_size); ++ } ++ ++ if (ixt_a) { ++ ipsec_alg_sa_esp_hash(ixs->ipsp, ++ (caddr_t)espp, len - ixs->iphlen - authlen, ++ &(dat[len - authlen]), authlen); ++ ++ } else ++ switch(ixs->ipsp->ips_authalg) { ++#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 ++ case AH_MD5: ++ dmp("espp", (char*)espp, len - ixs->iphlen - authlen); ++ tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->ictx; ++ dmp("ictx", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Update(&tctx.md5, (caddr_t)espp, len - ixs->iphlen - authlen); ++ dmp("ictx+dat", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Final(hash, &tctx.md5); ++ dmp("ictx hash", (char*)&hash, sizeof(hash)); ++ tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->octx; ++ dmp("octx", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Update(&tctx.md5, hash, AHMD596_ALEN); ++ dmp("octx+hash", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Final(hash, &tctx.md5); ++ dmp("octx hash", (char*)&hash, sizeof(hash)); ++ memcpy(&(dat[len - authlen]), hash, authlen); ++ ++ /* paranoid */ ++ memset((caddr_t)&tctx.md5, 0, sizeof(tctx.md5)); ++ memset((caddr_t)hash, 0, sizeof(*hash)); ++ break; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++ case AH_SHA: ++ tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->ictx; ++ SHA1Update(&tctx.sha1, (caddr_t)espp, len - ixs->iphlen - authlen); ++ SHA1Final(hash, &tctx.sha1); ++ tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->octx; ++ SHA1Update(&tctx.sha1, hash, AHSHA196_ALEN); ++ SHA1Final(hash, &tctx.sha1); ++ memcpy(&(dat[len - authlen]), hash, authlen); ++ ++ /* paranoid */ ++ memset((caddr_t)&tctx.sha1, 0, sizeof(tctx.sha1)); ++ memset((caddr_t)hash, 0, sizeof(*hash)); 
++ break; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ ++ case AH_NONE: ++ break; ++ default: ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_AH_BADALG; ++ } ++#ifdef NET_21 ++ /*ixs->skb->h.raw = (unsigned char*)espp;*/ ++ skb_set_transport_header(ixs->skb, ipsec_skb_offset(ixs->skb, espp)); ++#endif /* NET_21 */ ++ break; ++#endif /* !CONFIG_KLIPS_ESP */ ++#ifdef CONFIG_KLIPS_AH ++ case IPPROTO_AH: ++ ahp = (struct ahhdr *)(dat + ixs->iphlen); ++ ahp->ah_spi = ixs->ipsp->ips_said.spi; ++ ahp->ah_rpl = htonl(++(ixs->ipsp->ips_replaywin_lastseq)); ++ ahp->ah_rv = 0; ++ ahp->ah_nh = ixs->iph->protocol; ++ ahp->ah_hl = (headroom >> 2) - sizeof(__u64)/sizeof(__u32); ++ ixs->iph->protocol = IPPROTO_AH; ++ dmp("ahp", (char*)ahp, sizeof(*ahp)); ++ ++ ipo = *ixs->iph; ++ ipo.tos = 0; ++ ipo.frag_off = 0; ++ ipo.ttl = 0; ++ ipo.check = 0; ++ dmp("ipo", (char*)&ipo, sizeof(ipo)); ++ ++ switch(ixs->ipsp->ips_authalg) { ++#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 ++ case AH_MD5: ++ tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->ictx; ++ dmp("ictx", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Update(&tctx.md5, (unsigned char *)&ipo, sizeof (struct iphdr)); ++ dmp("ictx+ipo", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Update(&tctx.md5, (unsigned char *)ahp, headroom - sizeof(ahp->ah_data)); ++ dmp("ictx+ahp", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Update(&tctx.md5, (unsigned char *)zeroes, AHHMAC_HASHLEN); ++ dmp("ictx+zeroes", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Update(&tctx.md5, dat + ixs->iphlen + headroom, len - ixs->iphlen - headroom); ++ dmp("ictx+dat", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Final(hash, &tctx.md5); ++ dmp("ictx hash", (char*)&hash, sizeof(hash)); ++ tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->octx; ++ dmp("octx", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Update(&tctx.md5, hash, AHMD596_ALEN); ++ dmp("octx+hash", (char*)&tctx.md5, sizeof(tctx.md5)); ++ osMD5Final(hash, &tctx.md5); ++ dmp("octx hash", (char*)&hash, 
sizeof(hash)); ++ ++ memcpy(ahp->ah_data, hash, AHHMAC_HASHLEN); ++ ++ /* paranoid */ ++ memset((caddr_t)&tctx.md5, 0, sizeof(tctx.md5)); ++ memset((caddr_t)hash, 0, sizeof(*hash)); ++ break; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++ case AH_SHA: ++ tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->ictx; ++ SHA1Update(&tctx.sha1, (unsigned char *)&ipo, sizeof (struct iphdr)); ++ SHA1Update(&tctx.sha1, (unsigned char *)ahp, headroom - sizeof(ahp->ah_data)); ++ SHA1Update(&tctx.sha1, (unsigned char *)zeroes, AHHMAC_HASHLEN); ++ SHA1Update(&tctx.sha1, dat + ixs->iphlen + headroom, len - ixs->iphlen - headroom); ++ SHA1Final(hash, &tctx.sha1); ++ tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->octx; ++ SHA1Update(&tctx.sha1, hash, AHSHA196_ALEN); ++ SHA1Final(hash, &tctx.sha1); ++ ++ memcpy(ahp->ah_data, hash, AHHMAC_HASHLEN); ++ ++ /* paranoid */ ++ memset((caddr_t)&tctx.sha1, 0, sizeof(tctx.sha1)); ++ memset((caddr_t)hash, 0, sizeof(*hash)); ++ break; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ ++ default: ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_AH_BADALG; ++ } ++#ifdef NET_21 ++ skb_set_transport_header(ixs->skb, ipsec_skb_offset(ixs->skb, ahp)); ++#endif /* NET_21 */ ++ break; ++#endif /* CONFIG_KLIPS_AH */ ++#ifdef CONFIG_KLIPS_IPIP ++ case IPPROTO_IPIP: ++ ixs->iph->version = 4; ++ switch(sysctl_ipsec_tos) { ++ case 0: ++#ifdef NET_21 ++ ixs->iph->tos = ip_hdr(ixs->skb)->tos; ++#else /* NET_21 */ ++ ixs->iph->tos = ixs->skb->ip_hdr->tos; ++#endif /* NET_21 */ ++ break; ++ case 1: ++ ixs->iph->tos = 0; ++ break; ++ default: ++ break; ++ } ++ ixs->iph->ttl = SYSCTL_IPSEC_DEFAULT_TTL; ++ ixs->iph->frag_off = 0; ++ ixs->iph->saddr = ((struct sockaddr_in*)(ixs->ipsp->ips_addr_s))->sin_addr.s_addr; ++ ixs->iph->daddr = ((struct sockaddr_in*)(ixs->ipsp->ips_addr_d))->sin_addr.s_addr; ++ ixs->iph->protocol = IPPROTO_IPIP; ++ ixs->iph->ihl = sizeof(struct iphdr) >> 2; ++ ++ KLIPS_IP_SELECT_IDENT(ixs->iph, 
ixs->skb); ++ ++ ixs->newdst = (__u32)ixs->iph->daddr; ++ ixs->newsrc = (__u32)ixs->iph->saddr; ++ ++#ifdef NET_21 ++ skb_set_transport_header(ixs->skb, ipsec_skb_offset(ixs->skb, ip_hdr(ixs->skb))); ++#endif /* NET_21 */ ++ break; ++#endif /* !CONFIG_KLIPS_IPIP */ ++#ifdef CONFIG_KLIPS_IPCOMP ++ case IPPROTO_COMP: ++ { ++ unsigned int flags = 0; ++#ifdef CONFIG_KLIPS_DEBUG ++ unsigned int old_tot_len = ntohs(ixs->iph->tot_len); ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ixs->ipsp->ips_comp_ratio_dbytes += ntohs(ixs->iph->tot_len); ++ ++ ixs->skb = skb_compress(ixs->skb, ixs->ipsp, &flags); ++ ++#ifdef NET_21 ++ ixs->iph = ip_hdr(ixs->skb); ++#else /* NET_21 */ ++ ixs->iph = ixs->skb->ip_hdr; ++#endif /* NET_21 */ ++ ++ ixs->ipsp->ips_comp_ratio_cbytes += ntohs(ixs->iph->tot_len); ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if (debug_tunnel & DB_TN_CROUT) ++ { ++ if (old_tot_len > ntohs(ixs->iph->tot_len)) ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_once: " ++ "packet shrunk from %d to %d bytes after compression, cpi=%04x (should be from spi=%08x, spi&0xffff=%04x.\n", ++ old_tot_len, ntohs(ixs->iph->tot_len), ++ ntohs(((struct ipcomphdr*)(((char*)ixs->iph) + ((ixs->iph->ihl) << 2)))->ipcomp_cpi), ++ ntohl(ixs->ipsp->ips_said.spi), ++ (__u16)(ntohl(ixs->ipsp->ips_said.spi) & 0x0000ffff)); ++ else ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_once: " ++ "packet did not compress (flags = %d).\n", ++ flags); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ } ++ break; ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ default: ++ ixs->stats->tx_errors++; ++ return IPSEC_XMIT_BADPROTO; ++ } ++ ++#ifdef NET_21 ++ skb_set_network_header(ixs->skb, ipsec_skb_offset(ixs->skb, ixs->skb->data)); ++ ++#else /* NET_21 */ ++ ixs->skb->ip_hdr = ixs->skb->h.iph = (struct iphdr *) ixs->skb->data; ++#endif /* NET_21 */ ++ ixs->iph->check = 0; ++ ixs->iph->check = ip_fast_csum((unsigned char *)ixs->iph, ixs->iph->ihl); ++ ++ KLIPS_PRINT(debug_tunnel & 
DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_encap_once: " ++ "after <%s%s%s>, SA:%s:\n", ++ IPS_XFORM_NAME(ixs->ipsp), ++ ixs->sa_len ? ixs->sa_txt : " (error)"); ++ KLIPS_IP_PRINT(debug_tunnel & DB_TN_XMIT, ixs->iph); ++ ++ ixs->ipsp->ips_life.ipl_bytes.ipl_count += len; ++ ixs->ipsp->ips_life.ipl_bytes.ipl_last = len; ++ ++ if(!ixs->ipsp->ips_life.ipl_usetime.ipl_count) { ++ ixs->ipsp->ips_life.ipl_usetime.ipl_count = jiffies / HZ; ++ } ++ ixs->ipsp->ips_life.ipl_usetime.ipl_last = jiffies / HZ; ++ ixs->ipsp->ips_life.ipl_packets.ipl_count++; ++ ++ ixs->ipsp = ixs->ipsp->ips_onext; ++ ++ return IPSEC_XMIT_OK; ++} ++ ++/* ++ * If the IP packet (iph) is a carrying TCP/UDP, then set the encaps ++ * source and destination ports to those from the TCP/UDP header. ++ */ ++void ipsec_extract_ports(struct iphdr * iph, struct sockaddr_encap * er) ++{ ++ struct udphdr *udp; ++ ++ switch (iph->protocol) { ++ case IPPROTO_UDP: ++ case IPPROTO_TCP: ++ /* ++ * The ports are at the same offsets in a TCP and UDP ++ * header so hack it ... ++ */ ++ udp = (struct udphdr*)(((char*)iph)+(iph->ihl<<2)); ++ er->sen_sport = udp->source; ++ er->sen_dport = udp->dest; ++ break; ++ default: ++ er->sen_sport = 0; ++ er->sen_dport = 0; ++ break; ++ } ++} ++ ++/* ++ * A TRAP eroute is installed and we want to replace it with a HOLD ++ * eroute. 
++ */ ++static int create_hold_eroute(struct eroute *origtrap, ++ struct sk_buff * skb, struct iphdr * iph, ++ uint32_t eroute_pid) ++{ ++ struct eroute hold_eroute; ++ ip_said hold_said; ++ struct sk_buff *first, *last; ++ int error; ++ ++ first = last = NULL; ++ memset((caddr_t)&hold_eroute, 0, sizeof(hold_eroute)); ++ memset((caddr_t)&hold_said, 0, sizeof(hold_said)); ++ ++ hold_said.proto = IPPROTO_INT; ++ hold_said.spi = htonl(SPI_HOLD); ++ hold_said.dst.u.v4.sin_addr.s_addr = INADDR_ANY; ++ ++ hold_eroute.er_eaddr.sen_len = sizeof(struct sockaddr_encap); ++ hold_eroute.er_emask.sen_len = sizeof(struct sockaddr_encap); ++ hold_eroute.er_eaddr.sen_family = AF_ENCAP; ++ hold_eroute.er_emask.sen_family = AF_ENCAP; ++ hold_eroute.er_eaddr.sen_type = SENT_IP4; ++ hold_eroute.er_emask.sen_type = 255; ++ ++ hold_eroute.er_eaddr.sen_ip_src.s_addr = iph->saddr; ++ hold_eroute.er_eaddr.sen_ip_dst.s_addr = iph->daddr; ++ hold_eroute.er_emask.sen_ip_src.s_addr = INADDR_BROADCAST; ++ hold_eroute.er_emask.sen_ip_dst.s_addr = INADDR_BROADCAST; ++ hold_eroute.er_emask.sen_sport = 0; ++ hold_eroute.er_emask.sen_dport = 0; ++ hold_eroute.er_pid = eroute_pid; ++ hold_eroute.er_count = 0; ++ hold_eroute.er_lasttime = jiffies/HZ; ++ ++ /* ++ * if it wasn't captured by a wildcard, then don't record it as ++ * a wildcard. 
++ */ ++ if(origtrap->er_eaddr.sen_proto != 0) { ++ hold_eroute.er_eaddr.sen_proto = iph->protocol; ++ ++ if((iph->protocol == IPPROTO_TCP || ++ iph->protocol == IPPROTO_UDP) && ++ (origtrap->er_eaddr.sen_sport != 0 || ++ origtrap->er_eaddr.sen_dport != 0)) { ++ ++ if(origtrap->er_eaddr.sen_sport != 0) ++ hold_eroute.er_emask.sen_sport = ~0; ++ ++ if(origtrap->er_eaddr.sen_dport != 0) ++ hold_eroute.er_emask.sen_dport = ~0; ++ ++ ipsec_extract_ports(iph, &hold_eroute.er_eaddr); ++ } ++ } ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if (debug_pfkey) { ++ char buf1[64], buf2[64]; ++ subnettoa(hold_eroute.er_eaddr.sen_ip_src, ++ hold_eroute.er_emask.sen_ip_src, 0, buf1, sizeof(buf1)); ++ subnettoa(hold_eroute.er_eaddr.sen_ip_dst, ++ hold_eroute.er_emask.sen_ip_dst, 0, buf2, sizeof(buf2)); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "calling breakeroute and makeroute for %s:%d->%s:%d %d HOLD eroute.\n", ++ buf1, ntohs(hold_eroute.er_eaddr.sen_sport), ++ buf2, ntohs(hold_eroute.er_eaddr.sen_dport), ++ hold_eroute.er_eaddr.sen_proto); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ if (ipsec_breakroute(&(hold_eroute.er_eaddr), &(hold_eroute.er_emask), ++ &first, &last)) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "HOLD breakeroute found nothing.\n"); ++ } else { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "HOLD breakroute deleted %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u %u\n", ++ NIPQUAD(hold_eroute.er_eaddr.sen_ip_src), ++ ntohs(hold_eroute.er_eaddr.sen_sport), ++ NIPQUAD(hold_eroute.er_eaddr.sen_ip_dst), ++ ntohs(hold_eroute.er_eaddr.sen_dport), ++ hold_eroute.er_eaddr.sen_proto); ++ } ++ if (first != NULL) ++ kfree_skb(first); ++ if (last != NULL) ++ kfree_skb(last); ++ ++ error = ipsec_makeroute(&(hold_eroute.er_eaddr), ++ &(hold_eroute.er_emask), ++ hold_said, eroute_pid, skb, NULL, NULL); ++ if (error) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "HOLD makeroute 
returned %d, failed.\n", error); ++ } else { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "HOLD makeroute call successful.\n"); ++ } ++ return (error == 0); ++} ++ ++/* ++ * upon entry to this function, ixs->skb should be setup ++ * as follows: ++ * ++ * data = beginning of IP packet <- differs from ipsec_rcv(). ++ * nh.raw = beginning of IP packet. ++ * h.raw = data after the IP packet. ++ * ++ */ ++enum ipsec_xmit_value ++ipsec_xmit_encap_bundle(struct ipsec_xmit_state *ixs) ++{ ++#ifdef CONFIG_KLIPS_ALG ++ struct ipsec_alg_enc *ixt_e = NULL; ++ struct ipsec_alg_auth *ixt_a = NULL; ++ int blocksize = 8; ++#endif ++ enum ipsec_xmit_value bundle_stat = IPSEC_XMIT_OK; ++ ++ ixs->newdst = ixs->orgdst = ixs->iph->daddr; ++ ixs->newsrc = ixs->orgsrc = ixs->iph->saddr; ++ ixs->orgedst = ixs->outgoing_said.dst.u.v4.sin_addr.s_addr; ++ ixs->iphlen = ixs->iph->ihl << 2; ++ ixs->pyldsz = ntohs(ixs->iph->tot_len) - ixs->iphlen; ++ ixs->max_headroom = ixs->max_tailroom = 0; ++ ++ if (ixs->outgoing_said.proto == IPPROTO_INT) { ++ switch (ntohl(ixs->outgoing_said.spi)) { ++ case SPI_DROP: ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "shunt SA of DROP or no eroute: dropping.\n"); ++ ixs->stats->tx_dropped++; ++ break; ++ ++ case SPI_REJECT: ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "shunt SA of REJECT: notifying and dropping.\n"); ++ ICMP_SEND(ixs->skb, ++ ICMP_DEST_UNREACH, ++ ICMP_PKT_FILTERED, ++ 0, ++ ixs->physdev); ++ ixs->stats->tx_dropped++; ++ break; ++ ++ case SPI_PASS: ++#ifdef NET_21 ++ ixs->pass = 1; ++#endif /* NET_21 */ ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "PASS: calling dev_queue_xmit\n"); ++ return IPSEC_XMIT_PASS; ++ goto cleanup; ++ ++ case SPI_HOLD: ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "shunt SA of HOLD: this does not make sense here, 
dropping.\n"); ++ ixs->stats->tx_dropped++; ++ break; ++ ++ case SPI_TRAP: ++ case SPI_TRAPSUBNET: ++ { ++ struct sockaddr_in src, dst; ++#ifdef CONFIG_KLIPS_DEBUG ++ char bufsrc[ADDRTOA_BUF], bufdst[ADDRTOA_BUF]; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ /* Signal all listening KMds with a PF_KEY ACQUIRE */ ++ ++ memset(&src, 0, sizeof(src)); ++ memset(&dst, 0, sizeof(dst)); ++ src.sin_family = AF_INET; ++ dst.sin_family = AF_INET; ++ src.sin_addr.s_addr = ixs->iph->saddr; ++ dst.sin_addr.s_addr = ixs->iph->daddr; ++ ++ ixs->ips.ips_transport_protocol = 0; ++ src.sin_port = 0; ++ dst.sin_port = 0; ++ ++ if(ixs->eroute->er_eaddr.sen_proto != 0) { ++ ixs->ips.ips_transport_protocol = ixs->iph->protocol; ++ ++ if(ixs->eroute->er_eaddr.sen_sport != 0) { ++ src.sin_port = ++ (ixs->iph->protocol == IPPROTO_UDP ++ ? ((struct udphdr*) (((caddr_t)ixs->iph) + (ixs->iph->ihl << 2)))->source ++ : (ixs->iph->protocol == IPPROTO_TCP ++ ? ((struct tcphdr*)((caddr_t)ixs->iph + (ixs->iph->ihl << 2)))->source ++ : 0)); ++ } ++ if(ixs->eroute->er_eaddr.sen_dport != 0) { ++ dst.sin_port = ++ (ixs->iph->protocol == IPPROTO_UDP ++ ? ((struct udphdr*) (((caddr_t)ixs->iph) + (ixs->iph->ihl << 2)))->dest ++ : (ixs->iph->protocol == IPPROTO_TCP ++ ? ((struct tcphdr*)((caddr_t)ixs->iph + (ixs->iph->ihl << 2)))->dest ++ : 0)); ++ } ++ } ++ ++ ixs->ips.ips_addr_s = (struct sockaddr*)(&src); ++ ixs->ips.ips_addr_d = (struct sockaddr*)(&dst); ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "SADB_ACQUIRE sent with src=%s:%d, dst=%s:%d, proto=%d.\n", ++ addrtoa(((struct sockaddr_in*)(ixs->ips.ips_addr_s))->sin_addr, 0, bufsrc, sizeof(bufsrc)) <= ADDRTOA_BUF ? bufsrc : "BAD_ADDR", ++ ntohs(((struct sockaddr_in*)(ixs->ips.ips_addr_s))->sin_port), ++ addrtoa(((struct sockaddr_in*)(ixs->ips.ips_addr_d))->sin_addr, 0, bufdst, sizeof(bufdst)) <= ADDRTOA_BUF ? 
bufdst : "BAD_ADDR", ++ ntohs(((struct sockaddr_in*)(ixs->ips.ips_addr_d))->sin_port), ++ ixs->ips.ips_said.proto); ++ ++ /* increment count of total traps needed */ ++ ipsec_xmit_trap_count++; ++ ++ if (pfkey_acquire(&ixs->ips) == 0) { ++ ++ /* note that we succeeded */ ++ ipsec_xmit_trap_sendcount++; ++ ++ if (ixs->outgoing_said.spi==htonl(SPI_TRAPSUBNET)) { ++ /* ++ * The spinlock is to prevent any other ++ * process from accessing or deleting ++ * the eroute while we are using and ++ * updating it. ++ */ ++ spin_lock(&eroute_lock); ++ ixs->eroute = ipsec_findroute(&ixs->matcher); ++ if(ixs->eroute) { ++ ixs->eroute->er_said.spi = htonl(SPI_HOLD); ++ ixs->eroute->er_first = ixs->skb; ++ ixs->skb = NULL; ++ } ++ spin_unlock(&eroute_lock); ++ } else if (create_hold_eroute(ixs->eroute, ++ ixs->skb, ++ ixs->iph, ++ ixs->eroute_pid)) { ++ ixs->skb = NULL; ++ } ++ /* whether or not the above succeeded, we continue */ ++ ++ } ++ ixs->stats->tx_dropped++; ++ } ++ default: ++ /* XXX what do we do with an unknown shunt spi? */ ++ break; ++ } /* switch (ntohl(ixs->outgoing_said.spi)) */ ++ return IPSEC_XMIT_STOLEN; ++ } /* if (ixs->outgoing_said.proto == IPPROTO_INT) */ ++ ++ /* ++ The spinlock is to prevent any other process from ++ accessing or deleting the ipsec_sa hash table or any of the ++ ipsec_sa s while we are using and updating them. ++ ++ This is not optimal, but was relatively straightforward ++ at the time. A better way to do it has been planned for ++ more than a year, to lock the hash table and put reference ++ counts on each ipsec_sa instead. This is not likely to happen ++ in KLIPS1 unless a volunteer contributes it, but will be ++ designed into KLIPS2. 
++ */ ++ spin_lock(&tdb_lock); ++ ++ ixs->ipsp = ipsec_sa_getbyid(&ixs->outgoing_said); ++ ixs->sa_len = KLIPS_SATOT(debug_tunnel, &ixs->outgoing_said, 0, ixs->sa_txt, sizeof(ixs->sa_txt)); ++ ++ if (ixs->ipsp == NULL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "no ipsec_sa for SA%s: outgoing packet with no SA, dropped.\n", ++ ixs->sa_len ? ixs->sa_txt : " (error)"); ++ if(ixs->stats) { ++ ixs->stats->tx_dropped++; ++ } ++ bundle_stat = IPSEC_XMIT_SAIDNOTFOUND; ++ goto cleanup; ++ } ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "found ipsec_sa -- SA:<%s%s%s> %s\n", ++ IPS_XFORM_NAME(ixs->ipsp), ++ ixs->sa_len ? ixs->sa_txt : " (error)"); ++ ++ /* ++ * How much headroom do we need to be able to apply ++ * all the grouped transforms? ++ */ ++ ixs->ipsq = ixs->ipsp; /* save the head of the ipsec_sa chain */ ++ while (ixs->ipsp) { ++ if (debug_tunnel & DB_TN_XMIT) { ++ ixs->sa_len = KLIPS_SATOT(debug_tunnel, &ixs->ipsp->ips_said, 0, ixs->sa_txt, sizeof(ixs->sa_txt)); ++ if(ixs->sa_len == 0) { ++ strcpy(ixs->sa_txt, "(error)"); ++ } ++ } else { ++ *ixs->sa_txt = 0; ++ ixs->sa_len = 0; ++ } ++ ++ /* If it is in larval state, drop the packet, we cannot process yet. */ ++ if(ixs->ipsp->ips_state == SADB_SASTATE_LARVAL) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "ipsec_sa in larval state for SA:<%s%s%s> %s, cannot be used yet, dropping packet.\n", ++ IPS_XFORM_NAME(ixs->ipsp), ++ ixs->sa_len ? ixs->sa_txt : " (error)"); ++ if(ixs->stats) { ++ ixs->stats->tx_errors++; ++ } ++ bundle_stat = IPSEC_XMIT_SAIDNOTLIVE; ++ goto cleanup; ++ } ++ ++ if(ixs->ipsp->ips_state == SADB_SASTATE_DEAD) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "ipsec_sa in dead state for SA:<%s%s%s> %s, can no longer be used, dropping packet.\n", ++ IPS_XFORM_NAME(ixs->ipsp), ++ ixs->sa_len ? 
ixs->sa_txt : " (error)"); ++ ixs->stats->tx_errors++; ++ bundle_stat = IPSEC_XMIT_SAIDNOTLIVE; ++ goto cleanup; ++ } ++ ++ /* If the replay window counter == -1, expire SA, it will roll */ ++ if(ixs->ipsp->ips_replaywin && ixs->ipsp->ips_replaywin_lastseq == -1) { ++ pfkey_expire(ixs->ipsp, 1); ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "replay window counter rolled for SA:<%s%s%s> %s, packet dropped, expiring SA.\n", ++ IPS_XFORM_NAME(ixs->ipsp), ++ ixs->sa_len ? ixs->sa_txt : " (error)"); ++ ipsec_sa_delchain(ixs->ipsp); ++ ixs->stats->tx_errors++; ++ bundle_stat = IPSEC_XMIT_REPLAYROLLED; ++ goto cleanup; ++ } ++ ++ /* ++ * if this is the first time we are using this SA, mark start time, ++ * and offset hard/soft counters by "now" for later checking. ++ */ ++#if 0 ++ if(ixs->ipsp->ips_life.ipl_usetime.count == 0) { ++ ixs->ipsp->ips_life.ipl_usetime.count = jiffies; ++ ixs->ipsp->ips_life.ipl_usetime.hard += jiffies; ++ ixs->ipsp->ips_life.ipl_usetime.soft += jiffies; ++ } ++#endif ++ ++ ++ if(ipsec_lifetime_check(&ixs->ipsp->ips_life.ipl_bytes, "bytes", ixs->sa_txt, ++ ipsec_life_countbased, ipsec_outgoing, ixs->ipsp) == ipsec_life_harddied || ++ ipsec_lifetime_check(&ixs->ipsp->ips_life.ipl_addtime, "addtime",ixs->sa_txt, ++ ipsec_life_timebased, ipsec_outgoing, ixs->ipsp) == ipsec_life_harddied || ++ ipsec_lifetime_check(&ixs->ipsp->ips_life.ipl_usetime, "usetime",ixs->sa_txt, ++ ipsec_life_timebased, ipsec_outgoing, ixs->ipsp) == ipsec_life_harddied || ++ ipsec_lifetime_check(&ixs->ipsp->ips_life.ipl_packets, "packets",ixs->sa_txt, ++ ipsec_life_countbased, ipsec_outgoing, ixs->ipsp) == ipsec_life_harddied) { ++ ++ ipsec_sa_delchain(ixs->ipsp); ++ ixs->stats->tx_errors++; ++ bundle_stat = IPSEC_XMIT_LIFETIMEFAILED; ++ goto cleanup; ++ } ++ ++ ++ ixs->headroom = ixs->tailroom = 0; ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "calling room for <%s%s%s>, SA:%s\n", ++ 
IPS_XFORM_NAME(ixs->ipsp), ++ ixs->sa_len ? ixs->sa_txt : " (error)"); ++ switch(ixs->ipsp->ips_said.proto) { ++#ifdef CONFIG_KLIPS_AH ++ case IPPROTO_AH: ++ ixs->headroom += sizeof(struct ahhdr); ++ break; ++#endif /* CONFIG_KLIPS_AH */ ++#ifdef CONFIG_KLIPS_ESP ++ case IPPROTO_ESP: ++ ixt_e=ixs->ipsp->ips_alg_enc; ++ if (ixt_e) { ++ blocksize = ixt_e->ixt_common.ixt_blocksize; ++ ixs->headroom += ESP_HEADER_LEN + ixt_e->ixt_common.ixt_support.ias_ivlen/8; ++ } ++ else { ++ ixs->stats->tx_errors++; ++ bundle_stat = IPSEC_XMIT_ESP_BADALG; ++ goto cleanup; ++ } ++ ++ if ((ixt_a=ixs->ipsp->ips_alg_auth)) { ++ ixs->tailroom += AHHMAC_HASHLEN; ++ } else ++ switch(ixs->ipsp->ips_authalg) { ++#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 ++ case AH_MD5: ++ ixs->tailroom += AHHMAC_HASHLEN; ++ break; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++ case AH_SHA: ++ ixs->tailroom += AHHMAC_HASHLEN; ++ break; ++#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ ++ case AH_NONE: ++ break; ++ default: ++ ixs->stats->tx_errors++; ++ bundle_stat = IPSEC_XMIT_AH_BADALG; ++ goto cleanup; ++ } ++ ixs->tailroom += blocksize != 1 ? 
++ ((blocksize - ((ixs->pyldsz + 2) % blocksize)) % blocksize) + 2 : ++ ((4 - ((ixs->pyldsz + 2) % 4)) % 4) + 2; ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ if ((ixs->ipsp->ips_natt_type) && (!ixs->natt_type)) { ++ ixs->natt_type = ixs->ipsp->ips_natt_type; ++ ixs->natt_sport = ixs->ipsp->ips_natt_sport; ++ ixs->natt_dport = ixs->ipsp->ips_natt_dport; ++ switch (ixs->natt_type) { ++ case ESPINUDP_WITH_NON_IKE: ++ ixs->natt_head = sizeof(struct udphdr)+(2*sizeof(__u32)); ++ break; ++ ++ case ESPINUDP_WITH_NON_ESP: ++ ixs->natt_head = sizeof(struct udphdr); ++ break; ++ ++ default: ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT ++ , "klips_xmit: invalid nat-t type %d" ++ , ixs->natt_type); ++ bundle_stat = IPSEC_XMIT_ESPUDP_BADTYPE; ++ goto cleanup; ++ ++ break; ++ } ++ ixs->tailroom += ixs->natt_head; ++ } ++#endif ++ break; ++#endif /* !CONFIG_KLIPS_ESP */ ++#ifdef CONFIG_KLIPS_IPIP ++ case IPPROTO_IPIP: ++ ixs->headroom += sizeof(struct iphdr); ++ break; ++#endif /* !CONFIG_KLIPS_IPIP */ ++ case IPPROTO_COMP: ++#ifdef CONFIG_KLIPS_IPCOMP ++ /* ++ We can't predict how much the packet will ++ shrink without doing the actual compression. ++ We could do it here, if we were the first ++ encapsulation in the chain. That might save ++ us a skb_copy_expand, since we might fit ++ into the existing skb then. However, this ++ would be a bit unclean (and this hack has ++ bit us once), so we better not do it. After ++ all, the skb_copy_expand is cheap in ++ comparison to the actual compression. ++ At least we know the packet will not grow. 
++ */ ++ break; ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ default: ++ ixs->stats->tx_errors++; ++ bundle_stat = IPSEC_XMIT_BADPROTO; ++ goto cleanup; ++ } ++ ixs->ipsp = ixs->ipsp->ips_onext; ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "Required head,tailroom: %d,%d\n", ++ ixs->headroom, ixs->tailroom); ++ ixs->max_headroom += ixs->headroom; ++ ixs->max_tailroom += ixs->tailroom; ++ ixs->pyldsz += (ixs->headroom + ixs->tailroom); ++ } ++ ixs->ipsp = ixs->ipsq; /* restore the head of the ipsec_sa chain */ ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "existing head,tailroom: %d,%d before applying xforms with head,tailroom: %d,%d .\n", ++ skb_headroom(ixs->skb), skb_tailroom(ixs->skb), ++ ixs->max_headroom, ixs->max_tailroom); ++ ++ ixs->tot_headroom += ixs->max_headroom; ++ ixs->tot_tailroom += ixs->max_tailroom; ++ ++ ixs->mtudiff = ixs->cur_mtu + ixs->tot_headroom + ixs->tot_tailroom - ixs->physmtu; ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "mtu:%d physmtu:%d tothr:%d tottr:%d mtudiff:%d ippkttotlen:%d\n", ++ ixs->cur_mtu, ixs->physmtu, ++ ixs->tot_headroom, ixs->tot_tailroom, ixs->mtudiff, ntohs(ixs->iph->tot_len)); ++ if(ixs->cur_mtu == 0 || ixs->mtudiff > 0) { ++ int newmtu = ixs->physmtu - (ixs->tot_headroom + ((ixs->tot_tailroom + 2) & ~7) + 5); ++ ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_info:ipsec_xmit_encap_bundle: " ++ "dev %s mtu of %d decreased by %d to %d\n", ++ ixs->dev ? 
ixs->dev->name : "ifX", ++ ixs->cur_mtu, ++ ixs->cur_mtu - newmtu, ++ newmtu); ++ ixs->cur_mtu = newmtu; ++ ++ /* this would seem to adjust the MTU of the route as well */ ++#if 0 ++ ixs->skb->dst->pmtu = ixs->prv->mtu; /* RGB */ ++#endif /* 0 */ ++ } ++ ++ /* ++ If the sender is doing PMTU discovery, and the ++ packet doesn't fit within ixs->prv->mtu, notify him ++ (unless it was an ICMP packet, or it was not the ++ zero-offset packet) and send it anyways. ++ ++ Note: buggy firewall configuration may prevent the ++ ICMP packet from getting back. ++ */ ++ if(sysctl_ipsec_icmp ++ && ixs->cur_mtu < ntohs(ixs->iph->tot_len) ++ && (ixs->iph->frag_off & __constant_htons(IP_DF)) ) { ++ int notify = ixs->iph->protocol != IPPROTO_ICMP ++ && (ixs->iph->frag_off & __constant_htons(IP_OFFSET)) == 0; ++ ++#ifdef IPSEC_obey_DF ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "fragmentation needed and DF set; %sdropping packet\n", ++ notify ? "sending ICMP and " : ""); ++ if (notify) ++ ICMP_SEND(ixs->skb, ++ ICMP_DEST_UNREACH, ++ ICMP_FRAG_NEEDED, ++ ixs->cur_mtu, ++ ixs->physdev); ++ ixs->stats->tx_errors++; ++ bundle_stat = IPSEC_XMIT_CANNOTFRAG; ++ goto cleanup; ++#else /* IPSEC_obey_DF */ ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "fragmentation needed and DF set; %spassing packet\n", ++ notify ? "sending ICMP and " : ""); ++ if (notify) ++ ICMP_SEND(ixs->skb, ++ ICMP_DEST_UNREACH, ++ ICMP_FRAG_NEEDED, ++ ixs->cur_mtu, ++ ixs->physdev); ++#endif /* IPSEC_obey_DF */ ++ } ++ ++#ifdef MSS_HACK ++ /* ++ * If this is a transport mode TCP packet with ++ * SYN set, determine an effective MSS based on ++ * AH/ESP overheads determined above. 
++ */ ++ if (ixs->iph->protocol == IPPROTO_TCP ++ && ixs->outgoing_said.proto != IPPROTO_IPIP) { ++ struct tcphdr *tcph = ixs->skb->h.th; ++ if (tcph->syn && !tcph->ack) { ++ if(!ipsec_adjust_mss(ixs->skb, tcph, ixs->cur_mtu)) { ++ printk(KERN_WARNING ++ "klips_warning:ipsec_xmit_encap_bundle: " ++ "ipsec_adjust_mss() failed\n"); ++ ixs->stats->tx_errors++; ++ bundle_stat = IPSEC_XMIT_MSSERR; ++ goto cleanup; ++ } ++ } ++ } ++#endif /* MSS_HACK */ ++ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ if ((ixs->natt_type) && (ixs->outgoing_said.proto != IPPROTO_IPIP)) { ++ /** ++ * NAT-Traversal and Transport Mode: ++ * we need to correct TCP/UDP checksum ++ * ++ * If we've got NAT-OA, we can fix checksum without recalculation. ++ * If we don't we can zero udp checksum. ++ */ ++ __u32 natt_oa = ixs->ipsp->ips_natt_oa ? ++ ((struct sockaddr_in*)(ixs->ipsp->ips_natt_oa))->sin_addr.s_addr : 0; ++ unsigned int pkt_len = skb_tail_pointer(ixs->skb) - (unsigned char *)ixs->iph; ++ __u16 data_len = pkt_len - (ixs->iph->ihl << 2); ++ switch (ixs->iph->protocol) { ++ case IPPROTO_TCP: ++ if (data_len >= sizeof(struct tcphdr)) { ++ struct tcphdr *tcp = (struct tcphdr *)((__u32 *)ixs->iph+ixs->iph->ihl); ++ if (natt_oa) { ++ __u32 buff[2] = { ~ixs->iph->daddr, natt_oa }; ++ KLIPS_PRINT(debug_tunnel, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "NAT-T & TRANSPORT: " ++ "fix TCP checksum using NAT-OA\n"); ++ tcp->check = csum_fold( ++ csum_partial((unsigned char *)buff, sizeof(buff), ++ tcp->check^0xffff)); ++ } ++ else { ++ KLIPS_PRINT(debug_tunnel, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "NAT-T & TRANSPORT: do not recalc TCP checksum\n"); ++ } ++ } ++ else { ++ KLIPS_PRINT(debug_tunnel, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "NAT-T & TRANSPORT: can't fix TCP checksum\n"); ++ } ++ break; ++ case IPPROTO_UDP: ++ if (data_len >= sizeof(struct udphdr)) { ++ struct udphdr *udp = (struct udphdr *)((__u32 *)ixs->iph+ixs->iph->ihl); ++ if (udp->check == 0) { ++ 
KLIPS_PRINT(debug_tunnel, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "NAT-T & TRANSPORT: UDP checksum already 0\n"); ++ } ++ else if (natt_oa) { ++ __u32 buff[2] = { ~ixs->iph->daddr, natt_oa }; ++ KLIPS_PRINT(debug_tunnel, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "NAT-T & TRANSPORT: " ++ "fix UDP checksum using NAT-OA\n"); ++ udp->check = csum_fold( ++ csum_partial((unsigned char *)buff, sizeof(buff), ++ udp->check^0xffff)); ++ } ++ else { ++ KLIPS_PRINT(debug_tunnel, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "NAT-T & TRANSPORT: zero UDP checksum\n"); ++ udp->check = 0; ++ } ++ } ++ else { ++ KLIPS_PRINT(debug_tunnel, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "NAT-T & TRANSPORT: can't fix UDP checksum\n"); ++ } ++ break; ++ default: ++ KLIPS_PRINT(debug_tunnel, ++ "klips_debug:ipsec_tunnel_start_xmit: " ++ "NAT-T & TRANSPORT: non TCP/UDP packet -- do nothing\n"); ++ break; ++ } ++ } ++#endif /* CONFIG_IPSEC_NAT_TRAVERSAL */ ++ ++ if(!ixs->hard_header_stripped && ixs->hard_header_len>0) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "allocating %d bytes for hardheader.\n", ++ ixs->hard_header_len); ++ if((ixs->saved_header = kmalloc(ixs->hard_header_len, GFP_ATOMIC)) == NULL) { ++ printk(KERN_WARNING "klips_debug:ipsec_xmit_encap_bundle: " ++ "Failed, tried to allocate %d bytes for temp hard_header.\n", ++ ixs->hard_header_len); ++ ixs->stats->tx_errors++; ++ bundle_stat = IPSEC_XMIT_ERRMEMALLOC; ++ goto cleanup; ++ } ++ { ++ int i; ++ for (i = 0; i < ixs->hard_header_len; i++) { ++ ixs->saved_header[i] = ixs->skb->data[i]; ++ } ++ } ++ if(ixs->skb->len < ixs->hard_header_len) { ++ printk(KERN_WARNING "klips_error:ipsec_xmit_encap_bundle: " ++ "tried to skb_pull hhlen=%d, %d available. 
This should never happen, please report.\n", ++ ixs->hard_header_len, (int)(ixs->skb->len)); ++ ixs->stats->tx_errors++; ++ bundle_stat = IPSEC_XMIT_ESP_PUSHPULLERR; ++ goto cleanup; ++ } ++ skb_pull(ixs->skb, ixs->hard_header_len); ++ ixs->hard_header_stripped = 1; ++ ++/* ixs->iph = (struct iphdr *) (ixs->skb->data); */ ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "head,tailroom: %d,%d after hard_header stripped.\n", ++ skb_headroom(ixs->skb), skb_tailroom(ixs->skb)); ++ KLIPS_IP_PRINT(debug_tunnel & DB_TN_CROUT, ixs->iph); ++ } else { ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "hard header already stripped.\n"); ++ } ++ ++ ixs->ll_headroom = (ixs->hard_header_len + 15) & ~15; ++ ++ if ((skb_headroom(ixs->skb) >= ixs->max_headroom + 2 * ixs->ll_headroom) && ++ (skb_tailroom(ixs->skb) >= ixs->max_tailroom) ++#ifndef NET_21 ++ && ixs->skb->free ++#endif /* !NET_21 */ ++ ) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "data fits in existing skb\n"); ++ } else { ++ struct sk_buff* tskb; ++ ++ if(!ixs->oskb) { ++ ixs->oskb = ixs->skb; ++ } ++ ++ tskb = skb_copy_expand(ixs->skb, ++ /* The need for 2 * link layer length here remains unexplained...RGB */ ++ ixs->max_headroom + 2 * ixs->ll_headroom, ++ ixs->max_tailroom, ++ GFP_ATOMIC); ++ ++ if(tskb && ixs->skb->sk) { ++ skb_set_owner_w(tskb, ixs->skb->sk); ++ } ++ ++ if(ixs->skb != ixs->oskb) { ++ ipsec_kfree_skb(ixs->skb); ++ } ++ ixs->skb = tskb; ++ if (!ixs->skb) { ++ printk(KERN_WARNING ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "Failed, tried to allocate %d head and %d tailroom\n", ++ ixs->max_headroom, ixs->max_tailroom); ++ ixs->stats->tx_errors++; ++ bundle_stat = IPSEC_XMIT_ERRSKBALLOC; ++ goto cleanup; ++ } ++ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT, ++ "klips_debug:ipsec_xmit_encap_bundle: " ++ "head,tailroom: %d,%d after allocation\n", ++ skb_headroom(ixs->skb), 
skb_tailroom(ixs->skb)); ++ } ++#ifdef CONFIG_KLIPS_DEBUG ++ if(debug_tunnel & DB_TN_ENCAP) { ++ ipsec_print_ip(ixs->iph); ++ } ++#endif ++ ++ /* ++ * Apply grouped transforms to packet ++ */ ++ while (ixs->ipsp) { ++ enum ipsec_xmit_value encap_stat = IPSEC_XMIT_OK; ++ ++ encap_stat = ipsec_xmit_encap_once(ixs); ++#ifdef CONFIG_KLIPS_DEBUG ++ if(debug_tunnel & DB_TN_ENCAP) { ++ ipsec_print_ip(ixs->iph); ++ } ++#endif ++ ++ if(encap_stat != IPSEC_XMIT_OK) { ++ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, ++ "klips_debug:ipsec_xmit_encap_bundle: encap_once failed: %d\n", ++ encap_stat); ++ ++ bundle_stat = IPSEC_XMIT_ENCAPFAIL; ++ goto cleanup; ++ } ++ } ++ ++ /* we are done with this SA */ ++ ipsec_sa_put(ixs->ipsp); ++ ++ /* end encapsulation loop here XXX */ ++ cleanup: ++ spin_unlock(&tdb_lock); ++ return bundle_stat; ++} ++ ++/* ++ * $Log: ipsec_xmit.c,v $ ++ * Revision 1.20.2.13 2007-10-30 21:38:56 paul ++ * Use skb_tail_pointer [dhr] ++ * ++ * Revision 1.20.2.12 2007-10-28 00:26:03 paul ++ * Start of fix for 2.6.22+ kernels and skb_tail_pointer() ++ * ++ * Revision 1.20.2.11 2007/10/22 15:40:45 paul ++ * Missing #ifdef CONFIG_KLIPS_ALG [davidm] ++ * ++ * Revision 1.20.2.10 2007/09/05 02:56:10 paul ++ * Use the new ipsec_kversion macros by David to deal with 2.6.22 kernels. ++ * Fixes based on David McCullough patch. ++ * ++ * Revision 1.20.2.9 2007/07/06 17:18:43 paul ++ * Fix for authentication field on sent packets has size equals to zero when ++ * using custom auth algorithms. This is bug #811. Patch by "iamscared". ++ * ++ * Revision 1.20.2.8 2006/10/06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.20.2.7 2006/08/24 03:02:01 paul ++ * Compile fixes for when CONFIG_KLIPS_DEBUG is not set. 
(bug #642) ++ * ++ * Revision 1.20.2.6 2006/07/07 22:09:49 paul ++ * From: Bart Trojanowski ++ * Removing a left over '#else' that split another '#if/#endif' block in two. ++ * ++ * Revision 1.20.2.5 2006/07/07 15:43:17 paul ++ * From: Bart Trojanowski ++ * improved protocol detection in ipsec_print_ip() -- a debug aid. ++ * ++ * Revision 1.20.2.4 2006/04/20 16:33:07 mcr ++ * remove all of CONFIG_KLIPS_ALG --- one can no longer build without it. ++ * Fix in-kernel module compilation. Sub-makefiles do not work. ++ * ++ * Revision 1.20.2.3 2005/11/29 21:52:57 ken ++ * Fix for #518 MTU issues ++ * ++ * Revision 1.20.2.2 2005/11/27 21:41:03 paul ++ * Pull down TTL fixes from head. this fixes "Unknown symbol sysctl_ip_default_ttl"in for klips as module. ++ * ++ * Revision 1.20.2.1 2005/08/27 23:40:00 paul ++ * recommited HAVE_SOCK_SECURITY fixes for linux 2.6.13 ++ * ++ * Revision 1.20 2005/07/12 15:39:27 paul ++ * include asm/uaccess.h for VERIFY_WRITE ++ * ++ * Revision 1.19 2005/05/24 01:02:35 mcr ++ * some refactoring/simplification of situation where alg ++ * is not found. ++ * ++ * Revision 1.18 2005/05/23 23:52:33 mcr ++ * adjust comments, add additional debugging. ++ * ++ * Revision 1.17 2005/05/23 22:57:23 mcr ++ * removed explicit 3DES support. ++ * ++ * Revision 1.16 2005/05/21 03:29:15 mcr ++ * fixed warning about unused zeroes if AH is off. ++ * ++ * Revision 1.15 2005/05/20 16:47:59 mcr ++ * include asm/checksum.h to get ip_fast_csum macro. ++ * ++ * Revision 1.14 2005/05/11 01:43:03 mcr ++ * removed "poor-man"s OOP in favour of proper C structures. ++ * ++ * Revision 1.13 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. ++ * ++ * Revision 1.12 2005/04/15 01:28:34 mcr ++ * use ipsec_dmp_block. ++ * ++ * Revision 1.11 2005/01/26 00:50:35 mcr ++ * adjustment of confusion of CONFIG_IPSEC_NAT vs CONFIG_KLIPS_NAT, ++ * and make sure that NAT_TRAVERSAL is set as well to match ++ * userspace compiles of code. 
++ * ++ * Revision 1.10 2004/09/13 17:55:21 ken ++ * MD5* -> osMD5* ++ * ++ * Revision 1.9 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.8 2004/04/06 02:49:26 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.7 2004/02/03 03:13:41 mcr ++ * mark invalid encapsulation states. ++ * ++ * Revision 1.6.2.1 2003/12/22 15:25:52 jjo ++ * Merged algo-0.8.1-rc11-test1 into alg-branch ++ * ++ * Revision 1.6 2003/12/10 01:14:27 mcr ++ * NAT-traversal patches to KLIPS. ++ * ++ * Revision 1.5 2003/10/31 02:27:55 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.4.4.2 2003/10/29 01:37:39 mcr ++ * when creating %hold from %trap, only make the %hold as ++ * specific as the %trap was - so if the protocol and ports ++ * were wildcards, then the %hold will be too. ++ * ++ * Revision 1.4.4.1 2003/09/21 13:59:56 mcr ++ * pre-liminary X.509 patch - does not yet pass tests. ++ * ++ * Revision 1.4 2003/06/20 02:28:10 mcr ++ * misstype of variable name, not detected by module build. ++ * ++ * Revision 1.3 2003/06/20 01:42:21 mcr ++ * added counters to measure how many ACQUIREs we send to pluto, ++ * and how many are successfully sent. ++ * ++ * Revision 1.2 2003/04/03 17:38:35 rgb ++ * Centralised ipsec_kfree_skb and ipsec_dev_{get,put}. ++ * Normalised coding style. ++ * Simplified logic and reduced duplication of code. ++ * ++ * Revision 1.1 2003/02/12 19:31:23 rgb ++ * Refactored from ipsec_tunnel.c ++ * ++ * Local Variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/match586.S Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,357 @@ ++/* match.s -- Pentium-optimized version of longest_match() ++ * Written for zlib 1.1.2 ++ * Copyright (C) 1998 Brian Raiter ++ * ++ * This is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License. 
++ */ ++ ++#ifndef NO_UNDERLINE ++#define match_init _ipcomp_match_init ++#define longest_match _ipcomp_longest_match ++#else ++#define match_init ipcomp_match_init ++#define longest_match ipcomp_longest_match ++#endif ++ ++#define MAX_MATCH (258) ++#define MIN_MATCH (3) ++#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1) ++#define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7) ++ ++/* stack frame offsets */ ++ ++#define wmask 0 /* local copy of s->wmask */ ++#define window 4 /* local copy of s->window */ ++#define windowbestlen 8 /* s->window + bestlen */ ++#define chainlenscanend 12 /* high word: current chain len */ ++ /* low word: last bytes sought */ ++#define scanstart 16 /* first two bytes of string */ ++#define scanalign 20 /* dword-misalignment of string */ ++#define nicematch 24 /* a good enough match size */ ++#define bestlen 28 /* size of best match so far */ ++#define scan 32 /* ptr to string wanting match */ ++ ++#define LocalVarsSize (36) ++/* saved ebx 36 */ ++/* saved edi 40 */ ++/* saved esi 44 */ ++/* saved ebp 48 */ ++/* return address 52 */ ++#define deflatestate 56 /* the function arguments */ ++#define curmatch 60 ++ ++/* Offsets for fields in the deflate_state structure. These numbers ++ * are calculated from the definition of deflate_state, with the ++ * assumption that the compiler will dword-align the fields. (Thus, ++ * changing the definition of deflate_state could easily cause this ++ * program to crash horribly, without so much as a warning at ++ * compile time. Sigh.) 
++ */ ++#define dsWSize 36 ++#define dsWMask 44 ++#define dsWindow 48 ++#define dsPrev 56 ++#define dsMatchLen 88 ++#define dsPrevMatch 92 ++#define dsStrStart 100 ++#define dsMatchStart 104 ++#define dsLookahead 108 ++#define dsPrevLen 112 ++#define dsMaxChainLen 116 ++#define dsGoodMatch 132 ++#define dsNiceMatch 136 ++ ++ ++.file "match.S" ++ ++.globl match_init, longest_match ++ ++.text ++ ++/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */ ++ ++longest_match: ++ ++/* Save registers that the compiler may be using, and adjust %esp to */ ++/* make room for our stack frame. */ ++ ++ pushl %ebp ++ pushl %edi ++ pushl %esi ++ pushl %ebx ++ subl $LocalVarsSize, %esp ++ ++/* Retrieve the function arguments. %ecx will hold cur_match */ ++/* throughout the entire function. %edx will hold the pointer to the */ ++/* deflate_state structure during the function's setup (before */ ++/* entering the main loop). */ ++ ++ movl deflatestate(%esp), %edx ++ movl curmatch(%esp), %ecx ++ ++/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */ ++ ++ movl dsNiceMatch(%edx), %eax ++ movl dsLookahead(%edx), %ebx ++ cmpl %eax, %ebx ++ jl LookaheadLess ++ movl %eax, %ebx ++LookaheadLess: movl %ebx, nicematch(%esp) ++ ++/* register Bytef *scan = s->window + s->strstart; */ ++ ++ movl dsWindow(%edx), %esi ++ movl %esi, window(%esp) ++ movl dsStrStart(%edx), %ebp ++ lea (%esi,%ebp), %edi ++ movl %edi, scan(%esp) ++ ++/* Determine how many bytes the scan ptr is off from being */ ++/* dword-aligned. */ ++ ++ movl %edi, %eax ++ negl %eax ++ andl $3, %eax ++ movl %eax, scanalign(%esp) ++ ++/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? 
*/ ++/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ ++ ++ movl dsWSize(%edx), %eax ++ subl $MIN_LOOKAHEAD, %eax ++ subl %eax, %ebp ++ jg LimitPositive ++ xorl %ebp, %ebp ++LimitPositive: ++ ++/* unsigned chain_length = s->max_chain_length; */ ++/* if (s->prev_length >= s->good_match) { */ ++/* chain_length >>= 2; */ ++/* } */ ++ ++ movl dsPrevLen(%edx), %eax ++ movl dsGoodMatch(%edx), %ebx ++ cmpl %ebx, %eax ++ movl dsMaxChainLen(%edx), %ebx ++ jl LastMatchGood ++ shrl $2, %ebx ++LastMatchGood: ++ ++/* chainlen is decremented once beforehand so that the function can */ ++/* use the sign flag instead of the zero flag for the exit test. */ ++/* It is then shifted into the high word, to make room for the scanend */ ++/* scanend value, which it will always accompany. */ ++ ++ decl %ebx ++ shll $16, %ebx ++ ++/* int best_len = s->prev_length; */ ++ ++ movl dsPrevLen(%edx), %eax ++ movl %eax, bestlen(%esp) ++ ++/* Store the sum of s->window + best_len in %esi locally, and in %esi. */ ++ ++ addl %eax, %esi ++ movl %esi, windowbestlen(%esp) ++ ++/* register ush scan_start = *(ushf*)scan; */ ++/* register ush scan_end = *(ushf*)(scan+best_len-1); */ ++ ++ movw (%edi), %bx ++ movw %bx, scanstart(%esp) ++ movw -1(%edi,%eax), %bx ++ movl %ebx, chainlenscanend(%esp) ++ ++/* Posf *prev = s->prev; */ ++/* uInt wmask = s->w_mask; */ ++ ++ movl dsPrev(%edx), %edi ++ movl dsWMask(%edx), %edx ++ mov %edx, wmask(%esp) ++ ++/* Jump into the main loop. */ ++ ++ jmp LoopEntry ++ ++.balign 16 ++ ++/* do { ++ * match = s->window + cur_match; ++ * if (*(ushf*)(match+best_len-1) != scan_end || ++ * *(ushf*)match != scan_start) continue; ++ * [...] ++ * } while ((cur_match = prev[cur_match & wmask]) > limit ++ * && --chain_length != 0); ++ * ++ * Here is the inner loop of the function. The function will spend the ++ * majority of its time in this loop, and majority of that time will ++ * be spent in the first ten instructions. 
++ * ++ * Within this loop: ++ * %ebx = chainlenscanend - i.e., ((chainlen << 16) | scanend) ++ * %ecx = curmatch ++ * %edx = curmatch & wmask ++ * %esi = windowbestlen - i.e., (window + bestlen) ++ * %edi = prev ++ * %ebp = limit ++ * ++ * Two optimization notes on the choice of instructions: ++ * ++ * The first instruction uses a 16-bit address, which costs an extra, ++ * unpairable cycle. This is cheaper than doing a 32-bit access and ++ * zeroing the high word, due to the 3-cycle misalignment penalty which ++ * would occur half the time. This also turns out to be cheaper than ++ * doing two separate 8-bit accesses, as the memory is so rarely in the ++ * L1 cache. ++ * ++ * The window buffer, however, apparently spends a lot of time in the ++ * cache, and so it is faster to retrieve the word at the end of the ++ * match string with two 8-bit loads. The instructions that test the ++ * word at the beginning of the match string, however, are executed ++ * much less frequently, and there it was cheaper to use 16-bit ++ * instructions, which avoided the necessity of saving off and ++ * subsequently reloading one of the other registers. ++ */ ++LookupLoop: ++ /* 1 U & V */ ++ movw (%edi,%edx,2), %cx /* 2 U pipe */ ++ movl wmask(%esp), %edx /* 2 V pipe */ ++ cmpl %ebp, %ecx /* 3 U pipe */ ++ jbe LeaveNow /* 3 V pipe */ ++ subl $0x00010000, %ebx /* 4 U pipe */ ++ js LeaveNow /* 4 V pipe */ ++LoopEntry: movb -1(%esi,%ecx), %al /* 5 U pipe */ ++ andl %ecx, %edx /* 5 V pipe */ ++ cmpb %bl, %al /* 6 U pipe */ ++ jnz LookupLoop /* 6 V pipe */ ++ movb (%esi,%ecx), %ah ++ cmpb %bh, %ah ++ jnz LookupLoop ++ movl window(%esp), %eax ++ movw (%eax,%ecx), %ax ++ cmpw scanstart(%esp), %ax ++ jnz LookupLoop ++ ++/* Store the current value of chainlen. */ ++ ++ movl %ebx, chainlenscanend(%esp) ++ ++/* Point %edi to the string under scrutiny, and %esi to the string we */ ++/* are hoping to match it up with. 
In actuality, %esi and %edi are */ ++/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ ++/* initialized to -(MAX_MATCH_8 - scanalign). */ ++ ++ movl window(%esp), %esi ++ movl scan(%esp), %edi ++ addl %ecx, %esi ++ movl scanalign(%esp), %eax ++ movl $(-MAX_MATCH_8), %edx ++ lea MAX_MATCH_8(%edi,%eax), %edi ++ lea MAX_MATCH_8(%esi,%eax), %esi ++ ++/* Test the strings for equality, 8 bytes at a time. At the end, ++ * adjust %edx so that it is offset to the exact byte that mismatched. ++ * ++ * We already know at this point that the first three bytes of the ++ * strings match each other, and they can be safely passed over before ++ * starting the compare loop. So what this code does is skip over 0-3 ++ * bytes, as much as necessary in order to dword-align the %edi ++ * pointer. (%esi will still be misaligned three times out of four.) ++ * ++ * It should be confessed that this loop usually does not represent ++ * much of the total running time. Replacing it with a more ++ * straightforward "rep cmpsb" would not drastically degrade ++ * performance. ++ */ ++LoopCmps: ++ movl (%esi,%edx), %eax ++ movl (%edi,%edx), %ebx ++ xorl %ebx, %eax ++ jnz LeaveLoopCmps ++ movl 4(%esi,%edx), %eax ++ movl 4(%edi,%edx), %ebx ++ xorl %ebx, %eax ++ jnz LeaveLoopCmps4 ++ addl $8, %edx ++ jnz LoopCmps ++ jmp LenMaximum ++LeaveLoopCmps4: addl $4, %edx ++LeaveLoopCmps: testl $0x0000FFFF, %eax ++ jnz LenLower ++ addl $2, %edx ++ shrl $16, %eax ++LenLower: subb $1, %al ++ adcl $0, %edx ++ ++/* Calculate the length of the match. If it is longer than MAX_MATCH, */ ++/* then automatically accept it as the best possible match and leave. */ ++ ++ lea (%edi,%edx), %eax ++ movl scan(%esp), %edi ++ subl %edi, %eax ++ cmpl $MAX_MATCH, %eax ++ jge LenMaximum ++ ++/* If the length of the match is not longer than the best match we */ ++/* have so far, then forget it and return to the lookup loop. 
*/ ++ ++ movl deflatestate(%esp), %edx ++ movl bestlen(%esp), %ebx ++ cmpl %ebx, %eax ++ jg LongerMatch ++ movl chainlenscanend(%esp), %ebx ++ movl windowbestlen(%esp), %esi ++ movl dsPrev(%edx), %edi ++ movl wmask(%esp), %edx ++ andl %ecx, %edx ++ jmp LookupLoop ++ ++/* s->match_start = cur_match; */ ++/* best_len = len; */ ++/* if (len >= nice_match) break; */ ++/* scan_end = *(ushf*)(scan+best_len-1); */ ++ ++LongerMatch: movl nicematch(%esp), %ebx ++ movl %eax, bestlen(%esp) ++ movl %ecx, dsMatchStart(%edx) ++ cmpl %ebx, %eax ++ jge LeaveNow ++ movl window(%esp), %esi ++ addl %eax, %esi ++ movl %esi, windowbestlen(%esp) ++ movl chainlenscanend(%esp), %ebx ++ movw -1(%edi,%eax), %bx ++ movl dsPrev(%edx), %edi ++ movl %ebx, chainlenscanend(%esp) ++ movl wmask(%esp), %edx ++ andl %ecx, %edx ++ jmp LookupLoop ++ ++/* Accept the current string, with the maximum possible length. */ ++ ++LenMaximum: movl deflatestate(%esp), %edx ++ movl $MAX_MATCH, bestlen(%esp) ++ movl %ecx, dsMatchStart(%edx) ++ ++/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ ++/* return s->lookahead; */ ++ ++LeaveNow: ++ movl deflatestate(%esp), %edx ++ movl bestlen(%esp), %ebx ++ movl dsLookahead(%edx), %eax ++ cmpl %eax, %ebx ++ jg LookaheadRet ++ movl %ebx, %eax ++LookaheadRet: ++ ++/* Restore the stack and return from whence we came. */ ++ ++ addl $LocalVarsSize, %esp ++ popl %ebx ++ popl %esi ++ popl %edi ++ popl %ebp ++match_init: ret +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/match686.S Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,330 @@ ++/* match.s -- Pentium-Pro-optimized version of longest_match() ++ * Written for zlib 1.1.2 ++ * Copyright (C) 1998 Brian Raiter ++ * ++ * This is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License. 
++ */ ++ ++#ifndef NO_UNDERLINE ++#define match_init _ipcomp_match_init ++#define longest_match _ipcomp_longest_match ++#else ++#define match_init ipcomp_match_init ++#define longest_match ipcomp_longest_match ++#endif ++ ++#define MAX_MATCH (258) ++#define MIN_MATCH (3) ++#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1) ++#define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7) ++ ++/* stack frame offsets */ ++ ++#define chainlenwmask 0 /* high word: current chain len */ ++ /* low word: s->wmask */ ++#define window 4 /* local copy of s->window */ ++#define windowbestlen 8 /* s->window + bestlen */ ++#define scanstart 16 /* first two bytes of string */ ++#define scanend 12 /* last two bytes of string */ ++#define scanalign 20 /* dword-misalignment of string */ ++#define nicematch 24 /* a good enough match size */ ++#define bestlen 28 /* size of best match so far */ ++#define scan 32 /* ptr to string wanting match */ ++ ++#define LocalVarsSize (36) ++/* saved ebx 36 */ ++/* saved edi 40 */ ++/* saved esi 44 */ ++/* saved ebp 48 */ ++/* return address 52 */ ++#define deflatestate 56 /* the function arguments */ ++#define curmatch 60 ++ ++/* Offsets for fields in the deflate_state structure. These numbers ++ * are calculated from the definition of deflate_state, with the ++ * assumption that the compiler will dword-align the fields. (Thus, ++ * changing the definition of deflate_state could easily cause this ++ * program to crash horribly, without so much as a warning at ++ * compile time. Sigh.) 
++ */ ++#define dsWSize 36 ++#define dsWMask 44 ++#define dsWindow 48 ++#define dsPrev 56 ++#define dsMatchLen 88 ++#define dsPrevMatch 92 ++#define dsStrStart 100 ++#define dsMatchStart 104 ++#define dsLookahead 108 ++#define dsPrevLen 112 ++#define dsMaxChainLen 116 ++#define dsGoodMatch 132 ++#define dsNiceMatch 136 ++ ++ ++.file "match.S" ++ ++.globl match_init, longest_match ++ ++.text ++ ++/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */ ++ ++longest_match: ++ ++/* Save registers that the compiler may be using, and adjust %esp to */ ++/* make room for our stack frame. */ ++ ++ pushl %ebp ++ pushl %edi ++ pushl %esi ++ pushl %ebx ++ subl $LocalVarsSize, %esp ++ ++/* Retrieve the function arguments. %ecx will hold cur_match */ ++/* throughout the entire function. %edx will hold the pointer to the */ ++/* deflate_state structure during the function's setup (before */ ++/* entering the main loop). */ ++ ++ movl deflatestate(%esp), %edx ++ movl curmatch(%esp), %ecx ++ ++/* uInt wmask = s->w_mask; */ ++/* unsigned chain_length = s->max_chain_length; */ ++/* if (s->prev_length >= s->good_match) { */ ++/* chain_length >>= 2; */ ++/* } */ ++ ++ movl dsPrevLen(%edx), %eax ++ movl dsGoodMatch(%edx), %ebx ++ cmpl %ebx, %eax ++ movl dsWMask(%edx), %eax ++ movl dsMaxChainLen(%edx), %ebx ++ jl LastMatchGood ++ shrl $2, %ebx ++LastMatchGood: ++ ++/* chainlen is decremented once beforehand so that the function can */ ++/* use the sign flag instead of the zero flag for the exit test. */ ++/* It is then shifted into the high word, to make room for the wmask */ ++/* value, which it will always accompany. 
*/ ++ ++ decl %ebx ++ shll $16, %ebx ++ orl %eax, %ebx ++ movl %ebx, chainlenwmask(%esp) ++ ++/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */ ++ ++ movl dsNiceMatch(%edx), %eax ++ movl dsLookahead(%edx), %ebx ++ cmpl %eax, %ebx ++ jl LookaheadLess ++ movl %eax, %ebx ++LookaheadLess: movl %ebx, nicematch(%esp) ++ ++/* register Bytef *scan = s->window + s->strstart; */ ++ ++ movl dsWindow(%edx), %esi ++ movl %esi, window(%esp) ++ movl dsStrStart(%edx), %ebp ++ lea (%esi,%ebp), %edi ++ movl %edi, scan(%esp) ++ ++/* Determine how many bytes the scan ptr is off from being */ ++/* dword-aligned. */ ++ ++ movl %edi, %eax ++ negl %eax ++ andl $3, %eax ++ movl %eax, scanalign(%esp) ++ ++/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ ++/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ ++ ++ movl dsWSize(%edx), %eax ++ subl $MIN_LOOKAHEAD, %eax ++ subl %eax, %ebp ++ jg LimitPositive ++ xorl %ebp, %ebp ++LimitPositive: ++ ++/* int best_len = s->prev_length; */ ++ ++ movl dsPrevLen(%edx), %eax ++ movl %eax, bestlen(%esp) ++ ++/* Store the sum of s->window + best_len in %esi locally, and in %esi. */ ++ ++ addl %eax, %esi ++ movl %esi, windowbestlen(%esp) ++ ++/* register ush scan_start = *(ushf*)scan; */ ++/* register ush scan_end = *(ushf*)(scan+best_len-1); */ ++/* Posf *prev = s->prev; */ ++ ++ movzwl (%edi), %ebx ++ movl %ebx, scanstart(%esp) ++ movzwl -1(%edi,%eax), %ebx ++ movl %ebx, scanend(%esp) ++ movl dsPrev(%edx), %edi ++ ++/* Jump into the main loop. */ ++ ++ movl chainlenwmask(%esp), %edx ++ jmp LoopEntry ++ ++.balign 16 ++ ++/* do { ++ * match = s->window + cur_match; ++ * if (*(ushf*)(match+best_len-1) != scan_end || ++ * *(ushf*)match != scan_start) continue; ++ * [...] ++ * } while ((cur_match = prev[cur_match & wmask]) > limit ++ * && --chain_length != 0); ++ * ++ * Here is the inner loop of the function. 
The function will spend the ++ * majority of its time in this loop, and majority of that time will ++ * be spent in the first ten instructions. ++ * ++ * Within this loop: ++ * %ebx = scanend ++ * %ecx = curmatch ++ * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) ++ * %esi = windowbestlen - i.e., (window + bestlen) ++ * %edi = prev ++ * %ebp = limit ++ */ ++LookupLoop: ++ andl %edx, %ecx ++ movzwl (%edi,%ecx,2), %ecx ++ cmpl %ebp, %ecx ++ jbe LeaveNow ++ subl $0x00010000, %edx ++ js LeaveNow ++LoopEntry: movzwl -1(%esi,%ecx), %eax ++ cmpl %ebx, %eax ++ jnz LookupLoop ++ movl window(%esp), %eax ++ movzwl (%eax,%ecx), %eax ++ cmpl scanstart(%esp), %eax ++ jnz LookupLoop ++ ++/* Store the current value of chainlen. */ ++ ++ movl %edx, chainlenwmask(%esp) ++ ++/* Point %edi to the string under scrutiny, and %esi to the string we */ ++/* are hoping to match it up with. In actuality, %esi and %edi are */ ++/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ ++/* initialized to -(MAX_MATCH_8 - scanalign). */ ++ ++ movl window(%esp), %esi ++ movl scan(%esp), %edi ++ addl %ecx, %esi ++ movl scanalign(%esp), %eax ++ movl $(-MAX_MATCH_8), %edx ++ lea MAX_MATCH_8(%edi,%eax), %edi ++ lea MAX_MATCH_8(%esi,%eax), %esi ++ ++/* Test the strings for equality, 8 bytes at a time. At the end, ++ * adjust %edx so that it is offset to the exact byte that mismatched. ++ * ++ * We already know at this point that the first three bytes of the ++ * strings match each other, and they can be safely passed over before ++ * starting the compare loop. So what this code does is skip over 0-3 ++ * bytes, as much as necessary in order to dword-align the %edi ++ * pointer. (%esi will still be misaligned three times out of four.) ++ * ++ * It should be confessed that this loop usually does not represent ++ * much of the total running time. Replacing it with a more ++ * straightforward "rep cmpsb" would not drastically degrade ++ * performance. 
++ */ ++LoopCmps: ++ movl (%esi,%edx), %eax ++ xorl (%edi,%edx), %eax ++ jnz LeaveLoopCmps ++ movl 4(%esi,%edx), %eax ++ xorl 4(%edi,%edx), %eax ++ jnz LeaveLoopCmps4 ++ addl $8, %edx ++ jnz LoopCmps ++ jmp LenMaximum ++LeaveLoopCmps4: addl $4, %edx ++LeaveLoopCmps: testl $0x0000FFFF, %eax ++ jnz LenLower ++ addl $2, %edx ++ shrl $16, %eax ++LenLower: subb $1, %al ++ adcl $0, %edx ++ ++/* Calculate the length of the match. If it is longer than MAX_MATCH, */ ++/* then automatically accept it as the best possible match and leave. */ ++ ++ lea (%edi,%edx), %eax ++ movl scan(%esp), %edi ++ subl %edi, %eax ++ cmpl $MAX_MATCH, %eax ++ jge LenMaximum ++ ++/* If the length of the match is not longer than the best match we */ ++/* have so far, then forget it and return to the lookup loop. */ ++ ++ movl deflatestate(%esp), %edx ++ movl bestlen(%esp), %ebx ++ cmpl %ebx, %eax ++ jg LongerMatch ++ movl windowbestlen(%esp), %esi ++ movl dsPrev(%edx), %edi ++ movl scanend(%esp), %ebx ++ movl chainlenwmask(%esp), %edx ++ jmp LookupLoop ++ ++/* s->match_start = cur_match; */ ++/* best_len = len; */ ++/* if (len >= nice_match) break; */ ++/* scan_end = *(ushf*)(scan+best_len-1); */ ++ ++LongerMatch: movl nicematch(%esp), %ebx ++ movl %eax, bestlen(%esp) ++ movl %ecx, dsMatchStart(%edx) ++ cmpl %ebx, %eax ++ jge LeaveNow ++ movl window(%esp), %esi ++ addl %eax, %esi ++ movl %esi, windowbestlen(%esp) ++ movzwl -1(%edi,%eax), %ebx ++ movl dsPrev(%edx), %edi ++ movl %ebx, scanend(%esp) ++ movl chainlenwmask(%esp), %edx ++ jmp LookupLoop ++ ++/* Accept the current string, with the maximum possible length. 
*/ ++ ++LenMaximum: movl deflatestate(%esp), %edx ++ movl $MAX_MATCH, bestlen(%esp) ++ movl %ecx, dsMatchStart(%edx) ++ ++/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ ++/* return s->lookahead; */ ++ ++LeaveNow: ++ movl deflatestate(%esp), %edx ++ movl bestlen(%esp), %ebx ++ movl dsLookahead(%edx), %eax ++ cmpl %eax, %ebx ++ jg LookaheadRet ++ movl %ebx, %eax ++LookaheadRet: ++ ++/* Restore the stack and return from whence we came. */ ++ ++ addl $LocalVarsSize, %esp ++ popl %ebx ++ popl %esi ++ popl %edi ++ popl %ebp ++match_init: ret +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/null/ipsec_alg_null.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,147 @@ ++/* ++ * ipsec_alg NULL cipher stubs ++ * ++ * Author: JuanJo Ciarlante ++ * ++ * $Id: ipsec_alg_null.c,v 1.1.2.2 2008-01-11 21:07:23 paul Exp $ ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ */ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++ ++/* ++ * special case: ipsec core modular with this static algo inside: ++ * must avoid MODULE magic for this file ++ */ ++#if defined(CONFIG_KLIPS_MODULE) && defined(CONFIG_KLIPS_ENC_NULL) ++#undef MODULE ++#endif ++ ++#include ++#include ++ ++#include /* printk() */ ++#include /* error codes */ ++#include /* size_t */ ++#include ++ ++/* Check if __exit is defined, if not null it */ ++#ifndef __exit ++#define __exit ++#endif ++ ++/* Low freeswan header coupling */ ++#include "openswan/ipsec_alg.h" ++ ++#define ESP_NULL 11 /* from ipsec drafts */ ++#define ESP_NULL_BLK_LEN 1 ++ ++MODULE_AUTHOR("JuanJo Ciarlante "); ++static int debug_null=0; ++static int test_null=0; ++#ifdef module_param ++module_param(debug_null, int, 0600); ++module_param(test_null, int, 0600); ++#else ++MODULE_PARM(debug_null, "i"); ++MODULE_PARM(test_null, "i"); ++#endif ++ ++typedef int null_context; ++ ++struct null_eks{ ++ null_context null_ctx; ++}; ++static int _null_set_key(struct ipsec_alg_enc *alg, ++ __u8 * key_e, const __u8 * key, ++ size_t keysize) { ++ null_context *ctx=&((struct null_eks*)key_e)->null_ctx; ++ if (debug_null > 0) ++ printk(KERN_DEBUG "klips_debug:_null_set_key:" ++ "key_e=%p key=%p keysize=%d\n", ++ key_e, key, keysize); ++ *ctx = 1; ++ return 0; ++} ++static int _null_cbc_encrypt(struct ipsec_alg_enc *alg, ++ __u8 * key_e, __u8 * in, int ilen, const __u8 * iv, ++ int encrypt) { ++ null_context *ctx=&((struct null_eks*)key_e)->null_ctx; ++ if (debug_null > 0) ++ printk(KERN_DEBUG "klips_debug:_null_cbc_encrypt:" ++ "key_e=%p in=%p ilen=%d iv=%p encrypt=%d\n", ++ key_e, in, ilen, iv, encrypt); ++ (*ctx)++; ++ return ilen; ++} ++static struct ipsec_alg_enc ipsec_alg_NULL = { ++ ixt_common: { ixt_version: IPSEC_ALG_VERSION, ++ ixt_refcnt: ATOMIC_INIT(0), ++ ixt_name: "null", ++ ixt_blocksize: ESP_NULL_BLK_LEN, ++ ixt_support: { ++ ias_exttype: IPSEC_ALG_TYPE_ENCRYPT, ++ ias_id: ESP_NULL, ++ 
ias_ivlen: 0, ++ ias_keyminbits: 0, ++ ias_keymaxbits: 0, ++ }, ++ }, ++#if defined(CONFIG_KLIPS_ENC_NULL_MODULE) ++ ixt_module: THIS_MODULE, ++#endif ++ ixt_e_keylen: 0, ++ ixt_e_ctx_size: sizeof(null_context), ++ ixt_e_set_key: _null_set_key, ++ ixt_e_cbc_encrypt:_null_cbc_encrypt, ++}; ++ ++#if defined(CONFIG_KLIPS_ENC_NULL_MODULE) ++IPSEC_ALG_MODULE_INIT_MOD( ipsec_null_init ) ++#else ++IPSEC_ALG_MODULE_INIT_STATIC( ipsec_null_init ) ++#endif ++{ ++ int ret, test_ret; ++ ret=register_ipsec_alg_enc(&ipsec_alg_NULL); ++ printk("ipsec_null_init(alg_type=%d alg_id=%d name=%s): ret=%d\n", ++ ipsec_alg_NULL.ixt_common.ixt_support.ias_exttype, ++ ipsec_alg_NULL.ixt_common.ixt_support.ias_id, ++ ipsec_alg_NULL.ixt_common.ixt_name, ++ ret); ++ if (ret==0 && test_null) { ++ test_ret=ipsec_alg_test( ++ ipsec_alg_NULL.ixt_common.ixt_support.ias_exttype, ++ ipsec_alg_NULL.ixt_common.ixt_support.ias_id, ++ test_null); ++ printk("ipsec_null_init(alg_type=%d alg_id=%d): test_ret=%d\n", ++ ipsec_alg_NULL.ixt_common.ixt_support.ias_exttype, ++ ipsec_alg_NULL.ixt_common.ixt_support.ias_id, ++ test_ret); ++ } ++ return ret; ++} ++#if defined(CONFIG_KLIPS_ENC_NULL_MODULE) ++IPSEC_ALG_MODULE_EXIT_MOD( ipsec_null_fini ) ++#else ++IPSEC_ALG_MODULE_EXIT_STATIC( ipsec_null_fini ) ++#endif ++{ ++ unregister_ipsec_alg_enc(&ipsec_alg_NULL); ++ return; ++} ++#ifdef MODULE_LICENSE ++MODULE_LICENSE("GPL"); ++#endif +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/pfkey_v2.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,2043 @@ ++/* ++ * @(#) RFC2367 PF_KEYv2 Key management API domain socket I/F ++ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . 
++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: pfkey_v2.c,v 1.97.2.16 2007-10-31 19:57:41 paul Exp $ ++ */ ++ ++/* ++ * Template from /usr/src/linux-2.0.36/net/unix/af_unix.c. ++ * Hints from /usr/src/linux-2.0.36/net/ipv4/udp.c. ++ */ ++ ++#define __NO_VERSION__ ++#include ++#include ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++ ++#include "openswan/ipsec_param.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* struct socket */ ++#include ++#include ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include ++#include ++#include ++#include /* struct sock */ ++#include ++/* #include */ ++#include ++#ifdef CONFIG_PROC_FS ++# include ++#endif /* CONFIG_PROC_FS */ ++ ++#include ++ ++#include ++ ++#include "openswan/radij.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_sa.h" ++ ++#include ++#include ++ ++#include "openswan/ipsec_proto.h" ++#include "openswan/ipsec_kern24.h" ++ ++#ifdef CONFIG_KLIPS_DEBUG ++int debug_pfkey = 0; ++extern int sysctl_ipsec_debug_verbose; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++#define SENDERR(_x) do { error = -(_x); goto errlab; } while (0) ++ ++#ifndef SOCKOPS_WRAPPED ++#define SOCKOPS_WRAPPED(name) name ++#endif /* SOCKOPS_WRAPPED */ ++ ++#ifdef NET_26 ++static rwlock_t pfkey_sock_lock = RW_LOCK_UNLOCKED; ++HLIST_HEAD(pfkey_sock_list); ++static DECLARE_WAIT_QUEUE_HEAD(pfkey_sock_wait); ++static atomic_t pfkey_sock_users = ATOMIC_INIT(0); ++#else ++struct sock *pfkey_sock_list = NULL; ++#endif ++ ++struct supported_list *pfkey_supported_list[SADB_SATYPE_MAX+1]; ++ ++struct socket_list 
*pfkey_open_sockets = NULL; ++struct socket_list *pfkey_registered_sockets[SADB_SATYPE_MAX+1]; ++ ++int pfkey_msg_interp(struct sock *, struct sadb_msg *, struct sadb_msg **); ++ ++DEBUG_NO_STATIC int pfkey_create(struct socket *sock, int protocol); ++DEBUG_NO_STATIC int pfkey_shutdown(struct socket *sock, int mode); ++DEBUG_NO_STATIC int pfkey_release(struct socket *sock); ++ ++#ifdef NET_26 ++DEBUG_NO_STATIC int pfkey_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len); ++DEBUG_NO_STATIC int pfkey_recvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg ++ , size_t size, int flags); ++#else ++DEBUG_NO_STATIC int pfkey_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct scm_cookie *scm); ++DEBUG_NO_STATIC int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags, struct scm_cookie *scm); ++#endif ++ ++struct net_proto_family pfkey_family_ops = { ++#ifdef NETDEV_23 ++ .family = PF_KEY, ++ .create = pfkey_create, ++#ifdef NET_26 ++ .owner = THIS_MODULE, ++#endif ++#else ++ PF_KEY, ++ pfkey_create ++#endif ++}; ++ ++struct proto_ops SOCKOPS_WRAPPED(pfkey_ops) = { ++#ifdef NETDEV_23 ++ family: PF_KEY, ++#ifdef NET_26 ++ owner: THIS_MODULE, ++#endif ++ release: pfkey_release, ++ bind: sock_no_bind, ++ connect: sock_no_connect, ++ socketpair: sock_no_socketpair, ++ accept: sock_no_accept, ++ getname: sock_no_getname, ++ poll: datagram_poll, ++ ioctl: sock_no_ioctl, ++ listen: sock_no_listen, ++ shutdown: pfkey_shutdown, ++ setsockopt: sock_no_setsockopt, ++ getsockopt: sock_no_getsockopt, ++ sendmsg: pfkey_sendmsg, ++ recvmsg: pfkey_recvmsg, ++ mmap: sock_no_mmap, ++#else /* NETDEV_23 */ ++ PF_KEY, ++ sock_no_dup, ++ pfkey_release, ++ sock_no_bind, ++ sock_no_connect, ++ sock_no_socketpair, ++ sock_no_accept, ++ sock_no_getname, ++ datagram_poll, ++ sock_no_ioctl, ++ sock_no_listen, ++ pfkey_shutdown, ++ sock_no_setsockopt, ++ sock_no_getsockopt, ++ sock_no_fcntl, ++ pfkey_sendmsg, ++ 
pfkey_recvmsg ++#endif /* NETDEV_23 */ ++}; ++ ++#ifdef NETDEV_23 ++#include ++SOCKOPS_WRAP(pfkey, PF_KEY); ++#endif /* NETDEV_23 */ ++ ++#ifdef NET_26 ++static void pfkey_sock_list_grab(void) ++{ ++ write_lock_bh(&pfkey_sock_lock); ++ ++ if (atomic_read(&pfkey_sock_users)) { ++ DECLARE_WAITQUEUE(wait, current); ++ ++ add_wait_queue_exclusive(&pfkey_sock_wait, &wait); ++ for(;;) { ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ if (atomic_read(&pfkey_sock_users) == 0) ++ break; ++ write_unlock_bh(&pfkey_sock_lock); ++ schedule(); ++ write_lock_bh(&pfkey_sock_lock); ++ } ++ ++ __set_current_state(TASK_RUNNING); ++ remove_wait_queue(&pfkey_sock_wait, &wait); ++ } ++} ++ ++static __inline__ void pfkey_sock_list_ungrab(void) ++{ ++ write_unlock_bh(&pfkey_sock_lock); ++ wake_up(&pfkey_sock_wait); ++} ++ ++static __inline__ void pfkey_lock_sock_list(void) ++{ ++ /* read_lock() synchronizes us to pfkey_table_grab */ ++ ++ read_lock(&pfkey_sock_lock); ++ atomic_inc(&pfkey_sock_users); ++ read_unlock(&pfkey_sock_lock); ++} ++ ++static __inline__ void pfkey_unlock_sock_list(void) ++{ ++ if (atomic_dec_and_test(&pfkey_sock_users)) ++ wake_up(&pfkey_sock_wait); ++} ++#endif ++ ++int ++pfkey_list_remove_socket(struct socket *socketp, struct socket_list **sockets) ++{ ++ struct socket_list *socket_listp,*prev; ++ ++ if(!socketp) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_remove_socket: " ++ "NULL socketp handed in, failed.\n"); ++ return -EINVAL; ++ } ++ ++ if(!sockets) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_remove_socket: " ++ "NULL sockets list handed in, failed.\n"); ++ return -EINVAL; ++ } ++ ++ socket_listp = *sockets; ++ prev = NULL; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_remove_socket: " ++ "removing sock=0p%p\n", ++ socketp); ++ ++ while(socket_listp != NULL) { ++ if(socket_listp->socketp == socketp) { ++ if(prev != NULL) { ++ prev->next = socket_listp->next; ++ } else { ++ *sockets = socket_listp->next; ++ } ++ ++ 
kfree((void*)socket_listp); ++ ++ break; ++ } ++ prev = socket_listp; ++ socket_listp = socket_listp->next; ++ } ++ ++ return 0; ++} ++ ++int ++pfkey_list_insert_socket(struct socket *socketp, struct socket_list **sockets) ++{ ++ struct socket_list *socket_listp; ++ ++ if(!socketp) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_insert_socket: " ++ "NULL socketp handed in, failed.\n"); ++ return -EINVAL; ++ } ++ ++ if(!sockets) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_insert_socket: " ++ "NULL sockets list handed in, failed.\n"); ++ return -EINVAL; ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_insert_socket: " ++ "allocating %lu bytes for socketp=0p%p\n", ++ (unsigned long) sizeof(struct socket_list), ++ socketp); ++ ++ if((socket_listp = (struct socket_list *)kmalloc(sizeof(struct socket_list), GFP_KERNEL)) == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_insert_socket: " ++ "memory allocation error.\n"); ++ return -ENOMEM; ++ } ++ ++ socket_listp->socketp = socketp; ++ socket_listp->next = *sockets; ++ *sockets = socket_listp; ++ ++ return 0; ++} ++ ++int ++pfkey_list_remove_supported(struct ipsec_alg_supported *supported, struct supported_list **supported_list) ++{ ++ struct supported_list *supported_listp = *supported_list, *prev = NULL; ++ ++ if(!supported) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_remove_supported: " ++ "NULL supported handed in, failed.\n"); ++ return -EINVAL; ++ } ++ ++ if(!supported_list) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_remove_supported: " ++ "NULL supported_list handed in, failed.\n"); ++ return -EINVAL; ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_remove_supported: " ++ "removing supported=0p%p\n", ++ supported); ++ ++ while(supported_listp != NULL) { ++ if(supported_listp->supportedp == supported) { ++ if(prev != NULL) { ++ prev->next = supported_listp->next; ++ } else { ++ *supported_list = supported_listp->next; ++ } ++ 
++ kfree((void*)supported_listp); ++ ++ break; ++ } ++ prev = supported_listp; ++ supported_listp = supported_listp->next; ++ } ++ ++ return 0; ++} ++ ++int ++pfkey_list_insert_supported(struct ipsec_alg_supported *supported ++ , struct supported_list **supported_list) ++{ ++ struct supported_list *supported_listp; ++ ++ if(!supported) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_insert_supported: " ++ "NULL supported handed in, failed.\n"); ++ return -EINVAL; ++ } ++ ++ if(!supported_list) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_insert_supported: " ++ "NULL supported_list handed in, failed.\n"); ++ return -EINVAL; ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_insert_supported: " ++ "allocating %lu bytes for incoming, supported=0p%p, supported_list=0p%p\n", ++ (unsigned long) sizeof(struct supported_list), ++ supported, ++ supported_list); ++ ++ supported_listp = (struct supported_list *)kmalloc(sizeof(struct supported_list), GFP_KERNEL); ++ ++ if(supported_listp == NULL) ++ { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_insert_supported: " ++ "memory allocation error.\n"); ++ return -ENOMEM; ++ } ++ ++ supported_listp->supportedp = supported; ++ supported_listp->next = *supported_list; ++ *supported_list = supported_listp; ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_list_insert_supported: " ++ "outgoing, supported=0p%p, supported_list=0p%p\n", ++ supported, ++ supported_list); ++ ++ return 0; ++} ++ ++#ifdef NET_26 ++DEBUG_NO_STATIC void ++pfkey_insert_socket(struct sock *sk) ++{ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_insert_socket: " ++ "sk=0p%p\n", ++ sk); ++ pfkey_sock_list_grab(); ++ sk_add_node(sk, &pfkey_sock_list); ++ pfkey_sock_list_ungrab(); ++} ++ ++DEBUG_NO_STATIC void ++pfkey_remove_socket(struct sock *sk) ++{ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_remove_socket: 0p%p\n", sk); ++ pfkey_sock_list_grab(); ++ sk_del_node_init(sk); ++ pfkey_sock_list_ungrab(); ++ return; 
++} ++#else ++ ++DEBUG_NO_STATIC void ++pfkey_insert_socket(struct sock *sk) ++{ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_insert_socket: " ++ "sk=0p%p\n", ++ sk); ++ cli(); ++ sk->next=pfkey_sock_list; ++ pfkey_sock_list=sk; ++ sti(); ++} ++DEBUG_NO_STATIC void ++pfkey_remove_socket(struct sock *sk) ++{ ++ struct sock **s; ++ ++ s = NULL; ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_remove_socket: .\n"); ++ ++ cli(); ++ s=&pfkey_sock_list; ++ ++ while(*s!=NULL) { ++ if(*s==sk) { ++ *s=sk->next; ++ sk->next=NULL; ++ sti(); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_remove_socket: " ++ "succeeded.\n"); ++ return; ++ } ++ s=&((*s)->next); ++ } ++ sti(); ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_remove_socket: " ++ "not found.\n"); ++ return; ++} ++#endif ++ ++DEBUG_NO_STATIC void ++pfkey_destroy_socket(struct sock *sk) ++{ ++ struct sk_buff *skb; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_destroy_socket: 0p%p\n",sk); ++ pfkey_remove_socket(sk); ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_destroy_socket: " ++ "pfkey_remove_socket called, sk=0p%p\n",sk); ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_destroy_socket: " ++ "sk(0p%p)->(&0p%p)receive_queue.{next=0p%p,prev=0p%p}.\n", ++ sk, ++ &(sk->sk_receive_queue), ++ sk->sk_receive_queue.next, ++ sk->sk_receive_queue.prev); ++ ++ while(sk && ((skb=skb_dequeue(&(sk->sk_receive_queue)))!=NULL)) { ++#ifdef CONFIG_KLIPS_DEBUG ++ if(debug_pfkey && sysctl_ipsec_debug_verbose) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_destroy_socket: " ++ "skb=0p%p dequeued.\n", skb); ++ printk(KERN_INFO "klips_debug:pfkey_destroy_socket: " ++ "pfkey_skb contents:"); ++ printk(" next:0p%p", skb->next); ++ printk(" prev:0p%p", skb->prev); ++ printk(" sk:0p%p", skb->sk); ++ printk(" dev:0p%p", skb->dev); ++ if(skb->dev) { ++ if(skb->dev->name) { ++ printk(" dev->name:%s", skb->dev->name); ++ } else { ++ printk(" dev->name:NULL?"); ++ } ++ } else { ++ printk(" dev:NULL"); ++ } ++ 
printk(" h:0p%p", skb_transport_header(skb)); ++ printk(" nh:0p%p", skb_network_header(skb)); ++ printk(" mac:0p%p", skb_mac_header(skb)); ++ printk(" dst:0p%p", skb->dst); ++ if(sysctl_ipsec_debug_verbose) { ++ int i; ++ ++ printk(" cb"); ++ for(i=0; i<48; i++) { ++ printk(":%2x", skb->cb[i]); ++ } ++ } ++ printk(" len:%d", skb->len); ++ printk(" csum:%d", skb->csum); ++#ifndef NETDEV_23 ++ printk(" used:%d", skb->used); ++ printk(" is_clone:%d", skb->is_clone); ++#endif /* NETDEV_23 */ ++ printk(" cloned:%d", skb->cloned); ++ printk(" pkt_type:%d", skb->pkt_type); ++ printk(" ip_summed:%d", skb->ip_summed); ++ printk(" priority:%d", skb->priority); ++ printk(" protocol:%d", skb->protocol); ++#ifdef HAVE_SOCK_SECURITY ++ printk(" security:%d", skb->security); ++#endif ++ printk(" truesize:%d", skb->truesize); ++ printk(" head:0p%p", skb->head); ++ printk(" data:0p%p", skb->data); ++ printk(" tail:0p%p", skb_tail_pointer(skb)); ++ printk(" end:0p%p", skb_end_pointer(skb)); ++ if(sysctl_ipsec_debug_verbose) { ++ unsigned char* i; ++ printk(" data"); ++ for(i = skb->head; i < skb_end_pointer(skb); i++) { ++ printk(":%2x", (unsigned char)(*(i))); ++ } ++ } ++ printk(" destructor:0p%p", skb->destructor); ++ printk("\n"); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_destroy_socket: " ++ "skb=0p%p freed.\n", ++ skb); ++ ipsec_kfree_skb(skb); ++ } ++ ++#ifdef NET_26 ++ sock_set_flag(sk, SOCK_DEAD); ++#else ++ sk->dead = 1; ++#endif ++ sk_free(sk); ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_destroy_socket: destroyed.\n"); ++} ++ ++int ++pfkey_upmsg(struct socket *sock, struct sadb_msg *pfkey_msg) ++{ ++ int error = 0; ++ struct sk_buff * skb = NULL; ++ struct sock *sk; ++ ++ if(sock == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_upmsg: " ++ "NULL socket passed in.\n"); ++ return -EINVAL; ++ } ++ ++ if(pfkey_msg == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_upmsg: " ++ "NULL pfkey_msg passed 
in.\n"); ++ return -EINVAL; ++ } ++ ++ sk = sock->sk; ++ ++ if(sk == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_upmsg: " ++ "NULL sock passed in.\n"); ++ return -EINVAL; ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_upmsg: " ++ "allocating %d bytes...\n", ++ (int)(pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN)); ++ if(!(skb = alloc_skb(pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN, GFP_ATOMIC) )) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_upmsg: " ++ "no buffers left to send up a message.\n"); ++ return -ENOBUFS; ++ } ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_upmsg: " ++ "...allocated at 0p%p.\n", ++ skb); ++ ++ skb->dev = NULL; ++ ++ if(skb_tailroom(skb) < pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN) { ++ printk(KERN_WARNING "klips_error:pfkey_upmsg: " ++ "tried to skb_put %ld, %d available. This should never happen, please report.\n", ++ (unsigned long int)pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN, ++ skb_tailroom(skb)); ++ ipsec_kfree_skb(skb); ++ return -ENOBUFS; ++ } ++ skb_set_transport_header(skb, ipsec_skb_offset(skb, skb_put(skb, pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN))); ++ memcpy(skb_transport_header(skb), pfkey_msg, pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN); ++ ++ if((error = sock_queue_rcv_skb(sk, skb)) < 0) { ++ skb->sk=NULL; ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_upmsg: " ++ "error=%d calling sock_queue_rcv_skb with skb=0p%p.\n", ++ error, ++ skb); ++ ipsec_kfree_skb(skb); ++ return error; ++ } ++ return error; ++} ++ ++#ifdef NET_26_12_SKALLOC ++static struct proto key_proto = { ++ .name = "KEY", ++ .owner = THIS_MODULE, ++ .obj_size = sizeof(struct sock), ++ ++}; ++#endif ++ ++DEBUG_NO_STATIC int ++pfkey_create(struct socket *sock, int protocol) ++{ ++ struct sock *sk; ++ ++ if(sock == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_create: " ++ "socket NULL.\n"); ++ return -EINVAL; ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_create: " ++ "sock=0p%p 
type:%d state:%d flags:%ld protocol:%d\n", ++ sock, ++ sock->type, ++ (unsigned int)(sock->state), ++ sock->flags, protocol); ++ ++ if(sock->type != SOCK_RAW) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_create: " ++ "only SOCK_RAW supported.\n"); ++ return -ESOCKTNOSUPPORT; ++ } ++ ++ if(protocol != PF_KEY_V2) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_create: " ++ "protocol not PF_KEY_V2.\n"); ++ return -EPROTONOSUPPORT; ++ } ++ ++ if((current->uid != 0)) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_create: " ++ "must be root to open pfkey sockets.\n"); ++ return -EACCES; ++ } ++ ++ sock->state = SS_UNCONNECTED; ++ ++ KLIPS_INC_USE; ++ ++#ifdef NET_26 ++#ifdef NET_26_12_SKALLOC ++ sk=(struct sock *)sk_alloc(PF_KEY, GFP_KERNEL, &key_proto, 1); ++#else ++ sk=(struct sock *)sk_alloc(PF_KEY, GFP_KERNEL, 1, NULL); ++#endif ++#else ++ /* 2.4 interface */ ++ sk=(struct sock *)sk_alloc(PF_KEY, GFP_KERNEL, 1); ++#endif ++ ++ if(sk == NULL) ++ { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_create: " ++ "Out of memory trying to allocate.\n"); ++ KLIPS_DEC_USE; ++ return -ENOMEM; ++ } ++ ++ sock_init_data(sock, sk); ++ ++ sk->sk_destruct = NULL; ++ sk->sk_reuse = 1; ++ sock->ops = &pfkey_ops; ++ ++ sk->sk_family = PF_KEY; ++/* sk->num = protocol; */ ++ sk->sk_protocol = protocol; ++ key_pid(sk) = current->pid; ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_create: " ++ "sock->fasync_list=0p%p sk->sleep=0p%p.\n", ++ sock->fasync_list, ++ sk->sk_sleep); ++ ++ pfkey_insert_socket(sk); ++ pfkey_list_insert_socket(sock, &pfkey_open_sockets); ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_create: " ++ "Socket sock=0p%p sk=0p%p initialised.\n", sock, sk); ++ return 0; ++} ++ ++DEBUG_NO_STATIC int ++#ifdef NETDEV_23 ++pfkey_release(struct socket *sock) ++#else /* NETDEV_23 */ ++pfkey_release(struct socket *sock, struct socket *peersock) ++#endif /* NETDEV_23 */ ++{ ++ struct sock *sk; ++ int i; ++ ++ if(sock==NULL) { ++ KLIPS_PRINT(debug_pfkey, 
++ "klips_debug:pfkey_release: " ++ "No socket attached.\n"); ++ return 0; /* -EINVAL; */ ++ } ++ ++ sk=sock->sk; ++ ++ /* May not have data attached */ ++ if(sk==NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_release: " ++ "No sk attached to sock=0p%p.\n", sock); ++ return 0; /* -EINVAL; */ ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_release: " ++ "sock=0p%p sk=0p%p\n", sock, sk); ++ ++ if(sock_flag(sk, SOCK_DEAD)) ++ if(sk->sk_state_change) { ++ sk->sk_state_change(sk); ++ } ++ ++ sock->sk = NULL; ++ ++ /* Try to flush out this socket. Throw out buffers at least */ ++ pfkey_destroy_socket(sk); ++ pfkey_list_remove_socket(sock, &pfkey_open_sockets); ++ for(i = SADB_SATYPE_UNSPEC; i <= SADB_SATYPE_MAX; i++) { ++ pfkey_list_remove_socket(sock, &(pfkey_registered_sockets[i])); ++ } ++ ++ KLIPS_DEC_USE; ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_release: " ++ "succeeded.\n"); ++ ++ return 0; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_shutdown(struct socket *sock, int mode) ++{ ++ struct sock *sk; ++ ++ if(sock == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_shutdown: " ++ "NULL socket passed in.\n"); ++ return -EINVAL; ++ } ++ ++ sk=sock->sk; ++ ++ if(sk == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_shutdown: " ++ "No sock attached to socket.\n"); ++ return -EINVAL; ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_shutdown: " ++ "mode=%x.\n", mode); ++ mode++; ++ ++ if(mode&SEND_SHUTDOWN) { ++ sk->sk_shutdown|=SEND_SHUTDOWN; ++ sk->sk_state_change(sk); ++ } ++ ++ if(mode&RCV_SHUTDOWN) { ++ sk->sk_shutdown|=RCV_SHUTDOWN; ++ sk->sk_state_change(sk); ++ } ++ return 0; ++} ++ ++/* ++ * Send PF_KEY data down. 
++ */ ++ ++DEBUG_NO_STATIC int ++#ifdef NET_26 ++pfkey_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) ++#else ++pfkey_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct scm_cookie *scm) ++#endif ++{ ++ struct sock *sk; ++ int error = 0; ++ struct sadb_msg *pfkey_msg = NULL, *pfkey_reply = NULL; ++ ++ if(sock == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "Null socket passed in.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ sk = sock->sk; ++ ++ if(sk == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "Null sock passed in.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(msg == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "Null msghdr passed in.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: .\n"); ++ if(sk->sk_err) { ++ error = sock_error(sk); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "sk->err is non-zero, returns %d.\n", ++ error); ++ SENDERR(-error); ++ } ++ ++ if((current->uid != 0)) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "must be root to send messages to pfkey sockets.\n"); ++ SENDERR(EACCES); ++ } ++ ++ if(msg->msg_control) ++ { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "can't set flags or set msg_control.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(sk->sk_shutdown & SEND_SHUTDOWN) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "shutdown.\n"); ++ send_sig(SIGPIPE, current, 0); ++ SENDERR(EPIPE); ++ } ++ ++ if(len < sizeof(struct sadb_msg)) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "bogus msg len of %d, too small.\n", (int)len); ++ SENDERR(EMSGSIZE); ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "allocating %d bytes for downward message.\n", ++ (int)len); ++ if((pfkey_msg = (struct sadb_msg*)kmalloc(len, GFP_KERNEL)) == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ 
"klips_debug:pfkey_sendmsg: " ++ "memory allocation error.\n"); ++ SENDERR(ENOBUFS); ++ } ++ ++ memcpy_fromiovec((void *)pfkey_msg, msg->msg_iov, len); ++ ++ if(pfkey_msg->sadb_msg_version != PF_KEY_V2) { ++ KLIPS_PRINT(1 || debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "not PF_KEY_V2 msg, found %d, should be %d.\n", ++ pfkey_msg->sadb_msg_version, ++ PF_KEY_V2); ++ kfree((void*)pfkey_msg); ++ return -EINVAL; ++ } ++ ++ if(len != pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "bogus msg len of %d, not %d byte aligned.\n", ++ (int)len, (int)IPSEC_PFKEYv2_ALIGN); ++ SENDERR(EMSGSIZE); ++ } ++ ++#if 0 ++ /* This check is questionable, since a downward message could be ++ the result of an ACQUIRE either from kernel (PID==0) or ++ userspace (some other PID). */ ++ /* check PID */ ++ if(pfkey_msg->sadb_msg_pid != current->pid) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "pid (%d) does not equal sending process pid (%d).\n", ++ pfkey_msg->sadb_msg_pid, current->pid); ++ SENDERR(EINVAL); ++ } ++#endif ++ ++ if(pfkey_msg->sadb_msg_reserved) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "reserved field must be zero, set to %d.\n", ++ pfkey_msg->sadb_msg_reserved); ++ SENDERR(EINVAL); ++ } ++ ++ if((pfkey_msg->sadb_msg_type > SADB_MAX) || (!pfkey_msg->sadb_msg_type)){ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "msg type too large or small:%d.\n", ++ pfkey_msg->sadb_msg_type); ++ SENDERR(EINVAL); ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "msg sent for parsing.\n"); ++ ++ if((error = pfkey_msg_interp(sk, pfkey_msg, &pfkey_reply))) { ++ struct socket_list *pfkey_socketsp; ++ ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_sendmsg: " ++ "pfkey_msg_parse returns %d.\n", ++ error); ++ ++ if((pfkey_reply = (struct sadb_msg*)kmalloc(sizeof(struct sadb_msg), GFP_KERNEL)) == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ 
"klips_debug:pfkey_sendmsg: " ++ "memory allocation error.\n"); ++ SENDERR(ENOBUFS); ++ } ++ memcpy((void*)pfkey_reply, (void*)pfkey_msg, sizeof(struct sadb_msg)); ++ pfkey_reply->sadb_msg_errno = -error; ++ pfkey_reply->sadb_msg_len = sizeof(struct sadb_msg) / IPSEC_PFKEYv2_ALIGN; ++ ++ for(pfkey_socketsp = pfkey_open_sockets; ++ pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ int error_upmsg = 0; ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_sendmsg: " ++ "sending up error=%d message=0p%p to socket=0p%p.\n", ++ error, ++ pfkey_reply, ++ pfkey_socketsp->socketp); ++ if((error_upmsg = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_sendmsg: " ++ "sending up error message to socket=0p%p failed with error=%d.\n", ++ pfkey_socketsp->socketp, ++ error_upmsg); ++ /* pfkey_msg_free(&pfkey_reply); */ ++ /* SENDERR(-error); */ ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_sendmsg: " ++ "sending up error message to socket=0p%p succeeded.\n", ++ pfkey_socketsp->socketp); ++ } ++ ++ pfkey_msg_free(&pfkey_reply); ++ ++ SENDERR(-error); ++ } ++ ++ errlab: ++ if (pfkey_msg) { ++ kfree((void*)pfkey_msg); ++ } ++ ++ if(error) { ++ return error; ++ } else { ++ return len; ++ } ++} ++ ++/* ++ * Receive PF_KEY data up. 
++ */ ++ ++DEBUG_NO_STATIC int ++#ifdef NET_26 ++pfkey_recvmsg(struct kiocb *kiocb ++ , struct socket *sock ++ , struct msghdr *msg ++ , size_t size ++ , int flags) ++#else ++pfkey_recvmsg(struct socket *sock ++ , struct msghdr *msg ++ , int size, int flags ++ , struct scm_cookie *scm) ++#endif ++{ ++ struct sock *sk; ++ int noblock = flags & MSG_DONTWAIT; ++ struct sk_buff *skb; ++ int error; ++ ++ if(sock == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_recvmsg: " ++ "Null socket passed in.\n"); ++ return -EINVAL; ++ } ++ ++ sk = sock->sk; ++ ++ if(sk == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_recvmsg: " ++ "Null sock passed in for sock=0p%p.\n", sock); ++ return -EINVAL; ++ } ++ ++ if(msg == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_recvmsg: " ++ "Null msghdr passed in for sock=0p%p, sk=0p%p.\n", ++ sock, sk); ++ return -EINVAL; ++ } ++ ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ "klips_debug:pfkey_recvmsg: sock=0p%p sk=0p%p msg=0p%p size=%d.\n", ++ sock, sk, msg, (int)size); ++ if(flags & ~MSG_PEEK) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "flags (%d) other than MSG_PEEK not supported.\n", ++ flags); ++ return -EOPNOTSUPP; ++ } ++ ++ msg->msg_namelen = 0; /* sizeof(*ska); */ ++ ++ if(sk->sk_err) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sendmsg: " ++ "sk->sk_err=%d.\n", sk->sk_err); ++ return sock_error(sk); ++ } ++ ++ if((skb = skb_recv_datagram(sk, flags, noblock, &error) ) == NULL) { ++ return error; ++ } ++ ++ if(size > skb->len) { ++ size = skb->len; ++ } ++ else if(size < skb->len) { ++ msg->msg_flags |= MSG_TRUNC; ++ } ++ ++ skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size); ++#ifdef HAVE_KERNEL_TSTAMP ++ sk->sk_stamp = skb->tstamp; ++#elif defined(HAVE_TSTAMP) ++ sk->sk_stamp.tv_sec = skb->tstamp.off_sec; ++ sk->sk_stamp.tv_usec = skb->tstamp.off_usec; ++#else ++ sk->sk_stamp=skb->stamp; ++#endif ++ ++ skb_free_datagram(sk, skb); ++ return size; ++} ++ ++#ifdef
CONFIG_PROC_FS ++#ifndef PROC_FS_2325 ++DEBUG_NO_STATIC ++#endif /* PROC_FS_2325 */ ++int ++pfkey_get_info(char *buffer, char **start, off_t offset, int length ++#ifndef PROC_NO_DUMMY ++, int dummy ++#endif /* !PROC_NO_DUMMY */ ++) ++{ ++ const int max_content = length > 0? length-1 : 0; /* limit of useful snprintf output */ ++#ifdef NET_26 ++ struct hlist_node *node; ++#endif ++ off_t begin=0; ++ int len=0; ++ struct sock *sk; ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if(!sysctl_ipsec_debug_verbose) { ++#endif /* CONFIG_KLIPS_DEBUG */ ++ len += ipsec_snprintf(buffer, length, ++ " sock pid socket next prev e n p sndbf Flags Type St\n"); ++#ifdef CONFIG_KLIPS_DEBUG ++ } else { ++ len += ipsec_snprintf(buffer, length, ++ " sock pid d sleep socket next prev e r z n p sndbf stamp Flags Type St\n"); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ sk_for_each(sk, node, &pfkey_sock_list) { ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if(!sysctl_ipsec_debug_verbose) { ++#endif /* CONFIG_KLIPS_DEBUG */ ++ len += ipsec_snprintf(buffer+len, length-len, ++ "%8p %5d %8p %d %d %5d %08lX %8X %2X\n", ++ sk, ++ key_pid(sk), ++ sk->sk_socket, ++ sk->sk_err, ++ sk->sk_protocol, ++ sk->sk_sndbuf, ++ sk->sk_socket->flags, ++ sk->sk_socket->type, ++ sk->sk_socket->state); ++#ifdef CONFIG_KLIPS_DEBUG ++ } else { ++ struct timeval t; ++ grab_socket_timeval(t, *sk); ++ len += ipsec_snprintf(buffer+len, length-len, ++ "%8p %5d %d %8p %8p %d %d %d %d %5d %d.%06d %08lX %8X %2X\n", ++ sk, ++ key_pid(sk), ++ sock_flag(sk, SOCK_DEAD), ++ sk->sk_sleep, ++ sk->sk_socket, ++ sk->sk_err, ++ sk->sk_reuse, ++#ifdef HAVE_SOCK_ZAPPED ++ sock_flag(sk, SOCK_ZAPPED), ++#else ++ sk->sk_zapped, ++#endif ++ sk->sk_protocol, ++ sk->sk_sndbuf, ++ (unsigned int)t.tv_sec, ++ (unsigned int)t.tv_usec, ++ sk->sk_socket->flags, ++ sk->sk_socket->type, ++ sk->sk_socket->state); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ if (len >= max_content) { ++ /* we've done all that can fit -- stop loop */ ++ len = max_content; /* truncate crap */ 
++ break; ++ } else { ++ const off_t pos = begin + len; /* file position of end of what we've generated */ ++ ++ if (pos <= offset) { ++ /* all is before first interesting character: ++ * discard, but note where we are. ++ */ ++ len = 0; ++ begin = pos; ++ } ++ } ++ } ++ ++ *start = buffer + (offset - begin); /* Start of wanted data */ ++ return len - (offset - begin); ++} ++ ++#ifndef PROC_FS_2325 ++DEBUG_NO_STATIC ++#endif /* PROC_FS_2325 */ ++int ++pfkey_supported_get_info(char *buffer, char **start, off_t offset, int length ++#ifndef PROC_NO_DUMMY ++, int dummy ++#endif /* !PROC_NO_DUMMY */ ++) ++{ ++ /* limit of useful snprintf output */ ++ const int max_content = length > 0? length-1 : 0; ++ off_t begin=0; ++ int len=0; ++ int satype; ++ struct supported_list *ps; ++ ++ len += ipsec_snprintf(buffer, length, ++ "satype exttype alg_id ivlen minbits maxbits name\n"); ++ ++ for(satype = SADB_SATYPE_UNSPEC; satype <= SADB_SATYPE_MAX; satype++) { ++ ps = pfkey_supported_list[satype]; ++ while(ps) { ++ struct ipsec_alg_supported *alg = ps->supportedp; ++ unsigned char *n = alg->ias_name; ++ if(n == NULL) n = "unknown"; ++ ++ len += ipsec_snprintf(buffer+len, length-len, ++ " %2d %2d %2d %3d %3d %3d %20s\n", ++ satype, ++ alg->ias_exttype, ++ alg->ias_id, ++ alg->ias_ivlen, ++ alg->ias_keyminbits, ++ alg->ias_keymaxbits, ++ n); ++ ++ if (len >= max_content) { ++ /* we've done all that can fit -- stop loop */ ++ len = max_content; /* truncate crap */ ++ break; ++ } else { ++ const off_t pos = begin + len; /* file position of end of what we've generated */ ++ ++ if (pos <= offset) { ++ /* all is before first interesting character: ++ * discard, but note where we are. 
++ */ ++ len = 0; ++ begin = pos; ++ } ++ } ++ ++ ps = ps->next; ++ } ++ } ++ *start = buffer + (offset - begin); /* Start of wanted data */ ++ return len - (offset - begin); ++} ++ ++#ifndef PROC_FS_2325 ++DEBUG_NO_STATIC ++#endif /* PROC_FS_2325 */ ++int ++pfkey_registered_get_info(char *buffer, char **start, off_t offset, int length ++#ifndef PROC_NO_DUMMY ++, int dummy ++#endif /* !PROC_NO_DUMMY */ ++) ++{ ++ const int max_content = length > 0? length-1 : 0; /* limit of useful snprintf output */ ++ off_t begin=0; ++ int len=0; ++ int satype; ++ struct socket_list *pfkey_sockets; ++ ++ len += ipsec_snprintf(buffer, length, ++ "satype socket pid sk\n"); ++ ++ for(satype = SADB_SATYPE_UNSPEC; satype <= SADB_SATYPE_MAX; satype++) { ++ pfkey_sockets = pfkey_registered_sockets[satype]; ++ while(pfkey_sockets) { ++ len += ipsec_snprintf(buffer+len, length-len, ++ " %2d %8p %5d %8p\n", ++ satype, ++ pfkey_sockets->socketp, ++ key_pid(pfkey_sockets->socketp->sk), ++ pfkey_sockets->socketp->sk); ++ ++ if (len >= max_content) { ++ /* we've done all that can fit -- stop loop (could stop two) */ ++ len = max_content; /* truncate crap */ ++ break; ++ } else { ++ const off_t pos = begin + len; /* file position of end of what we've generated */ ++ ++ if (pos <= offset) { ++ /* all is before first interesting character: ++ * discard, but note where we are. 
++ */ ++ len = 0; ++ begin = pos; ++ } ++ } ++ ++ pfkey_sockets = pfkey_sockets->next; ++ } ++ } ++ *start = buffer + (offset - begin); /* Start of wanted data */ ++ return len - (offset - begin); ++} ++ ++#ifndef PROC_FS_2325 ++struct proc_dir_entry proc_net_pfkey = ++{ ++ 0, ++ 6, "pf_key", ++ S_IFREG | S_IRUGO, 1, 0, 0, ++ 0, &proc_net_inode_operations, ++ pfkey_get_info ++}; ++struct proc_dir_entry proc_net_pfkey_supported = ++{ ++ 0, ++ 16, "pf_key_supported", ++ S_IFREG | S_IRUGO, 1, 0, 0, ++ 0, &proc_net_inode_operations, ++ pfkey_supported_get_info ++}; ++struct proc_dir_entry proc_net_pfkey_registered = ++{ ++ 0, ++ 17, "pf_key_registered", ++ S_IFREG | S_IRUGO, 1, 0, 0, ++ 0, &proc_net_inode_operations, ++ pfkey_registered_get_info ++}; ++#endif /* !PROC_FS_2325 */ ++#endif /* CONFIG_PROC_FS */ ++ ++DEBUG_NO_STATIC int ++supported_add_all(int satype, struct ipsec_alg_supported supported[], int size) ++{ ++ int i; ++ int error = 0; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:init_pfkey: " ++ "sizeof(supported_init_)[%d]/sizeof(struct ipsec_alg_supported)[%d]=%d.\n", ++ satype, ++ size, ++ (int)sizeof(struct ipsec_alg_supported), ++ (int)(size/sizeof(struct ipsec_alg_supported))); ++ ++ for(i = 0; i < size / sizeof(struct ipsec_alg_supported); i++) { ++ ++ unsigned char *n = supported[i].ias_name; ++ if(n == NULL) n="unknown"; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:init_pfkey: " ++ "i=%d inserting satype=%d exttype=%d id=%d ivlen=%d minbits=%d maxbits=%d name=%s.\n", ++ i, ++ satype, ++ supported[i].ias_exttype, ++ supported[i].ias_id, ++ supported[i].ias_ivlen, ++ supported[i].ias_keyminbits, ++ supported[i].ias_keymaxbits, ++ n); ++ ++ error |= pfkey_list_insert_supported(&(supported[i]), ++ &(pfkey_supported_list[satype])); ++ } ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++supported_remove_all(int satype) ++{ ++ int error = 0; ++ struct ipsec_alg_supported*supportedp; ++ ++ while(pfkey_supported_list[satype]) { ++ unsigned char *n; ++ 
supportedp = pfkey_supported_list[satype]->supportedp; ++ ++ n = supportedp->ias_name; ++ if(n == NULL) n="unknown"; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:init_pfkey: " ++ "removing satype=%d exttype=%d id=%d ivlen=%d minbits=%d maxbits=%d name=%s.\n", ++ satype, ++ supportedp->ias_exttype, ++ supportedp->ias_id, ++ supportedp->ias_ivlen, ++ supportedp->ias_keyminbits, ++ supportedp->ias_keymaxbits, n); ++ ++ error |= pfkey_list_remove_supported(supportedp, ++ &(pfkey_supported_list[satype])); ++ } ++ return error; ++} ++ ++int ++pfkey_init(void) ++{ ++ int error = 0; ++ int i; ++ ++ static struct ipsec_alg_supported supported_init_ah[] = { ++#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 ++ {SADB_EXT_SUPPORTED_AUTH, SADB_AALG_MD5HMAC, 0, 128, 128}, ++#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++ {SADB_EXT_SUPPORTED_AUTH, SADB_AALG_SHA1HMAC, 0, 160, 160} ++#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ ++ }; ++ static struct ipsec_alg_supported supported_init_esp[] = { ++#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 ++ {SADB_EXT_SUPPORTED_AUTH, SADB_AALG_MD5HMAC, 0, 128, 128}, ++#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ ++#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 ++ {SADB_EXT_SUPPORTED_AUTH, SADB_AALG_SHA1HMAC, 0, 160, 160}, ++#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ ++#ifdef CONFIG_KLIPS_ENC_3DES ++ {SADB_EXT_SUPPORTED_ENCRYPT, SADB_EALG_3DESCBC, 64, 168, 168}, ++#endif /* CONFIG_KLIPS_ENC_3DES */ ++ }; ++ static struct ipsec_alg_supported supported_init_ipip[] = { ++ {SADB_EXT_SUPPORTED_ENCRYPT, SADB_X_TALG_IPv4_in_IPv4, 0, 32, 32} ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ , {SADB_EXT_SUPPORTED_ENCRYPT, SADB_X_TALG_IPv6_in_IPv4, 0, 128, 32} ++ , {SADB_EXT_SUPPORTED_ENCRYPT, SADB_X_TALG_IPv4_in_IPv6, 0, 32, 128} ++ , {SADB_EXT_SUPPORTED_ENCRYPT, SADB_X_TALG_IPv6_in_IPv6, 0, 128, 128} ++#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ ++ }; ++#ifdef CONFIG_KLIPS_IPCOMP ++ static struct ipsec_alg_supported 
supported_init_ipcomp[] = { ++ {SADB_EXT_SUPPORTED_ENCRYPT, SADB_X_CALG_DEFLATE, 0, 1, 1} ++ }; ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ ++#if 0 ++ printk(KERN_INFO ++ "klips_info:pfkey_init: " ++ "FreeS/WAN: initialising PF_KEYv2 domain sockets.\n"); ++#endif ++ ++ for(i = SADB_SATYPE_UNSPEC; i <= SADB_SATYPE_MAX; i++) { ++ pfkey_registered_sockets[i] = NULL; ++ pfkey_supported_list[i] = NULL; ++ } ++ ++ error |= supported_add_all(SADB_SATYPE_AH, supported_init_ah, sizeof(supported_init_ah)); ++ error |= supported_add_all(SADB_SATYPE_ESP, supported_init_esp, sizeof(supported_init_esp)); ++#ifdef CONFIG_KLIPS_IPCOMP ++ error |= supported_add_all(SADB_X_SATYPE_COMP, supported_init_ipcomp, sizeof(supported_init_ipcomp)); ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ error |= supported_add_all(SADB_X_SATYPE_IPIP, supported_init_ipip, sizeof(supported_init_ipip)); ++ ++ error |= sock_register(&pfkey_family_ops); ++ ++#ifdef CONFIG_PROC_FS ++# ifndef PROC_FS_2325 ++# ifdef PROC_FS_21 ++ error |= proc_register(proc_net, &proc_net_pfkey); ++ error |= proc_register(proc_net, &proc_net_pfkey_supported); ++ error |= proc_register(proc_net, &proc_net_pfkey_registered); ++# else /* PROC_FS_21 */ ++ error |= proc_register_dynamic(&proc_net, &proc_net_pfkey); ++ error |= proc_register_dynamic(&proc_net, &proc_net_pfkey_supported); ++ error |= proc_register_dynamic(&proc_net, &proc_net_pfkey_registered); ++# endif /* PROC_FS_21 */ ++# else /* !PROC_FS_2325 */ ++ proc_net_create ("pf_key", 0, pfkey_get_info); ++ proc_net_create ("pf_key_supported", 0, pfkey_supported_get_info); ++ proc_net_create ("pf_key_registered", 0, pfkey_registered_get_info); ++# endif /* !PROC_FS_2325 */ ++#endif /* CONFIG_PROC_FS */ ++ ++ return error; ++} ++ ++int ++pfkey_cleanup(void) ++{ ++ int error = 0; ++ ++ printk(KERN_INFO "klips_info:pfkey_cleanup: " ++ "shutting down PF_KEY domain sockets.\n"); ++#ifdef VOID_SOCK_UNREGISTER ++ sock_unregister(PF_KEY); ++#else ++ sock_unregister(PF_KEY); ++#endif ++ ++ error 
|= supported_remove_all(SADB_SATYPE_AH); ++ error |= supported_remove_all(SADB_SATYPE_ESP); ++#ifdef CONFIG_KLIPS_IPCOMP ++ error |= supported_remove_all(SADB_X_SATYPE_COMP); ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ error |= supported_remove_all(SADB_X_SATYPE_IPIP); ++ ++#ifdef CONFIG_PROC_FS ++# ifndef PROC_FS_2325 ++ if (proc_net_unregister(proc_net_pfkey.low_ino) != 0) ++ printk("klips_debug:pfkey_cleanup: " ++ "cannot unregister /proc/net/pf_key\n"); ++ if (proc_net_unregister(proc_net_pfkey_supported.low_ino) != 0) ++ printk("klips_debug:pfkey_cleanup: " ++ "cannot unregister /proc/net/pf_key_supported\n"); ++ if (proc_net_unregister(proc_net_pfkey_registered.low_ino) != 0) ++ printk("klips_debug:pfkey_cleanup: " ++ "cannot unregister /proc/net/pf_key_registered\n"); ++# else /* !PROC_FS_2325 */ ++ proc_net_remove ("pf_key"); ++ proc_net_remove ("pf_key_supported"); ++ proc_net_remove ("pf_key_registered"); ++# endif /* !PROC_FS_2325 */ ++#endif /* CONFIG_PROC_FS */ ++ ++ /* other module unloading cleanup happens here */ ++ return error; ++} ++ ++#ifdef MODULE ++#if 0 ++int ++init_module(void) ++{ ++ pfkey_init(); ++ return 0; ++} ++ ++void ++cleanup_module(void) ++{ ++ pfkey_cleanup(); ++} ++#endif /* 0 */ ++#else /* MODULE */ ++struct net_protocol; ++void pfkey_proto_init(struct net_protocol *pro) ++{ ++ pfkey_init(); ++} ++#endif /* MODULE */ ++ ++/* ++ * $Log: pfkey_v2.c,v $ ++ * Revision 1.97.2.16 2007-10-31 19:57:41 paul ++ * type of sock.sk_stamp changed from timeval to ktime [dhr] ++ * ++ * Revision 1.97.2.15 2007-10-30 21:39:30 paul ++ * Use skb_tail_pointer/skb_end_pointer [dhr] ++ * ++ * Revision 1.97.2.14 2007/09/05 02:56:10 paul ++ * Use the new ipsec_kversion macros by David to deal with 2.6.22 kernels. ++ * Fixes based on David McCullough patch. 
++ * ++ * Revision 1.97.2.13 2007/08/10 01:40:49 paul ++ * Fix for sock_unregister for 2.6.19 by Sergeil ++ * ++ * Revision 1.97.2.12 2006/11/24 05:43:29 paul ++ * kernels after 2.6.18 do not return a code from unregister_socket() ++ * backport from git 41e54a2684dc809d7952e816860ea646a3194a72 ++ * ++ * Revision 1.97.2.11 2006/11/15 16:05:57 paul ++ * fix for compiling on 2.4. kernels by Matthias Haas. ++ * ++ * Revision 1.97.2.10 2006/10/10 20:43:28 paul ++ * Add family/create/owner for pfkey_family_ops. This fixes bug #671 ++ * ++ * Revision 1.97.2.9 2006/10/06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.97.2.8 2006/07/10 15:56:11 paul ++ * Fix for bug #642 by Bart. ++ * ++ * Revision 1.97.2.7 2006/04/04 11:34:19 ken ++ * Backport SMP fixes + #ifdef cleanup from #public ++ * ++ * Revision 1.97.2.6 2006/02/15 05:00:20 paul ++ * Fix for crasher on 2.6.12+ with klips (mostly seen on redhat kernels) ++ * ++ * Revision 1.97.2.5 2005/11/22 04:11:52 ken ++ * Backport fixes for 2.6.14 kernels from HEAD ++ * ++ * Revision 1.97.2.4 2005/09/14 16:40:45 mcr ++ * pull up of compilation on 2.4 ++ * ++ * Revision 1.97.2.3 2005/09/06 02:10:03 mcr ++ * pulled up possible SMP-related compilation fix ++ * ++ * Revision 1.97.2.2 2005/08/28 01:21:12 paul ++ * Undid Ken's gcc4 fix in version 1.94 since it breaks linking KLIPS on ++ * SMP kernels. ++ * ++ * Revision 1.97.2.1 2005/08/27 23:40:00 paul ++ * recommited HAVE_SOCK_SECURITY fixes for linux 2.6.13 ++ * ++ * Revision 1.102 2005/09/14 16:37:23 mcr ++ * fix to compile on 2.4. ++ * ++ * Revision 1.101 2005/09/06 01:42:25 mcr ++ * removed additional SOCKOPS_WRAPPED code ++ * ++ * Revision 1.100 2005/08/30 18:10:15 mcr ++ * remove SOCKOPS_WRAPPED() code, add proper locking to the ++ * pfkey code. 
(cross fingers) ++ * ++ * Revision 1.99 2005/08/28 01:53:37 paul ++ * Undid Ken's gcc4 fix in version 1.94 since it breaks linking KLIPS on SMP kernels. ++ * ++ * Revision 1.98 2005/08/27 23:07:21 paul ++ * Somewhere between 2.6.12 and 2.6.13rc7 the unused security memnber in sk_buff ++ * has been removed. This patch should fix compilation for both cases. ++ * ++ * Revision 1.97 2005/07/20 00:33:36 mcr ++ * fixed typo in #ifdef for SKALLOC. ++ * ++ * Revision 1.96 2005/07/19 20:02:15 mcr ++ * sk_alloc() interface change. ++ * ++ * Revision 1.95 2005/07/09 00:40:06 ken ++ * Fix for GCC4 - it doesn't like the potential for duplicate declaration ++ * ++ * Revision 1.94 2005/07/09 00:14:04 ken ++ * Casts for 64bit cleanliness ++ * ++ * Revision 1.93 2005/07/08 16:20:05 mcr ++ * fix for 2.6.12 disapperance of sk_zapped field -> sock_flags. ++ * ++ * Revision 1.92 2005/05/21 03:29:39 mcr ++ * fixed missing prototype definition. ++ * ++ * Revision 1.91 2005/05/11 01:43:45 mcr ++ * removed "poor-man"s OOP in favour of proper C structures. ++ * ++ * Revision 1.90 2005/05/02 18:42:47 mcr ++ * fix for cut&paste error with pfkey_v2.c "supported_name" ++ * ++ * Revision 1.89 2005/05/01 03:12:31 mcr ++ * print name if it is available. ++ * ++ * Revision 1.88 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. ++ * ++ * Revision 1.87 2005/04/15 19:57:10 mcr ++ * make sure that address has 0p so that it will ++ * sanitized. ++ * ++ * Revision 1.86 2005/04/08 18:28:36 mcr ++ * some minor #ifdef simplification in pursuit of a possible bug. ++ * ++ * Revision 1.85 2004/12/03 21:25:57 mcr ++ * compile time fixes for running on 2.6. ++ * still experimental. ++ * ++ * Revision 1.84 2004/08/17 03:27:23 mcr ++ * klips 2.6 edits. ++ * ++ * Revision 1.83 2004/08/04 15:57:07 mcr ++ * moved des .h files to include/des/ * ++ * included 2.6 protocol specific things ++ * started at NAT-T support, but it will require a kernel patch. 
++ * ++ * Revision 1.82 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.81 2004/04/25 21:23:11 ken ++ * Pull in dhr's changes from FreeS/WAN 2.06 ++ * ++ * Revision 1.80 2004/04/06 02:49:26 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.79.4.1 2003/12/22 15:25:52 jjo ++ * . Merged algo-0.8.1-rc11-test1 into alg-branch ++ * ++ * Revision 1.79 2003/10/31 02:27:55 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.78.4.1 2003/10/29 01:30:41 mcr ++ * elimited "struct sa_id". ++ * ++ * Revision 1.78 2003/04/03 17:38:09 rgb ++ * Centralised ipsec_kfree_skb and ipsec_dev_{get,put}. ++ * ++ * Revision 1.77 2002/10/17 16:49:36 mcr ++ * sock->ops should reference the unwrapped options so that ++ * we get hacked in locking on SMP systems. ++ * ++ * Revision 1.76 2002/10/12 23:11:53 dhr ++ * ++ * [KenB + DHR] more 64-bit cleanup ++ * ++ * Revision 1.75 2002/09/20 05:01:57 rgb ++ * Added memory allocation debugging. ++ * ++ * Revision 1.74 2002/09/19 02:42:50 mcr ++ * do not define the pfkey_ops function for now. ++ * ++ * Revision 1.73 2002/09/17 17:29:23 mcr ++ * #if 0 out some dead code - pfkey_ops is never used as written. ++ * ++ * Revision 1.72 2002/07/24 18:44:54 rgb ++ * Type fiddling to tame ia64 compiler. ++ * ++ * Revision 1.71 2002/05/23 07:14:11 rgb ++ * Cleaned up %p variants to 0p%p for test suite cleanup. ++ * ++ * Revision 1.70 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.69 2002/04/24 07:36:33 mcr ++ * Moved from ./klips/net/ipsec/pfkey_v2.c,v ++ * ++ * Revision 1.68 2002/03/08 01:15:17 mcr ++ * put some internal structure only debug messages behind ++ * && sysctl_ipsec_debug_verbose. 
++ * ++ * Revision 1.67 2002/01/29 17:17:57 mcr ++ * moved include of ipsec_param.h to after include of linux/kernel.h ++ * otherwise, it seems that some option that is set in ipsec_param.h ++ * screws up something subtle in the include path to kernel.h, and ++ * it complains on the snprintf() prototype. ++ * ++ * Revision 1.66 2002/01/29 04:00:54 mcr ++ * more excise of kversions.h header. ++ * ++ * Revision 1.65 2002/01/29 02:13:18 mcr ++ * introduction of ipsec_kversion.h means that include of ++ * ipsec_param.h must preceed any decisions about what files to ++ * include to deal with differences in kernel source. ++ * ++ * Revision 1.64 2001/11/26 09:23:51 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. ++ * ++ * Revision 1.61.2.1 2001/09/25 02:28:44 mcr ++ * cleaned up includes. ++ * ++ * Revision 1.63 2001/11/12 19:38:00 rgb ++ * Continue trying other sockets even if one fails and return only original ++ * error. ++ * ++ * Revision 1.62 2001/10/18 04:45:22 rgb ++ * 2.4.9 kernel deprecates linux/malloc.h in favour of linux/slab.h, ++ * lib/freeswan.h version macros moved to lib/kversions.h. ++ * Other compiler directive cleanups. ++ * ++ * Revision 1.61 2001/09/20 15:32:59 rgb ++ * Min/max cleanup. ++ * ++ * Revision 1.60 2001/06/14 19:35:12 rgb ++ * Update copyright date. ++ * ++ * Revision 1.59 2001/06/13 15:35:48 rgb ++ * Fixed #endif comments. ++ * ++ * Revision 1.58 2001/05/04 16:37:24 rgb ++ * Remove erroneous checking of return codes for proc_net_* in 2.4. ++ * ++ * Revision 1.57 2001/05/03 19:43:36 rgb ++ * Initialise error return variable. ++ * Check error return codes in startup and shutdown. ++ * Standardise on SENDERR() macro. ++ * ++ * Revision 1.56 2001/04/21 23:05:07 rgb ++ * Define out skb->used for 2.4 kernels. ++ * ++ * Revision 1.55 2001/02/28 05:03:28 rgb ++ * Clean up and rationalise startup messages. 
++ * ++ * Revision 1.54 2001/02/27 22:24:55 rgb ++ * Re-formatting debug output (line-splitting, joining, 1arg/line). ++ * Check for satoa() return codes. ++ * ++ * Revision 1.53 2001/02/27 06:48:18 rgb ++ * Fixed pfkey socket unregister log message to reflect type and function. ++ * ++ * Revision 1.52 2001/02/26 22:34:38 rgb ++ * Fix error return code that was getting overwritten by the error return ++ * code of an upmsg. ++ * ++ * Revision 1.51 2001/01/30 23:42:47 rgb ++ * Allow pfkey msgs from pid other than user context required for ACQUIRE ++ * and subsequent ADD or UDATE. ++ * ++ * Revision 1.50 2001/01/23 20:22:59 rgb ++ * 2.4 fix to remove removed is_clone member. ++ * ++ * Revision 1.49 2000/11/06 04:33:47 rgb ++ * Changed non-exported functions to DEBUG_NO_STATIC. ++ * ++ * Revision 1.48 2000/09/29 19:47:41 rgb ++ * Update copyright. ++ * ++ * Revision 1.47 2000/09/22 04:23:04 rgb ++ * Added more debugging to pfkey_upmsg() call from pfkey_sendmsg() error. ++ * ++ * Revision 1.46 2000/09/21 04:20:44 rgb ++ * Fixed array size off-by-one error. (Thanks Svenning!) ++ * ++ * Revision 1.45 2000/09/20 04:01:26 rgb ++ * Changed static functions to DEBUG_NO_STATIC for revealing function names ++ * in oopsen. ++ * ++ * Revision 1.44 2000/09/19 00:33:17 rgb ++ * 2.0 fixes. ++ * ++ * Revision 1.43 2000/09/16 01:28:13 rgb ++ * Fixed use of 0 in p format warning. ++ * ++ * Revision 1.42 2000/09/16 01:09:41 rgb ++ * Fixed debug format warning for pointers that was expecting ints. ++ * ++ * Revision 1.41 2000/09/13 15:54:00 rgb ++ * Rewrote pfkey_get_info(), added pfkey_{supported,registered}_get_info(). ++ * Moved supported algos add and remove to functions. ++ * ++ * Revision 1.40 2000/09/12 18:49:28 rgb ++ * Added IPIP tunnel and IPCOMP register support. ++ * ++ * Revision 1.39 2000/09/12 03:23:49 rgb ++ * Converted #if0 debugs to sysctl. ++ * Removed debug_pfkey initialisations that prevented no_debug loading or ++ * linking. 
++ * ++ * Revision 1.38 2000/09/09 06:38:02 rgb ++ * Return positive errno in pfkey_reply error message. ++ * ++ * Revision 1.37 2000/09/08 19:19:09 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * Clean-up of long-unused crud... ++ * Create pfkey error message on on failure. ++ * Give pfkey_list_{insert,remove}_{socket,supported}() some error ++ * checking. ++ * ++ * Revision 1.36 2000/09/01 18:49:38 rgb ++ * Reap experimental NET_21_ bits. ++ * Turned registered sockets list into an array of one list per satype. ++ * Remove references to deprecated sklist_{insert,remove}_socket. ++ * Removed leaking socket debugging code. ++ * Removed duplicate pfkey_insert_socket in pfkey_create. ++ * Removed all references to pfkey msg->msg_name, since it is not used for ++ * pfkey. ++ * Added a supported algorithms array lists, one per satype and registered ++ * existing algorithms. ++ * Fixed pfkey_list_{insert,remove}_{socket,support}() to allow change to ++ * list. ++ * Only send pfkey_expire() messages to sockets registered for that satype. ++ * ++ * Revision 1.35 2000/08/24 17:03:00 rgb ++ * Corrected message size error return code for PF_KEYv2. ++ * Removed downward error prohibition. ++ * ++ * Revision 1.34 2000/08/21 16:32:26 rgb ++ * Re-formatted for cosmetic consistency and readability. ++ * ++ * Revision 1.33 2000/08/20 21:38:24 rgb ++ * Added a pfkey_reply parameter to pfkey_msg_interp(). (Momchil) ++ * Extended the upward message initiation of pfkey_sendmsg(). (Momchil) ++ * ++ * Revision 1.32 2000/07/28 14:58:31 rgb ++ * Changed kfree_s to kfree, eliminating extra arg to fix 2.4.0-test5. ++ * ++ * Revision 1.31 2000/05/16 03:04:00 rgb ++ * Updates for 2.3.99pre8 from MB. ++ * ++ * Revision 1.30 2000/05/10 19:22:21 rgb ++ * Use sklist private functions for 2.3.xx compatibility. ++ * ++ * Revision 1.29 2000/03/22 16:17:03 rgb ++ * Fixed SOCKOPS_WRAPPED macro for SMP (MB). 
++ * ++ * Revision 1.28 2000/02/21 19:30:45 rgb ++ * Removed references to pkt_bridged for 2.3.47 compatibility. ++ * ++ * Revision 1.27 2000/02/14 21:07:00 rgb ++ * Fixed /proc/net/pf-key legend spacing. ++ * ++ * Revision 1.26 2000/01/22 03:46:59 rgb ++ * Fixed pfkey error return mechanism so that we are able to free the ++ * local copy of the pfkey_msg, plugging a memory leak and silencing ++ * the bad object free complaints. ++ * ++ * Revision 1.25 2000/01/21 06:19:44 rgb ++ * Moved pfkey_list_remove_socket() calls to before MOD_USE_DEC_COUNT. ++ * Added debugging to pfkey_upmsg. ++ * ++ * Revision 1.24 2000/01/10 16:38:23 rgb ++ * MB fixups for 2.3.x. ++ * ++ * Revision 1.23 1999/12/09 23:22:16 rgb ++ * Added more instrumentation for debugging 2.0 socket ++ * selection/reading. ++ * Removed erroneous 2.0 wait==NULL check bug in select. ++ * ++ * Revision 1.22 1999/12/08 20:32:16 rgb ++ * Tidied up 2.0.xx support, after major pfkey work, eliminating ++ * msg->msg_name twiddling in the process, since it is not defined ++ * for PF_KEYv2. ++ * ++ * Revision 1.21 1999/12/01 22:17:19 rgb ++ * Set skb->dev to zero on new skb in case it is a reused skb. ++ * Added check for skb_put overflow and freeing to avoid upmsg on error. ++ * Added check for wrong pfkey version and freeing to avoid upmsg on ++ * error. ++ * Shut off content dumping in pfkey_destroy. ++ * Added debugging message for size of buffer allocated for upmsg. ++ * ++ * Revision 1.20 1999/11/27 12:11:00 rgb ++ * Minor clean-up, enabling quiet operation of pfkey if desired. ++ * ++ * Revision 1.19 1999/11/25 19:04:21 rgb ++ * Update proc_fs code for pfkey to use dynamic registration. ++ * ++ * Revision 1.18 1999/11/25 09:07:17 rgb ++ * Implemented SENDERR macro for propagating error codes. ++ * Fixed error return code bug. ++ * ++ * Revision 1.17 1999/11/23 23:07:20 rgb ++ * Change name of pfkey_msg_parser to pfkey_msg_interp since it no longer ++ * parses. 
(PJO) ++ * Sort out pfkey and freeswan headers, putting them in a library path. ++ * ++ * Revision 1.16 1999/11/20 22:00:22 rgb ++ * Moved socketlist type declarations and prototypes for shared use. ++ * Renamed reformatted and generically extended for use by other socket ++ * lists pfkey_{del,add}_open_socket to pfkey_list_{remove,insert}_socket. ++ * ++ * Revision 1.15 1999/11/18 04:15:09 rgb ++ * Make pfkey_data_ready temporarily available for 2.2.x testing. ++ * Clean up pfkey_destroy_socket() debugging statements. ++ * Add Peter Onion's code to send messages up to all listening sockets. ++ * Changed all occurrences of #include "../../../lib/freeswan.h" ++ * to #include which works due to -Ilibfreeswan in the ++ * klips/net/ipsec/Makefile. ++ * Replaced all kernel version macros to shorter, readable form. ++ * Added CONFIG_PROC_FS compiler directives in case it is shut off. ++ * ++ * Revision 1.14 1999/11/17 16:01:00 rgb ++ * Make pfkey_data_ready temporarily available for 2.2.x testing. ++ * Clean up pfkey_destroy_socket() debugging statements. ++ * Add Peter Onion's code to send messages up to all listening sockets. ++ * Changed #include "../../../lib/freeswan.h" to #include ++ * which works due to -Ilibfreeswan in the klips/net/ipsec/Makefile. ++ * ++ * Revision 1.13 1999/10/27 19:59:51 rgb ++ * Removed af_unix comments that are no longer relevant. ++ * Added debug prink statements. ++ * Added to the /proc output in pfkey_get_info. ++ * Made most functions non-static to enable oops tracing. ++ * Re-enable skb dequeueing and freeing. ++ * Fix skb_alloc() and skb_put() size bug in pfkey_upmsg(). ++ * ++ * Revision 1.12 1999/10/26 17:05:42 rgb ++ * Complete re-ordering based on proto_ops structure order. ++ * Separated out proto_ops structures for 2.0.x and 2.2.x for clarity. ++ * Simplification to use built-in socket ops where possible for 2.2.x. ++ * Add shorter macros for compiler directives to visually clean-up. 
++ * Add lots of sk skb dequeueing debugging statements. ++ * Added to the /proc output in pfkey_get_info. ++ * ++ * Revision 1.11 1999/09/30 02:55:10 rgb ++ * Bogus skb detection. ++ * Fix incorrect /proc/net/ipsec-eroute printk message. ++ * ++ * Revision 1.10 1999/09/21 15:22:13 rgb ++ * Temporary fix while I figure out the right way to destroy sockets. ++ * ++ * Revision 1.9 1999/07/08 19:19:44 rgb ++ * Fix pointer format warning. ++ * Fix missing member error under 2.0.xx kernels. ++ * ++ * Revision 1.8 1999/06/13 07:24:04 rgb ++ * Add more debugging. ++ * ++ * Revision 1.7 1999/06/10 05:24:17 rgb ++ * Clarified compiler directives. ++ * Renamed variables to reduce confusion. ++ * Used sklist_*_socket() kernel functions to simplify 2.2.x socket support. ++ * Added lots of sanity checking. ++ * ++ * Revision 1.6 1999/06/03 18:59:50 rgb ++ * More updates to 2.2.x socket support. Almost works, oops at end of call. ++ * ++ * Revision 1.5 1999/05/25 22:44:05 rgb ++ * Start fixing 2.2 sockets. ++ * ++ * Revision 1.4 1999/04/29 15:21:34 rgb ++ * Move log to the end of the file. ++ * Eliminate min/max redefinition in #include . ++ * Correct path for pfkey #includes ++ * Standardise an error return method. ++ * Add debugging instrumentation. ++ * Move message type checking to pfkey_msg_parse(). ++ * Add check for errno incorrectly set. ++ * Add check for valid PID. ++ * Add check for reserved illegally set. ++ * Add check for message out of bounds. ++ * ++ * Revision 1.3 1999/04/15 17:58:07 rgb ++ * Add RCSID labels. ++ * ++ * Revision 1.2 1999/04/15 15:37:26 rgb ++ * Forward check changes from POST1_00 branch. ++ * ++ * Revision 1.1.2.2 1999/04/13 20:37:12 rgb ++ * Header Title correction. ++ * ++ * Revision 1.1.2.1 1999/03/26 20:58:55 rgb ++ * Add pfkeyv2 support to KLIPS. 
++ * ++ * ++ * RFC 2367 ++ * PF_KEY_v2 Key Management API ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/pfkey_v2_build.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,1581 @@ ++/* ++ * RFC2367 PF_KEYv2 Key management API message parser ++ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: pfkey_v2_build.c,v 1.51.8.1 2006-05-01 14:36:39 mcr Exp $ ++ */ ++ ++/* ++ * Template from klips/net/ipsec/ipsec/ipsec_parser.c. ++ */ ++ ++char pfkey_v2_build_c_version[] = "$Id: pfkey_v2_build.c,v 1.51.8.1 2006-05-01 14:36:39 mcr Exp $"; ++ ++/* ++ * Some ugly stuff to allow consistent debugging code for use in the ++ * kernel and in user space ++*/ ++ ++#ifdef __KERNEL__ ++ ++# include /* for printk */ ++ ++# include "openswan/ipsec_kversion.h" /* for malloc switch */ ++# ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++# else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++# endif /* MALLOC_SLAB */ ++# include /* error codes */ ++# include /* size_t */ ++# include /* mark_bh */ ++ ++# include /* struct device, and other headers */ ++# include /* eth_type_trans */ ++# include /* struct iphdr */ ++# if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++# include /* struct ipv6hdr */ ++# endif /* if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ ++ ++# define MALLOC(size) kmalloc(size, GFP_ATOMIC) ++# define FREE(obj) kfree(obj) ++# include ++#else /* __KERNEL__ */ ++ ++# include ++# include ++# include ++# include 
++# include /* memset */ ++ ++# include ++ ++#endif /* __KERNEL__ */ ++ ++#include ++#include ++ ++#ifdef __KERNEL__ ++#include "openswan/radij.h" /* rd_nodes */ ++#include "openswan/ipsec_encap.h" /* sockaddr_encap */ ++#endif /* __KERNEL__ */ ++ ++ ++#include "openswan/ipsec_sa.h" /* IPSEC_SAREF_NULL, IPSEC_SA_REF_TABLE_IDX_WIDTH */ ++#include "openswan/pfkey_debug.h" ++ ++ ++#define SENDERR(_x) do { error = -(_x); goto errlab; } while (0) ++ ++void ++pfkey_extensions_init(struct sadb_ext *extensions[SADB_EXT_MAX + 1]) ++{ ++ int i; ++ ++ for (i = 0; i != SADB_EXT_MAX + 1; i++) { ++ extensions[i] = NULL; ++ } ++} ++ ++void ++pfkey_extensions_free(struct sadb_ext *extensions[SADB_EXT_MAX + 1]) ++{ ++ int i; ++ ++ if(!extensions) { ++ return; ++ } ++ ++ if(extensions[0]) { ++ memset(extensions[0], 0, sizeof(struct sadb_msg)); ++ FREE(extensions[0]); ++ extensions[0] = NULL; ++ } ++ ++ for (i = 1; i != SADB_EXT_MAX + 1; i++) { ++ if(extensions[i]) { ++ memset(extensions[i], 0, extensions[i]->sadb_ext_len * IPSEC_PFKEYv2_ALIGN); ++ FREE(extensions[i]); ++ extensions[i] = NULL; ++ } ++ } ++} ++ ++void ++pfkey_msg_free(struct sadb_msg **pfkey_msg) ++{ ++ if(*pfkey_msg) { ++ memset(*pfkey_msg, 0, (*pfkey_msg)->sadb_msg_len * IPSEC_PFKEYv2_ALIGN); ++ FREE(*pfkey_msg); ++ *pfkey_msg = NULL; ++ } ++} ++ ++/* Default extension builders taken from the KLIPS code */ ++ ++int ++pfkey_msg_hdr_build(struct sadb_ext** pfkey_ext, ++ uint8_t msg_type, ++ uint8_t satype, ++ uint8_t msg_errno, ++ uint32_t seq, ++ uint32_t pid) ++{ ++ int error = 0; ++ struct sadb_msg *pfkey_msg = (struct sadb_msg *)*pfkey_ext; ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_msg_hdr_build:\n"); ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_msg_hdr_build: " ++ "on_entry &pfkey_ext=0p%p pfkey_ext=0p%p *pfkey_ext=0p%p.\n", ++ &pfkey_ext, ++ pfkey_ext, ++ *pfkey_ext); ++ /* sanity checks... 
*/ ++ if(pfkey_msg) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_msg_hdr_build: " ++ "why is pfkey_msg already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(!msg_type) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_msg_hdr_build: " ++ "msg type not set, must be non-zero..\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(msg_type > SADB_MAX) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_msg_hdr_build: " ++ "msg type too large:%d.\n", ++ msg_type); ++ SENDERR(EINVAL); ++ } ++ ++ if(satype > SADB_SATYPE_MAX) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_msg_hdr_build: " ++ "satype %d > max %d\n", ++ satype, SADB_SATYPE_MAX); ++ SENDERR(EINVAL); ++ } ++ ++ pfkey_msg = (struct sadb_msg*)MALLOC(sizeof(struct sadb_msg)); ++ *pfkey_ext = (struct sadb_ext*)pfkey_msg; ++ ++ if(pfkey_msg == NULL) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_msg_hdr_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ memset(pfkey_msg, 0, sizeof(struct sadb_msg)); ++ ++ pfkey_msg->sadb_msg_len = sizeof(struct sadb_msg) / IPSEC_PFKEYv2_ALIGN; ++ ++ pfkey_msg->sadb_msg_type = msg_type; ++ pfkey_msg->sadb_msg_satype = satype; ++ ++ pfkey_msg->sadb_msg_version = PF_KEY_V2; ++ pfkey_msg->sadb_msg_errno = msg_errno; ++ pfkey_msg->sadb_msg_reserved = 0; ++ pfkey_msg->sadb_msg_seq = seq; ++ pfkey_msg->sadb_msg_pid = pid; ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_msg_hdr_build: " ++ "on_exit &pfkey_ext=0p%p pfkey_ext=0p%p *pfkey_ext=0p%p.\n", ++ &pfkey_ext, ++ pfkey_ext, ++ *pfkey_ext); ++errlab: ++ return error; ++} ++ ++int ++pfkey_sa_ref_build(struct sadb_ext ** pfkey_ext, ++ uint16_t exttype, ++ uint32_t spi, ++ uint8_t replay_window, ++ uint8_t sa_state, ++ uint8_t auth, ++ uint8_t encrypt, ++ uint32_t flags, ++ uint32_t/*IPsecSAref_t*/ ref) ++{ ++ int error = 0; ++ struct sadb_sa *pfkey_sa = (struct sadb_sa *)*pfkey_ext; ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_sa_build: " ++ "spi=%08x replay=%d sa_state=%d auth=%d encrypt=%d flags=%d\n", ++ ntohl(spi), /* in network 
order */ ++ replay_window, ++ sa_state, ++ auth, ++ encrypt, ++ flags); ++ /* sanity checks... */ ++ if(pfkey_sa) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_sa_build: " ++ "why is pfkey_sa already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(exttype != SADB_EXT_SA && ++ exttype != SADB_X_EXT_SA2) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_sa_build: " ++ "invalid exttype=%d.\n", ++ exttype); ++ SENDERR(EINVAL); ++ } ++ ++ if(replay_window > 64) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_sa_build: " ++ "replay window size: %d -- must be 0 <= size <= 64\n", ++ replay_window); ++ SENDERR(EINVAL); ++ } ++ ++ if(auth > SADB_AALG_MAX) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_sa_build: " ++ "auth=%d > SADB_AALG_MAX=%d.\n", ++ auth, ++ SADB_AALG_MAX); ++ SENDERR(EINVAL); ++ } ++ ++#if SADB_EALG_MAX < 255 ++ if(encrypt > SADB_EALG_MAX) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_sa_build: " ++ "encrypt=%d > SADB_EALG_MAX=%d.\n", ++ encrypt, ++ SADB_EALG_MAX); ++ SENDERR(EINVAL); ++ } ++#endif ++ ++ if(sa_state > SADB_SASTATE_MAX) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_sa_build: " ++ "sa_state=%d exceeds MAX=%d.\n", ++ sa_state, ++ SADB_SASTATE_MAX); ++ SENDERR(EINVAL); ++ } ++ ++ if(sa_state == SADB_SASTATE_DEAD) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_sa_build: " ++ "sa_state=%d is DEAD=%d is not allowed.\n", ++ sa_state, ++ SADB_SASTATE_DEAD); ++ SENDERR(EINVAL); ++ } ++ ++ if((IPSEC_SAREF_NULL != ref) && (ref >= (1 << IPSEC_SA_REF_TABLE_IDX_WIDTH))) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_sa_build: " ++ "SAref=%d must be (SAref == IPSEC_SAREF_NULL(%d) || SAref < IPSEC_SA_REF_TABLE_NUM_ENTRIES(%d)).\n", ++ ref, ++ IPSEC_SAREF_NULL, ++ IPSEC_SA_REF_TABLE_NUM_ENTRIES); ++ SENDERR(EINVAL); ++ } ++ ++ pfkey_sa = (struct sadb_sa*)MALLOC(sizeof(struct sadb_sa)); ++ *pfkey_ext = (struct sadb_ext*)pfkey_sa; ++ ++ if(pfkey_sa == NULL) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_sa_build: " ++ "memory allocation failed\n"); ++ 
SENDERR(ENOMEM); ++ } ++ memset(pfkey_sa, 0, sizeof(struct sadb_sa)); ++ ++ pfkey_sa->sadb_sa_len = sizeof(*pfkey_sa) / IPSEC_PFKEYv2_ALIGN; ++ pfkey_sa->sadb_sa_exttype = exttype; ++ pfkey_sa->sadb_sa_spi = spi; ++ pfkey_sa->sadb_sa_replay = replay_window; ++ pfkey_sa->sadb_sa_state = sa_state; ++ pfkey_sa->sadb_sa_auth = auth; ++ pfkey_sa->sadb_sa_encrypt = encrypt; ++ pfkey_sa->sadb_sa_flags = flags; ++ pfkey_sa->sadb_x_sa_ref = ref; ++ ++errlab: ++ return error; ++} ++ ++int ++pfkey_sa_build(struct sadb_ext ** pfkey_ext, ++ uint16_t exttype, ++ uint32_t spi, ++ uint8_t replay_window, ++ uint8_t sa_state, ++ uint8_t auth, ++ uint8_t encrypt, ++ uint32_t flags) ++{ ++ return pfkey_sa_ref_build(pfkey_ext, ++ exttype, ++ spi, ++ replay_window, ++ sa_state, ++ auth, ++ encrypt, ++ flags, ++ IPSEC_SAREF_NULL); ++} ++ ++int ++pfkey_lifetime_build(struct sadb_ext ** pfkey_ext, ++ uint16_t exttype, ++ uint32_t allocations, ++ uint64_t bytes, ++ uint64_t addtime, ++ uint64_t usetime, ++ uint32_t packets) ++{ ++ int error = 0; ++ struct sadb_lifetime *pfkey_lifetime = (struct sadb_lifetime *)*pfkey_ext; ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_lifetime_build:\n"); ++ /* sanity checks... 
*/ ++ if(pfkey_lifetime) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_lifetime_build: " ++ "why is pfkey_lifetime already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(exttype != SADB_EXT_LIFETIME_CURRENT && ++ exttype != SADB_EXT_LIFETIME_HARD && ++ exttype != SADB_EXT_LIFETIME_SOFT) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_lifetime_build: " ++ "invalid exttype=%d.\n", ++ exttype); ++ SENDERR(EINVAL); ++ } ++ ++ pfkey_lifetime = (struct sadb_lifetime*)MALLOC(sizeof(struct sadb_lifetime)); ++ *pfkey_ext = (struct sadb_ext*) pfkey_lifetime; ++ ++ if(pfkey_lifetime == NULL) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_lifetime_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ memset(pfkey_lifetime, 0, sizeof(struct sadb_lifetime)); ++ ++ pfkey_lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime) / IPSEC_PFKEYv2_ALIGN; ++ pfkey_lifetime->sadb_lifetime_exttype = exttype; ++ pfkey_lifetime->sadb_lifetime_allocations = allocations; ++ pfkey_lifetime->sadb_lifetime_bytes = bytes; ++ pfkey_lifetime->sadb_lifetime_addtime = addtime; ++ pfkey_lifetime->sadb_lifetime_usetime = usetime; ++ pfkey_lifetime->sadb_x_lifetime_packets = packets; ++ ++errlab: ++ return error; ++} ++ ++int ++pfkey_address_build(struct sadb_ext** pfkey_ext, ++ uint16_t exttype, ++ uint8_t proto, ++ uint8_t prefixlen, ++ struct sockaddr* address) ++{ ++ int error = 0; ++ int saddr_len = 0; ++ char ipaddr_txt[ADDRTOT_BUF + 6/*extra for port number*/]; ++ struct sadb_address *pfkey_address = (struct sadb_address *)*pfkey_ext; ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_address_build: " ++ "exttype=%d proto=%d prefixlen=%d\n", ++ exttype, ++ proto, ++ prefixlen); ++ /* sanity checks... 
*/ ++ if(pfkey_address) { ++ ERROR("pfkey_address_build: " ++ "why is pfkey_address already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if (!address) { ++ ERROR("pfkey_address_build: " "address is NULL\n"); ++ SENDERR(EINVAL); ++ } ++ ++ switch(exttype) { ++ case SADB_EXT_ADDRESS_SRC: ++ case SADB_EXT_ADDRESS_DST: ++ case SADB_EXT_ADDRESS_PROXY: ++ case SADB_X_EXT_ADDRESS_DST2: ++ case SADB_X_EXT_ADDRESS_SRC_FLOW: ++ case SADB_X_EXT_ADDRESS_DST_FLOW: ++ case SADB_X_EXT_ADDRESS_SRC_MASK: ++ case SADB_X_EXT_ADDRESS_DST_MASK: ++#ifdef NAT_TRAVERSAL ++ case SADB_X_EXT_NAT_T_OA: ++#endif ++ break; ++ default: ++ ERROR("pfkey_address_build: " ++ "unrecognised ext_type=%d.\n", ++ exttype); ++ SENDERR(EINVAL); ++ } ++ ++ switch(address->sa_family) { ++ case AF_INET: ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_address_build: " ++ "found address family AF_INET.\n"); ++ saddr_len = sizeof(struct sockaddr_in); ++ sprintf(ipaddr_txt, "%d.%d.%d.%d:%d" ++ , (((struct sockaddr_in*)address)->sin_addr.s_addr >> 0) & 0xFF ++ , (((struct sockaddr_in*)address)->sin_addr.s_addr >> 8) & 0xFF ++ , (((struct sockaddr_in*)address)->sin_addr.s_addr >> 16) & 0xFF ++ , (((struct sockaddr_in*)address)->sin_addr.s_addr >> 24) & 0xFF ++ , ntohs(((struct sockaddr_in*)address)->sin_port)); ++ break; ++ case AF_INET6: ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_address_build: " ++ "found address family AF_INET6.\n"); ++ saddr_len = sizeof(struct sockaddr_in6); ++ sprintf(ipaddr_txt, "%x:%x:%x:%x:%x:%x:%x:%x-%x" ++ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[0]) ++ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[1]) ++ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[2]) ++ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[3]) ++ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[4]) ++ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[5]) ++ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[6]) ++ , 
ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[7]) ++ , ntohs(((struct sockaddr_in6*)address)->sin6_port)); ++ break; ++ default: ++ ERROR("pfkey_address_build: " ++ "address->sa_family=%d not supported.\n", ++ address->sa_family); ++ SENDERR(EPFNOSUPPORT); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_address_build: " ++ "found address=%s.\n", ++ ipaddr_txt); ++ if(prefixlen != 0) { ++ ERROR("pfkey_address_build: " ++ "address prefixes not supported yet.\n"); ++ SENDERR(EAFNOSUPPORT); /* not supported yet */ ++ } ++ ++ /* allocate some memory for the extension */ ++ pfkey_address = (struct sadb_address*) ++ MALLOC(ALIGN_N(sizeof(struct sadb_address) + saddr_len, IPSEC_PFKEYv2_ALIGN)); ++ *pfkey_ext = (struct sadb_ext*)pfkey_address; ++ ++ if(pfkey_address == NULL ) { ++ ERROR("pfkey_lifetime_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ memset(pfkey_address, ++ 0, ++ ALIGN_N(sizeof(struct sadb_address) + saddr_len, ++ IPSEC_PFKEYv2_ALIGN)); ++ ++ pfkey_address->sadb_address_len = DIVUP(sizeof(struct sadb_address) + saddr_len, ++ IPSEC_PFKEYv2_ALIGN); ++ ++ pfkey_address->sadb_address_exttype = exttype; ++ pfkey_address->sadb_address_proto = proto; ++ pfkey_address->sadb_address_prefixlen = prefixlen; ++ pfkey_address->sadb_address_reserved = 0; ++ ++ memcpy((char*)pfkey_address + sizeof(struct sadb_address), ++ address, ++ saddr_len); ++ ++#if 0 ++ for(i = 0; i < sizeof(struct sockaddr_in) - offsetof(struct sockaddr_in, sin_zero); i++) { ++ pfkey_address_s_ska.sin_zero[i] = 0; ++ } ++#endif ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_address_build: " ++ "successful created len: %d.\n", pfkey_address->sadb_address_len); ++ ++ errlab: ++ return error; ++} ++ ++int ++pfkey_key_build(struct sadb_ext** pfkey_ext, ++ uint16_t exttype, ++ uint16_t key_bits, ++ char* key) ++{ ++ int error = 0; ++ struct sadb_key *pfkey_key = (struct sadb_key *)*pfkey_ext; ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_key_build:\n"); ++ /* sanity 
checks... */ ++ if(pfkey_key) { ++ ERROR("pfkey_key_build: " ++ "why is pfkey_key already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(!key_bits) { ++ ERROR("pfkey_key_build: " ++ "key_bits is zero, it must be non-zero.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if( !((exttype == SADB_EXT_KEY_AUTH) || (exttype == SADB_EXT_KEY_ENCRYPT))) { ++ ERROR("pfkey_key_build: " ++ "unsupported extension type=%d.\n", ++ exttype); ++ SENDERR(EINVAL); ++ } ++ ++ pfkey_key = (struct sadb_key*) ++ MALLOC(sizeof(struct sadb_key) + ++ DIVUP(key_bits, 64) * IPSEC_PFKEYv2_ALIGN); ++ ++ *pfkey_ext = (struct sadb_ext*)pfkey_key; ++ ++ if(pfkey_key == NULL) { ++ ERROR("pfkey_key_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ memset(pfkey_key, ++ 0, ++ sizeof(struct sadb_key) + ++ DIVUP(key_bits, 64) * IPSEC_PFKEYv2_ALIGN); ++ ++ pfkey_key->sadb_key_len = DIVUP(sizeof(struct sadb_key) * IPSEC_PFKEYv2_ALIGN + key_bits, ++ 64); ++ pfkey_key->sadb_key_exttype = exttype; ++ pfkey_key->sadb_key_bits = key_bits; ++ pfkey_key->sadb_key_reserved = 0; ++ memcpy((char*)pfkey_key + sizeof(struct sadb_key), ++ key, ++ DIVUP(key_bits, 8)); ++ ++errlab: ++ return error; ++} ++ ++int ++pfkey_ident_build(struct sadb_ext** pfkey_ext, ++ uint16_t exttype, ++ uint16_t ident_type, ++ uint64_t ident_id, ++ uint8_t ident_len, ++ char* ident_string) ++{ ++ int error = 0; ++ struct sadb_ident *pfkey_ident = (struct sadb_ident *)*pfkey_ext; ++ int data_len = ident_len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident); ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_ident_build:\n"); ++ /* sanity checks... */ ++ if(pfkey_ident) { ++ ERROR("pfkey_ident_build: " ++ "why is pfkey_ident already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if( ! 
((exttype == SADB_EXT_IDENTITY_SRC) || ++ (exttype == SADB_EXT_IDENTITY_DST))) { ++ ERROR("pfkey_ident_build: " ++ "unsupported extension type=%d.\n", ++ exttype); ++ SENDERR(EINVAL); ++ } ++ ++ if((ident_type == SADB_IDENTTYPE_RESERVED)) { ++ ERROR("pfkey_ident_build: " ++ "ident_type must be non-zero.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(ident_type > SADB_IDENTTYPE_MAX) { ++ ERROR("pfkey_ident_build: " ++ "identtype=%d out of range.\n", ++ ident_type); ++ SENDERR(EINVAL); ++ } ++ ++ if(((ident_type == SADB_IDENTTYPE_PREFIX) || ++ (ident_type == SADB_IDENTTYPE_FQDN)) && ++ !ident_string) { ++ ERROR("pfkey_ident_build: " ++ "string required to allocate size of extension.\n"); ++ SENDERR(EINVAL); ++ } ++ ++#if 0 ++ if((ident_type == SADB_IDENTTYPE_USERFQDN) ) { ++ } ++#endif ++ ++ pfkey_ident = (struct sadb_ident*) ++ MALLOC(ident_len * IPSEC_PFKEYv2_ALIGN); ++ ++ *pfkey_ext = (struct sadb_ext*)pfkey_ident; ++ ++ if(pfkey_ident == NULL) { ++ ERROR("pfkey_ident_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ memset(pfkey_ident, 0, ident_len * IPSEC_PFKEYv2_ALIGN); ++ ++ pfkey_ident->sadb_ident_len = ident_len; ++ pfkey_ident->sadb_ident_exttype = exttype; ++ pfkey_ident->sadb_ident_type = ident_type; ++ pfkey_ident->sadb_ident_reserved = 0; ++ pfkey_ident->sadb_ident_id = ident_id; ++ memcpy((char*)pfkey_ident + sizeof(struct sadb_ident), ++ ident_string, ++ data_len); ++ ++errlab: ++ return error; ++} ++ ++int ++pfkey_sens_build(struct sadb_ext** pfkey_ext, ++ uint32_t dpd, ++ uint8_t sens_level, ++ uint8_t sens_len, ++ uint64_t* sens_bitmap, ++ uint8_t integ_level, ++ uint8_t integ_len, ++ uint64_t* integ_bitmap) ++{ ++ int error = 0; ++ struct sadb_sens *pfkey_sens = (struct sadb_sens *)*pfkey_ext; ++ int i; ++ uint64_t* bitmap; ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_sens_build:\n"); ++ /* sanity checks... 
*/ ++ if(pfkey_sens) { ++ ERROR("pfkey_sens_build: " ++ "why is pfkey_sens already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_sens_build: " ++ "Sorry, I can't build exttype=%d yet.\n", ++ (*pfkey_ext)->sadb_ext_type); ++ SENDERR(EINVAL); /* don't process these yet */ ++ ++ pfkey_sens = (struct sadb_sens*) ++ MALLOC(sizeof(struct sadb_sens) + ++ (sens_len + integ_len) * sizeof(uint64_t)); ++ ++ *pfkey_ext = (struct sadb_ext*)pfkey_sens; ++ ++ if(pfkey_sens == NULL) { ++ ERROR("pfkey_sens_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ memset(pfkey_sens, ++ 0, ++ sizeof(struct sadb_sens) + ++ (sens_len + integ_len) * sizeof(uint64_t)); ++ ++ pfkey_sens->sadb_sens_len = (sizeof(struct sadb_sens) + ++ (sens_len + integ_len) * sizeof(uint64_t)) / IPSEC_PFKEYv2_ALIGN; ++ pfkey_sens->sadb_sens_exttype = SADB_EXT_SENSITIVITY; ++ pfkey_sens->sadb_sens_dpd = dpd; ++ pfkey_sens->sadb_sens_sens_level = sens_level; ++ pfkey_sens->sadb_sens_sens_len = sens_len; ++ pfkey_sens->sadb_sens_integ_level = integ_level; ++ pfkey_sens->sadb_sens_integ_len = integ_len; ++ pfkey_sens->sadb_sens_reserved = 0; ++ ++ bitmap = (uint64_t*)((char*)pfkey_ext + sizeof(struct sadb_sens)); ++ for(i = 0; i < sens_len; i++) { ++ *bitmap = sens_bitmap[i]; ++ bitmap++; ++ } ++ for(i = 0; i < integ_len; i++) { ++ *bitmap = integ_bitmap[i]; ++ bitmap++; ++ } ++ ++errlab: ++ return error; ++} ++ ++int ++pfkey_prop_build(struct sadb_ext** pfkey_ext, ++ uint8_t replay, ++ unsigned int comb_num, ++ struct sadb_comb* comb) ++{ ++ int error = 0; ++ int i; ++ struct sadb_prop *pfkey_prop = (struct sadb_prop *)*pfkey_ext; ++ struct sadb_comb *combp; ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_prop_build:\n"); ++ /* sanity checks... 
*/ ++ if(pfkey_prop) { ++ ERROR("pfkey_prop_build: " ++ "why is pfkey_prop already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ pfkey_prop = (struct sadb_prop*) ++ MALLOC(sizeof(struct sadb_prop) + ++ comb_num * sizeof(struct sadb_comb)); ++ ++ *pfkey_ext = (struct sadb_ext*)pfkey_prop; ++ ++ if(pfkey_prop == NULL) { ++ ERROR("pfkey_prop_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ memset(pfkey_prop, ++ 0, ++ sizeof(struct sadb_prop) + ++ comb_num * sizeof(struct sadb_comb)); ++ ++ pfkey_prop->sadb_prop_len = (sizeof(struct sadb_prop) + ++ comb_num * sizeof(struct sadb_comb)) / IPSEC_PFKEYv2_ALIGN; ++ ++ pfkey_prop->sadb_prop_exttype = SADB_EXT_PROPOSAL; ++ pfkey_prop->sadb_prop_replay = replay; ++ ++ for(i=0; i<3; i++) { ++ pfkey_prop->sadb_prop_reserved[i] = 0; ++ } ++ ++ combp = (struct sadb_comb*)((char*)*pfkey_ext + sizeof(struct sadb_prop)); ++ for(i = 0; i < comb_num; i++) { ++ memcpy (combp, &(comb[i]), sizeof(struct sadb_comb)); ++ combp++; ++ } ++ ++#if 0 ++ uint8_t sadb_comb_auth; ++ uint8_t sadb_comb_encrypt; ++ uint16_t sadb_comb_flags; ++ uint16_t sadb_comb_auth_minbits; ++ uint16_t sadb_comb_auth_maxbits; ++ uint16_t sadb_comb_encrypt_minbits; ++ uint16_t sadb_comb_encrypt_maxbits; ++ uint32_t sadb_comb_reserved; ++ uint32_t sadb_comb_soft_allocations; ++ uint32_t sadb_comb_hard_allocations; ++ uint64_t sadb_comb_soft_bytes; ++ uint64_t sadb_comb_hard_bytes; ++ uint64_t sadb_comb_soft_addtime; ++ uint64_t sadb_comb_hard_addtime; ++ uint64_t sadb_comb_soft_usetime; ++ uint64_t sadb_comb_hard_usetime; ++ uint32_t sadb_comb_soft_packets; ++ uint32_t sadb_comb_hard_packets; ++#endif ++errlab: ++ return error; ++} ++ ++int ++pfkey_supported_build(struct sadb_ext** pfkey_ext, ++ uint16_t exttype, ++ unsigned int alg_num, ++ struct sadb_alg* alg) ++{ ++ int error = 0; ++ unsigned int i; ++ struct sadb_supported *pfkey_supported = (struct sadb_supported *)*pfkey_ext; ++ struct sadb_alg *pfkey_alg; ++ ++ /* sanity 
checks... */ ++ if(pfkey_supported) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_supported_build: " ++ "why is pfkey_supported already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if( !((exttype == SADB_EXT_SUPPORTED_AUTH) || (exttype == SADB_EXT_SUPPORTED_ENCRYPT))) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_supported_build: " ++ "unsupported extension type=%d.\n", ++ exttype); ++ SENDERR(EINVAL); ++ } ++ ++ pfkey_supported = (struct sadb_supported*) ++ MALLOC(sizeof(struct sadb_supported) + ++ alg_num * ++ sizeof(struct sadb_alg)); ++ ++ *pfkey_ext = (struct sadb_ext*)pfkey_supported; ++ ++ if(pfkey_supported == NULL) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_supported_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ memset(pfkey_supported, ++ 0, ++ sizeof(struct sadb_supported) + ++ alg_num * ++ sizeof(struct sadb_alg)); ++ ++ pfkey_supported->sadb_supported_len = (sizeof(struct sadb_supported) + ++ alg_num * ++ sizeof(struct sadb_alg)) / ++ IPSEC_PFKEYv2_ALIGN; ++ pfkey_supported->sadb_supported_exttype = exttype; ++ pfkey_supported->sadb_supported_reserved = 0; ++ ++ pfkey_alg = (struct sadb_alg*)((char*)pfkey_supported + sizeof(struct sadb_supported)); ++ for(i = 0; i < alg_num; i++) { ++ memcpy (pfkey_alg, &(alg[i]), sizeof(struct sadb_alg)); ++ pfkey_alg->sadb_alg_reserved = 0; ++ pfkey_alg++; ++ } ++ ++#if 0 ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_supported_build: " ++ "Sorry, I can't build exttype=%d yet.\n", ++ (*pfkey_ext)->sadb_ext_type); ++ SENDERR(EINVAL); /* don't process these yet */ ++ ++ uint8_t sadb_alg_id; ++ uint8_t sadb_alg_ivlen; ++ uint16_t sadb_alg_minbits; ++ uint16_t sadb_alg_maxbits; ++ uint16_t sadb_alg_reserved; ++#endif ++errlab: ++ return error; ++} ++ ++int ++pfkey_spirange_build(struct sadb_ext** pfkey_ext, ++ uint16_t exttype, ++ uint32_t min, /* in network order */ ++ uint32_t max) /* in network order */ ++{ ++ int error = 0; ++ struct sadb_spirange *pfkey_spirange = (struct 
sadb_spirange *)*pfkey_ext; ++ ++ /* sanity checks... */ ++ if(pfkey_spirange) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_spirange_build: " ++ "why is pfkey_spirange already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(ntohl(max) < ntohl(min)) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_spirange_build: " ++ "minspi=%08x must be < maxspi=%08x.\n", ++ ntohl(min), ++ ntohl(max)); ++ SENDERR(EINVAL); ++ } ++ ++ if(ntohl(min) <= 255) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_spirange_build: " ++ "minspi=%08x must be > 255.\n", ++ ntohl(min)); ++ SENDERR(EEXIST); ++ } ++ ++ pfkey_spirange = (struct sadb_spirange*) ++ MALLOC(sizeof(struct sadb_spirange)); ++ ++ *pfkey_ext = (struct sadb_ext*)pfkey_spirange; ++ ++ if(pfkey_spirange == NULL) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_spirange_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ memset(pfkey_spirange, ++ 0, ++ sizeof(struct sadb_spirange)); ++ ++ pfkey_spirange->sadb_spirange_len = sizeof(struct sadb_spirange) / IPSEC_PFKEYv2_ALIGN; ++ ++ pfkey_spirange->sadb_spirange_exttype = SADB_EXT_SPIRANGE; ++ pfkey_spirange->sadb_spirange_min = min; ++ pfkey_spirange->sadb_spirange_max = max; ++ pfkey_spirange->sadb_spirange_reserved = 0; ++ errlab: ++ return error; ++} ++ ++int ++pfkey_x_kmprivate_build(struct sadb_ext** pfkey_ext) ++{ ++ int error = 0; ++ struct sadb_x_kmprivate *pfkey_x_kmprivate = (struct sadb_x_kmprivate *)*pfkey_ext; ++ ++ /* sanity checks... 
*/ ++ if(pfkey_x_kmprivate) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_x_kmprivate_build: " ++ "why is pfkey_x_kmprivate already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ pfkey_x_kmprivate->sadb_x_kmprivate_reserved = 0; ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_x_kmprivate_build: " ++ "Sorry, I can't build exttype=%d yet.\n", ++ (*pfkey_ext)->sadb_ext_type); ++ SENDERR(EINVAL); /* don't process these yet */ ++ ++ pfkey_x_kmprivate = (struct sadb_x_kmprivate*) ++ MALLOC(sizeof(struct sadb_x_kmprivate)); ++ ++ *pfkey_ext = (struct sadb_ext*)pfkey_x_kmprivate; ++ ++ if(pfkey_x_kmprivate == NULL) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_x_kmprivate_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ memset(pfkey_x_kmprivate, ++ 0, ++ sizeof(struct sadb_x_kmprivate)); ++ ++ pfkey_x_kmprivate->sadb_x_kmprivate_len = ++ sizeof(struct sadb_x_kmprivate) / IPSEC_PFKEYv2_ALIGN; ++ ++ pfkey_x_kmprivate->sadb_x_kmprivate_exttype = SADB_X_EXT_KMPRIVATE; ++ pfkey_x_kmprivate->sadb_x_kmprivate_reserved = 0; ++errlab: ++ return error; ++} ++ ++int ++pfkey_x_satype_build(struct sadb_ext** pfkey_ext, ++ uint8_t satype) ++{ ++ int error = 0; ++ int i; ++ struct sadb_x_satype *pfkey_x_satype = (struct sadb_x_satype *)*pfkey_ext; ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_x_satype_build:\n"); ++ /* sanity checks... 
*/ ++ if(pfkey_x_satype) { ++ ERROR("pfkey_x_satype_build: " ++ "why is pfkey_x_satype already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(!satype) { ++ ERROR("pfkey_x_satype_build: " ++ "SA type not set, must be non-zero.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(satype > SADB_SATYPE_MAX) { ++ ERROR("pfkey_x_satype_build: " ++ "satype %d > max %d\n", ++ satype, SADB_SATYPE_MAX); ++ SENDERR(EINVAL); ++ } ++ ++ pfkey_x_satype = (struct sadb_x_satype*) ++ MALLOC(sizeof(struct sadb_x_satype)); ++ ++ *pfkey_ext = (struct sadb_ext*)pfkey_x_satype; ++ if(pfkey_x_satype == NULL) { ++ ERROR("pfkey_x_satype_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ memset(pfkey_x_satype, ++ 0, ++ sizeof(struct sadb_x_satype)); ++ ++ pfkey_x_satype->sadb_x_satype_len = sizeof(struct sadb_x_satype) / IPSEC_PFKEYv2_ALIGN; ++ ++ pfkey_x_satype->sadb_x_satype_exttype = SADB_X_EXT_SATYPE2; ++ pfkey_x_satype->sadb_x_satype_satype = satype; ++ for(i=0; i<3; i++) { ++ pfkey_x_satype->sadb_x_satype_reserved[i] = 0; ++ } ++ ++errlab: ++ return error; ++} ++ ++int ++pfkey_x_debug_build(struct sadb_ext** pfkey_ext, ++ uint32_t tunnel, ++ uint32_t netlink, ++ uint32_t xform, ++ uint32_t eroute, ++ uint32_t spi, ++ uint32_t radij, ++ uint32_t esp, ++ uint32_t ah, ++ uint32_t rcv, ++ uint32_t pfkey, ++ uint32_t ipcomp, ++ uint32_t verbose) ++{ ++ int error = 0; ++ int i; ++ struct sadb_x_debug *pfkey_x_debug = (struct sadb_x_debug *)*pfkey_ext; ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_x_debug_build:\n"); ++ /* sanity checks... 
*/ ++ if(pfkey_x_debug) { ++ ERROR("pfkey_x_debug_build: " ++ "why is pfkey_x_debug already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_x_debug_build: " ++ "tunnel=%x netlink=%x xform=%x eroute=%x spi=%x radij=%x esp=%x ah=%x rcv=%x pfkey=%x ipcomp=%x verbose=%x?\n", ++ tunnel, netlink, xform, eroute, spi, radij, esp, ah, rcv, pfkey, ipcomp, verbose); ++ ++ pfkey_x_debug = (struct sadb_x_debug*) ++ MALLOC(sizeof(struct sadb_x_debug)); ++ ++ *pfkey_ext = (struct sadb_ext*)pfkey_x_debug; ++ ++ if(pfkey_x_debug == NULL) { ++ ERROR("pfkey_x_debug_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++#if 0 ++ memset(pfkey_x_debug, ++ 0, ++ sizeof(struct sadb_x_debug)); ++#endif ++ ++ pfkey_x_debug->sadb_x_debug_len = sizeof(struct sadb_x_debug) / IPSEC_PFKEYv2_ALIGN; ++ pfkey_x_debug->sadb_x_debug_exttype = SADB_X_EXT_DEBUG; ++ ++ pfkey_x_debug->sadb_x_debug_tunnel = tunnel; ++ pfkey_x_debug->sadb_x_debug_netlink = netlink; ++ pfkey_x_debug->sadb_x_debug_xform = xform; ++ pfkey_x_debug->sadb_x_debug_eroute = eroute; ++ pfkey_x_debug->sadb_x_debug_spi = spi; ++ pfkey_x_debug->sadb_x_debug_radij = radij; ++ pfkey_x_debug->sadb_x_debug_esp = esp; ++ pfkey_x_debug->sadb_x_debug_ah = ah; ++ pfkey_x_debug->sadb_x_debug_rcv = rcv; ++ pfkey_x_debug->sadb_x_debug_pfkey = pfkey; ++ pfkey_x_debug->sadb_x_debug_ipcomp = ipcomp; ++ pfkey_x_debug->sadb_x_debug_verbose = verbose; ++ ++ for(i=0; i<4; i++) { ++ pfkey_x_debug->sadb_x_debug_reserved[i] = 0; ++ } ++ ++errlab: ++ return error; ++} ++ ++int ++pfkey_x_nat_t_type_build(struct sadb_ext** pfkey_ext, ++ uint8_t type) ++{ ++ int error = 0; ++ int i; ++ struct sadb_x_nat_t_type *pfkey_x_nat_t_type = (struct sadb_x_nat_t_type *)*pfkey_ext; ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_x_nat_t_type_build:\n"); ++ /* sanity checks... 
*/ ++ if(pfkey_x_nat_t_type) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_x_nat_t_type_build: " ++ "why is pfkey_x_nat_t_type already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_x_nat_t_type_build: " ++ "type=%d\n", type); ++ ++ pfkey_x_nat_t_type = (struct sadb_x_nat_t_type*) ++ MALLOC(sizeof(struct sadb_x_nat_t_type)); ++ ++ *pfkey_ext = (struct sadb_ext*)pfkey_x_nat_t_type; ++ ++ if(pfkey_x_nat_t_type == NULL) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_x_nat_t_type_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ ++ pfkey_x_nat_t_type->sadb_x_nat_t_type_len = sizeof(struct sadb_x_nat_t_type) / IPSEC_PFKEYv2_ALIGN; ++ pfkey_x_nat_t_type->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE; ++ pfkey_x_nat_t_type->sadb_x_nat_t_type_type = type; ++ for(i=0; i<3; i++) { ++ pfkey_x_nat_t_type->sadb_x_nat_t_type_reserved[i] = 0; ++ } ++ ++errlab: ++ return error; ++} ++int ++pfkey_x_nat_t_port_build(struct sadb_ext** pfkey_ext, ++ uint16_t exttype, ++ uint16_t port) ++{ ++ int error = 0; ++ struct sadb_x_nat_t_port *pfkey_x_nat_t_port = (struct sadb_x_nat_t_port *)*pfkey_ext; ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_x_nat_t_port_build:\n"); ++ /* sanity checks... 
*/ ++ if(pfkey_x_nat_t_port) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_x_nat_t_port_build: " ++ "why is pfkey_x_nat_t_port already pointing to something?\n"); ++ SENDERR(EINVAL); ++ } ++ ++ switch(exttype) { ++ case SADB_X_EXT_NAT_T_SPORT: ++ case SADB_X_EXT_NAT_T_DPORT: ++ break; ++ default: ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_nat_t_port_build: " ++ "unrecognised ext_type=%d.\n", ++ exttype); ++ SENDERR(EINVAL); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_x_nat_t_port_build: " ++ "ext=%d, port=%d\n", exttype, port); ++ ++ pfkey_x_nat_t_port = (struct sadb_x_nat_t_port*) ++ MALLOC(sizeof(struct sadb_x_nat_t_port)); ++ ++ *pfkey_ext = (struct sadb_ext*)pfkey_x_nat_t_port; ++ ++ if(pfkey_x_nat_t_port == NULL) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_x_nat_t_port_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ ++ pfkey_x_nat_t_port->sadb_x_nat_t_port_len = sizeof(struct sadb_x_nat_t_port) / IPSEC_PFKEYv2_ALIGN; ++ pfkey_x_nat_t_port->sadb_x_nat_t_port_exttype = exttype; ++ pfkey_x_nat_t_port->sadb_x_nat_t_port_port = port; ++ pfkey_x_nat_t_port->sadb_x_nat_t_port_reserved = 0; ++ ++errlab: ++ return error; ++} ++ ++int pfkey_x_protocol_build(struct sadb_ext **pfkey_ext, ++ uint8_t protocol) ++{ ++ int error = 0; ++ struct sadb_protocol * p = (struct sadb_protocol *)*pfkey_ext; ++ DEBUGGING(PF_KEY_DEBUG_BUILD,"pfkey_x_protocol_build: protocol=%u\n", protocol); ++ /* sanity checks... 
*/ ++ if (p != 0) { ++ ERROR("pfkey_x_protocol_build: bogus protocol pointer\n"); ++ SENDERR(EINVAL); ++ } ++ if ((p = (struct sadb_protocol*)MALLOC(sizeof(*p))) == 0) { ++ ERROR("pfkey_build: memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ *pfkey_ext = (struct sadb_ext *)p; ++ p->sadb_protocol_len = sizeof(*p) / sizeof(uint64_t); ++ p->sadb_protocol_exttype = SADB_X_EXT_PROTOCOL; ++ p->sadb_protocol_proto = protocol; ++ p->sadb_protocol_flags = 0; ++ p->sadb_protocol_reserved2 = 0; ++ errlab: ++ return error; ++} ++ ++int ++pfkey_msg_build(struct sadb_msg **pfkey_msg, struct sadb_ext *extensions[], int dir) ++{ ++ int error = 0; ++ unsigned ext; ++ unsigned total_size; ++ struct sadb_ext *pfkey_ext; ++ int extensions_seen = 0; ++#ifndef __KERNEL__ ++ struct sadb_ext *extensions_check[SADB_EXT_MAX + 1]; ++#endif ++ ++ if(!extensions[0]) { ++ ERROR("pfkey_msg_build: " ++ "extensions[0] must be specified (struct sadb_msg).\n"); ++ SENDERR(EINVAL); ++ } ++ ++ /* figure out the total size for all the requested extensions */ ++ total_size = IPSEC_PFKEYv2_WORDS(sizeof(struct sadb_msg)); ++ for(ext = 1; ext <= SADB_EXT_MAX; ext++) { ++ if(extensions[ext]) { ++ total_size += (extensions[ext])->sadb_ext_len; ++ } ++ } ++ ++ /* allocate that much space */ ++ *pfkey_msg = (struct sadb_msg*)MALLOC(total_size * IPSEC_PFKEYv2_ALIGN); ++ if(*pfkey_msg == NULL) { ++ ERROR("pfkey_msg_build: " ++ "memory allocation failed\n"); ++ SENDERR(ENOMEM); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_msg_build: " ++ "pfkey_msg=0p%p allocated %lu bytes, &(extensions[0])=0p%p\n", ++ *pfkey_msg, ++ (unsigned long)(total_size * IPSEC_PFKEYv2_ALIGN), ++ &(extensions[0])); ++ ++ memcpy(*pfkey_msg, ++ extensions[0], ++ sizeof(struct sadb_msg)); ++ (*pfkey_msg)->sadb_msg_len = total_size; ++ (*pfkey_msg)->sadb_msg_reserved = 0; ++ extensions_seen = 1 ; ++ ++ /* ++ * point pfkey_ext to immediately after the space for the header, ++ * i.e. at the first extension location. 
++ */ ++ pfkey_ext = (struct sadb_ext*)(((char*)(*pfkey_msg)) + sizeof(struct sadb_msg)); ++ ++ for(ext = 1; ext <= SADB_EXT_MAX; ext++) { ++ /* copy from extension[ext] to buffer */ ++ if(extensions[ext]) { ++ /* Is this type of extension permitted for this type of message? */ ++ if(!(extensions_bitmaps[dir][EXT_BITS_PERM][(*pfkey_msg)->sadb_msg_type] & ++ 1<sadb_msg_type], ++ 1<sadb_ext_len * IPSEC_PFKEYv2_ALIGN), ++ ext, ++ extensions[ext]->sadb_ext_type); ++ ++ memcpy(pfkey_ext, ++ extensions[ext], ++ (extensions[ext])->sadb_ext_len * IPSEC_PFKEYv2_ALIGN); ++ { ++ char *pfkey_ext_c = (char *)pfkey_ext; ++ ++ pfkey_ext_c += (extensions[ext])->sadb_ext_len * IPSEC_PFKEYv2_ALIGN; ++ pfkey_ext = (struct sadb_ext *)pfkey_ext_c; ++ } ++ ++ /* Mark that we have seen this extension and remember the header location */ ++ extensions_seen |= ( 1 << ext ); ++ } ++ } ++ ++ /* check required extensions */ ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_msg_build: " ++ "extensions permitted=%08x, seen=%08x, required=%08x.\n", ++ extensions_bitmaps[dir][EXT_BITS_PERM][(*pfkey_msg)->sadb_msg_type], ++ extensions_seen, ++ extensions_bitmaps[dir][EXT_BITS_REQ][(*pfkey_msg)->sadb_msg_type]); ++ ++ if((extensions_seen & ++ extensions_bitmaps[dir][EXT_BITS_REQ][(*pfkey_msg)->sadb_msg_type]) != ++ extensions_bitmaps[dir][EXT_BITS_REQ][(*pfkey_msg)->sadb_msg_type]) { ++ DEBUGGING(PF_KEY_DEBUG_BUILD, ++ "pfkey_msg_build: " ++ "required extensions missing:%08x.\n", ++ extensions_bitmaps[dir][EXT_BITS_REQ][(*pfkey_msg)->sadb_msg_type] - ++ (extensions_seen & ++ extensions_bitmaps[dir][EXT_BITS_REQ][(*pfkey_msg)->sadb_msg_type]) ); ++ SENDERR(EINVAL); ++ } ++ ++#ifndef __KERNEL__ ++/* ++ * this is silly, there is no need to reparse the message that we just built. 
++ * ++ */ ++ if((error = pfkey_msg_parse(*pfkey_msg, NULL, extensions_check, dir))) { ++ ERROR( ++ "pfkey_msg_build: " ++ "Trouble parsing newly built pfkey message, error=%d.\n", ++ error); ++ SENDERR(-error); ++ } ++#endif ++ ++errlab: ++ ++ return error; ++} ++ ++/* ++ * $Log: pfkey_v2_build.c,v $ ++ * Revision 1.51.8.1 2006-05-01 14:36:39 mcr ++ * get rid of dead code. ++ * ++ * Revision 1.51 2004/10/03 01:26:36 mcr ++ * fixes for gcc 3.4 compilation. ++ * ++ * Revision 1.50 2004/07/10 07:48:35 mcr ++ * Moved from linux/lib/libfreeswan/pfkey_v2_build.c,v ++ * ++ * Revision 1.49 2004/04/12 02:59:06 mcr ++ * erroneously moved pfkey_v2_build.c ++ * ++ * Revision 1.48 2004/04/09 18:00:40 mcr ++ * Moved from linux/lib/libfreeswan/pfkey_v2_build.c,v ++ * ++ * Revision 1.47 2004/03/08 01:59:08 ken ++ * freeswan.h -> openswan.h ++ * ++ * Revision 1.46 2003/12/10 01:20:19 mcr ++ * NAT-traversal patches to KLIPS. ++ * ++ * Revision 1.45 2003/12/04 23:01:12 mcr ++ * removed ipsec_netlink.h ++ * ++ * Revision 1.44 2003/10/31 02:27:12 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.43.4.2 2003/10/29 01:11:32 mcr ++ * added debugging for pfkey library. ++ * ++ * Revision 1.43.4.1 2003/09/21 13:59:44 mcr ++ * pre-liminary X.509 patch - does not yet pass tests. ++ * ++ * Revision 1.43 2003/05/07 17:29:17 mcr ++ * new function pfkey_debug_func added for us in debugging from ++ * pfkey library. ++ * ++ * Revision 1.42 2003/01/30 02:32:09 rgb ++ * ++ * Rename SAref table macro names for clarity. ++ * Convert IPsecSAref_t from signed to unsigned to fix apparent SAref exhaustion bug. 
++ * ++ * Revision 1.41 2002/12/13 18:16:02 mcr ++ * restored sa_ref code ++ * ++ * Revision 1.40 2002/12/13 18:06:52 mcr ++ * temporarily removed sadb_x_sa_ref reference for 2.xx ++ * ++ * Revision 1.39 2002/12/13 17:43:28 mcr ++ * commented out access to sadb_x_sa_ref for 2.xx branch ++ * ++ * Revision 1.38 2002/10/09 03:12:05 dhr ++ * ++ * [kenb+dhr] 64-bit fixes ++ * ++ * Revision 1.37 2002/09/20 15:40:39 rgb ++ * Added new function pfkey_sa_ref_build() to accomodate saref parameter. ++ * ++ * Revision 1.36 2002/09/20 05:01:22 rgb ++ * Generalise for platform independance: fix (ia64) using unsigned for sizes. ++ * ++ * Revision 1.35 2002/07/24 18:44:54 rgb ++ * Type fiddling to tame ia64 compiler. ++ * ++ * Revision 1.34 2002/05/23 07:14:11 rgb ++ * Cleaned up %p variants to 0p%p for test suite cleanup. ++ * ++ * Revision 1.33 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.32 2002/04/24 07:36:40 mcr ++ * Moved from ./lib/pfkey_v2_build.c,v ++ * ++ * Revision 1.31 2002/01/29 22:25:35 rgb ++ * Re-add ipsec_kversion.h to keep MALLOC happy. ++ * ++ * Revision 1.30 2002/01/29 01:59:09 mcr ++ * removal of kversions.h - sources that needed it now use ipsec_param.h. ++ * updating of IPv6 structures to match latest in6.h version. ++ * removed dead code from openswan.h that also duplicated kversions.h ++ * code. ++ * ++ * Revision 1.29 2001/12/19 21:06:09 rgb ++ * Added port numbers to pfkey_address_build() debugging. ++ * ++ * Revision 1.28 2001/11/06 19:47:47 rgb ++ * Added packet parameter to lifetime and comb structures. ++ * ++ * Revision 1.27 2001/10/18 04:45:24 rgb ++ * 2.4.9 kernel deprecates linux/malloc.h in favour of linux/slab.h, ++ * lib/openswan.h version macros moved to lib/kversions.h. ++ * Other compiler directive cleanups. ++ * ++ * Revision 1.26 2001/09/08 21:13:34 rgb ++ * Added pfkey ident extension support for ISAKMPd. 
(NetCelo) ++ * ++ * Revision 1.25 2001/06/14 19:35:16 rgb ++ * Update copyright date. ++ * ++ * Revision 1.24 2001/03/20 03:49:45 rgb ++ * Ditch superfluous debug_pfkey declaration. ++ * Move misplaced openswan.h inclusion for kernel case. ++ * ++ * Revision 1.23 2001/03/16 07:41:50 rgb ++ * Put openswan.h include before pluto includes. ++ * ++ * Revision 1.22 2001/02/27 22:24:56 rgb ++ * Re-formatting debug output (line-splitting, joining, 1arg/line). ++ * Check for satoa() return codes. ++ * ++ * Revision 1.21 2000/11/17 18:10:30 rgb ++ * Fixed bugs mostly relating to spirange, to treat all spi variables as ++ * network byte order since this is the way PF_KEYv2 stored spis. ++ * ++ * Revision 1.20 2000/10/12 00:02:39 rgb ++ * Removed 'format, ##' nonsense from debug macros for RH7.0. ++ * ++ * Revision 1.19 2000/10/10 20:10:20 rgb ++ * Added support for debug_ipcomp and debug_verbose to klipsdebug. ++ * ++ * Revision 1.18 2000/09/12 18:59:54 rgb ++ * Added Gerhard's IPv6 support to pfkey parts of libopenswan. ++ * ++ * Revision 1.17 2000/09/12 03:27:00 rgb ++ * Moved DEBUGGING definition to compile kernel with debug off. ++ * ++ * Revision 1.16 2000/09/08 19:22:12 rgb ++ * Fixed pfkey_prop_build() parameter to be only single indirection. ++ * Fixed struct alg copy. ++ * ++ * Revision 1.15 2000/08/20 21:40:01 rgb ++ * Added an address parameter sanity check to pfkey_address_build(). ++ * ++ * Revision 1.14 2000/08/15 17:29:23 rgb ++ * Fixes from SZI to untested pfkey_prop_build(). ++ * ++ * Revision 1.13 2000/06/02 22:54:14 rgb ++ * Added Gerhard Gessler's struct sockaddr_storage mods for IPv6 support. ++ * ++ * Revision 1.12 2000/05/10 19:24:01 rgb ++ * Fleshed out sensitivity, proposal and supported extensions. ++ * ++ * Revision 1.11 2000/03/16 14:07:23 rgb ++ * Renamed ALIGN macro to avoid fighting with others in kernel. ++ * ++ * Revision 1.10 2000/01/24 21:14:35 rgb ++ * Added disabled pluto pfkey lib debug flag. 
++ * ++ * Revision 1.9 2000/01/21 06:27:32 rgb ++ * Added address cases for eroute flows. ++ * Removed unused code. ++ * Dropped unused argument to pfkey_x_satype_build(). ++ * Indented compiler directives for readability. ++ * Added klipsdebug switching capability. ++ * Fixed SADB_EXT_MAX bug not permitting last extension access. ++ * ++ * Revision 1.8 1999/12/29 21:17:41 rgb ++ * Changed pfkey_msg_build() I/F to include a struct sadb_msg** ++ * parameter for cleaner manipulation of extensions[] and to guard ++ * against potential memory leaks. ++ * Changed the I/F to pfkey_msg_free() for the same reason. ++ * ++ * Revision 1.7 1999/12/09 23:12:20 rgb ++ * Removed unused cruft. ++ * Added argument to pfkey_sa_build() to do eroutes. ++ * Fixed exttype check in as yet unused pfkey_lifetime_build(). ++ * ++ * Revision 1.6 1999/12/07 19:54:29 rgb ++ * Removed static pluto debug flag. ++ * Added functions for pfkey message and extensions initialisation ++ * and cleanup. ++ * ++ * Revision 1.5 1999/12/01 22:20:06 rgb ++ * Changed pfkey_sa_build to accept an SPI in network byte order. ++ * Added to quiet userspace compiler. ++ * Moved pfkey_lib_debug variable into the library. ++ * Removed SATYPE check from pfkey_msg_hdr_build so FLUSH will work. ++ * Added extension assembly debugging. ++ * Isolated assignment with brackets to be sure of scope. ++ * ++ * Revision 1.4 1999/11/27 11:57:35 rgb ++ * Added ipv6 headers. ++ * Remove over-zealous algorithm sanity checkers from pfkey_sa_build. ++ * Debugging error messages added. ++ * Fixed missing auth and encrypt assignment bug. ++ * Add argument to pfkey_msg_parse() for direction. ++ * Move parse-after-build check inside pfkey_msg_build(). ++ * Consolidated the 4 1-d extension bitmap arrays into one 4-d array. ++ * Add CVS log entry to bottom of file. 
++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/pfkey_v2_debug.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,181 @@ ++/* ++ * @(#) pfkey version 2 debugging messages ++ * ++ * Copyright (C) 2001 Richard Guy Briggs ++ * and Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: pfkey_v2_debug.c,v 1.11 2005-04-06 17:45:16 mcr Exp $ ++ * ++ */ ++ ++#ifdef __KERNEL__ ++ ++# include /* for printk */ ++ ++# include "openswan/ipsec_kversion.h" /* for malloc switch */ ++# ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++# else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++# endif /* MALLOC_SLAB */ ++# include /* error codes */ ++# include /* size_t */ ++# include /* mark_bh */ ++ ++# include /* struct device, and other headers */ ++# include /* eth_type_trans */ ++extern int debug_pfkey; ++ ++#else /* __KERNEL__ */ ++ ++# include ++# include ++# include ++ ++#endif /* __KERNEL__ */ ++ ++#include "openswan.h" ++#include "pfkeyv2.h" ++#include "pfkey.h" ++ ++/* ++ * This file provides ASCII translations of PF_KEY magic numbers. 
++ * ++ */ ++ ++static char *pfkey_sadb_ext_strings[]={ ++ "reserved", /* SADB_EXT_RESERVED 0 */ ++ "security-association", /* SADB_EXT_SA 1 */ ++ "lifetime-current", /* SADB_EXT_LIFETIME_CURRENT 2 */ ++ "lifetime-hard", /* SADB_EXT_LIFETIME_HARD 3 */ ++ "lifetime-soft", /* SADB_EXT_LIFETIME_SOFT 4 */ ++ "source-address", /* SADB_EXT_ADDRESS_SRC 5 */ ++ "destination-address", /* SADB_EXT_ADDRESS_DST 6 */ ++ "proxy-address", /* SADB_EXT_ADDRESS_PROXY 7 */ ++ "authentication-key", /* SADB_EXT_KEY_AUTH 8 */ ++ "cipher-key", /* SADB_EXT_KEY_ENCRYPT 9 */ ++ "source-identity", /* SADB_EXT_IDENTITY_SRC 10 */ ++ "destination-identity", /* SADB_EXT_IDENTITY_DST 11 */ ++ "sensitivity-label", /* SADB_EXT_SENSITIVITY 12 */ ++ "proposal", /* SADB_EXT_PROPOSAL 13 */ ++ "supported-auth", /* SADB_EXT_SUPPORTED_AUTH 14 */ ++ "supported-cipher", /* SADB_EXT_SUPPORTED_ENCRYPT 15 */ ++ "spi-range", /* SADB_EXT_SPIRANGE 16 */ ++ "X-kmpprivate", /* SADB_X_EXT_KMPRIVATE 17 */ ++ "X-satype2", /* SADB_X_EXT_SATYPE2 18 */ ++ "X-security-association", /* SADB_X_EXT_SA2 19 */ ++ "X-destination-address2", /* SADB_X_EXT_ADDRESS_DST2 20 */ ++ "X-source-flow-address", /* SADB_X_EXT_ADDRESS_SRC_FLOW 21 */ ++ "X-dest-flow-address", /* SADB_X_EXT_ADDRESS_DST_FLOW 22 */ ++ "X-source-mask", /* SADB_X_EXT_ADDRESS_SRC_MASK 23 */ ++ "X-dest-mask", /* SADB_X_EXT_ADDRESS_DST_MASK 24 */ ++ "X-set-debug", /* SADB_X_EXT_DEBUG 25 */ ++ /* NAT_TRAVERSAL */ ++ "X-NAT-T-type", /* SADB_X_EXT_NAT_T_TYPE 26 */ ++ "X-NAT-T-sport", /* SADB_X_EXT_NAT_T_SPORT 27 */ ++ "X-NAT-T-dport", /* SADB_X_EXT_NAT_T_DPORT 28 */ ++ "X-NAT-T-OA", /* SADB_X_EXT_NAT_T_OA 29 */ ++}; ++ ++const char * ++pfkey_v2_sadb_ext_string(int ext) ++{ ++ if(ext <= SADB_EXT_MAX) { ++ return pfkey_sadb_ext_strings[ext]; ++ } else { ++ return "unknown-ext"; ++ } ++} ++ ++ ++static char *pfkey_sadb_type_strings[]={ ++ "reserved", /* SADB_RESERVED */ ++ "getspi", /* SADB_GETSPI */ ++ "update", /* SADB_UPDATE */ ++ "add", /* SADB_ADD */ ++ "delete", /* 
SADB_DELETE */ ++ "get", /* SADB_GET */ ++ "acquire", /* SADB_ACQUIRE */ ++ "register", /* SADB_REGISTER */ ++ "expire", /* SADB_EXPIRE */ ++ "flush", /* SADB_FLUSH */ ++ "dump", /* SADB_DUMP */ ++ "x-promisc", /* SADB_X_PROMISC */ ++ "x-pchange", /* SADB_X_PCHANGE */ ++ "x-groupsa", /* SADB_X_GRPSA */ ++ "x-addflow(eroute)", /* SADB_X_ADDFLOW */ ++ "x-delflow(eroute)", /* SADB_X_DELFLOW */ ++ "x-debug", /* SADB_X_DEBUG */ ++}; ++ ++const char * ++pfkey_v2_sadb_type_string(int sadb_type) ++{ ++ if(sadb_type <= SADB_MAX) { ++ return pfkey_sadb_type_strings[sadb_type]; ++ } else { ++ return "unknown-sadb-type"; ++ } ++} ++ ++ ++ ++ ++/* ++ * $Log: pfkey_v2_debug.c,v $ ++ * Revision 1.11 2005-04-06 17:45:16 mcr ++ * always include NAT-T names. ++ * ++ * Revision 1.10 2004/07/10 07:48:35 mcr ++ * Moved from linux/lib/libfreeswan/pfkey_v2_debug.c,v ++ * ++ * Revision 1.9 2004/03/08 01:59:08 ken ++ * freeswan.h -> openswan.h ++ * ++ * Revision 1.8 2003/12/10 01:20:19 mcr ++ * NAT-traversal patches to KLIPS. ++ * ++ * Revision 1.7 2002/09/20 05:01:26 rgb ++ * Fixed limit inclusion error in both type and ext string conversion. ++ * ++ * Revision 1.6 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.5 2002/04/24 07:36:40 mcr ++ * Moved from ./lib/pfkey_v2_debug.c,v ++ * ++ * Revision 1.4 2002/01/29 22:25:36 rgb ++ * Re-add ipsec_kversion.h to keep MALLOC happy. ++ * ++ * Revision 1.3 2002/01/29 01:59:09 mcr ++ * removal of kversions.h - sources that needed it now use ipsec_param.h. ++ * updating of IPv6 structures to match latest in6.h version. ++ * removed dead code from openswan.h that also duplicated kversions.h ++ * code. ++ * ++ * Revision 1.2 2002/01/20 20:34:50 mcr ++ * added pfkey_v2_sadb_type_string to decode sadb_type to string. ++ * ++ * Revision 1.1 2001/11/27 05:30:06 mcr ++ * initial set of debug strings for pfkey debugging. ++ * this will eventually only be included for debug builds. 
++ * ++ * Revision 1.1 2001/09/21 04:12:03 mcr ++ * first compilable version. ++ * ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/pfkey_v2_ext_bits.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,814 @@ ++/* ++ * RFC2367 PF_KEYv2 Key management API message parser ++ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: pfkey_v2_ext_bits.c,v 1.22 2005-05-11 01:45:31 mcr Exp $ ++ */ ++ ++/* ++ * Template from klips/net/ipsec/ipsec/ipsec_parse.c. 
++ */ ++ ++char pfkey_v2_ext_bits_c_version[] = "$Id: pfkey_v2_ext_bits.c,v 1.22 2005-05-11 01:45:31 mcr Exp $"; ++ ++/* ++ * Some ugly stuff to allow consistent debugging code for use in the ++ * kernel and in user space ++*/ ++ ++#ifdef __KERNEL__ ++ ++# include /* for printk */ ++ ++# include "openswan/ipsec_kversion.h" /* for malloc switch */ ++# ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++# else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++# endif /* MALLOC_SLAB */ ++# include /* error codes */ ++# include /* size_t */ ++# include /* mark_bh */ ++ ++# include /* struct device, and other headers */ ++# include /* eth_type_trans */ ++# include /* struct iphdr */ ++# if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++# include ++# endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ ++ ++#else /* __KERNEL__ */ ++ ++# include ++# include ++# include ++#endif ++ ++#include ++#include ++#include ++ ++unsigned int extensions_bitmaps[2/*in/out*/][2/*perm/req*/][SADB_EXTENSIONS_MAX] = { ++ ++/* INBOUND EXTENSIONS */ ++{ ++ ++/* PERMITTED IN */ ++{ ++/* SADB_RESERVED */ ++0 ++, ++/* SADB_GETSPI */ ++1< openswan.h ++ * ++ * Revision 1.19 2003/12/22 21:38:13 mcr ++ * removed extraenous #endif. ++ * ++ * Revision 1.18 2003/12/22 19:34:41 mcr ++ * added 0.6c NAT-T patch. ++ * ++ * Revision 1.17 2003/12/10 01:20:19 mcr ++ * NAT-traversal patches to KLIPS. ++ * ++ * Revision 1.16 2003/10/31 02:27:12 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.15.30.1 2003/09/21 13:59:44 mcr ++ * pre-liminary X.509 patch - does not yet pass tests. ++ * ++ * Revision 1.15 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.14 2002/04/24 07:36:40 mcr ++ * Moved from ./lib/pfkey_v2_ext_bits.c,v ++ * ++ * Revision 1.13 2002/01/29 22:25:36 rgb ++ * Re-add ipsec_kversion.h to keep MALLOC happy. 
++ * ++ * Revision 1.12 2002/01/29 01:59:10 mcr ++ * removal of kversions.h - sources that needed it now use ipsec_param.h. ++ * updating of IPv6 structures to match latest in6.h version. ++ * removed dead code from openswan.h that also duplicated kversions.h ++ * code. ++ * ++ * Revision 1.11 2001/10/18 04:45:24 rgb ++ * 2.4.9 kernel deprecates linux/malloc.h in favour of linux/slab.h, ++ * lib/openswan.h version macros moved to lib/kversions.h. ++ * Other compiler directive cleanups. ++ * ++ * Revision 1.10 2001/09/08 21:13:35 rgb ++ * Added pfkey ident extension support for ISAKMPd. (NetCelo) ++ * ++ * Revision 1.9 2001/06/14 19:35:16 rgb ++ * Update copyright date. ++ * ++ * Revision 1.8 2001/03/26 23:07:36 rgb ++ * Remove requirement for auth and enc key from UPDATE. ++ * ++ * Revision 1.7 2000/09/12 22:35:37 rgb ++ * Restructured to remove unused extensions from CLEARFLOW messages. ++ * ++ * Revision 1.6 2000/09/09 06:39:01 rgb ++ * Added comments for clarity. ++ * ++ * Revision 1.5 2000/06/02 22:54:14 rgb ++ * Added Gerhard Gessler's struct sockaddr_storage mods for IPv6 support. ++ * ++ * Revision 1.4 2000/01/21 06:27:56 rgb ++ * Added address cases for eroute flows. ++ * Added comments for each message type. ++ * Added klipsdebug switching capability. ++ * Fixed GRPSA bitfields. ++ * ++ * Revision 1.3 1999/12/01 22:20:27 rgb ++ * Remove requirement for a proxy address in an incoming getspi message. ++ * ++ * Revision 1.2 1999/11/27 11:57:06 rgb ++ * Consolidated the 4 1-d extension bitmap arrays into one 4-d array. ++ * Add CVS log entry to bottom of file. ++ * Cleaned out unused bits. ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/pfkey_v2_ext_process.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,971 @@ ++/* ++ * @(#) RFC2367 PF_KEYv2 Key management API message parser ++ * Copyright (C) 1998-2003 Richard Guy Briggs. 
++ * Copyright (C) 2004 Michael Richardson ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: pfkey_v2_ext_process.c,v 1.20.2.4 2007-11-16 03:42:22 paul Exp $ ++ */ ++ ++/* ++ * Template from klips/net/ipsec/ipsec/ipsec_netlink.c. ++ */ ++ ++char pfkey_v2_ext_process_c_version[] = "$Id: pfkey_v2_ext_process.c,v 1.20.2.4 2007-11-16 03:42:22 paul Exp $"; ++ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include ++ ++#include ++ ++#include ++ ++#ifdef SPINLOCK ++# ifdef SPINLOCK_23 ++# include /* *lock* */ ++# else /* SPINLOCK_23 */ ++# include /* *lock* */ ++# endif /* SPINLOCK_23 */ ++#endif /* SPINLOCK */ ++#ifdef NET_21 ++# include ++# define ip_chk_addr inet_addr_type ++# define IS_MYADDR RTN_LOCAL ++#endif ++ ++#include ++#ifdef NETLINK_SOCK ++# include ++#else ++# include ++#endif ++ ++#include /* get_random_bytes() */ ++ ++#include "openswan/radij.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_sa.h" ++ ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_ah.h" ++#include "openswan/ipsec_esp.h" ++#include 
"openswan/ipsec_tunnel.h" ++#include "openswan/ipsec_rcv.h" ++#include "openswan/ipcomp.h" ++ ++#include ++#include ++ ++#include "openswan/ipsec_proto.h" ++#include "openswan/ipsec_alg.h" ++ ++#define SENDERR(_x) do { error = -(_x); goto errlab; } while (0) ++ ++int ++pfkey_sa_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) ++{ ++ struct sadb_sa *pfkey_sa = (struct sadb_sa *)pfkey_ext; ++ int error = 0; ++ struct ipsec_sa* ipsp; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sa_process: .\n"); ++ ++ if(!extr || !extr->ips) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sa_process: " ++ "extr or extr->ips is NULL, fatal\n"); ++ SENDERR(EINVAL); ++ } ++ ++ switch(pfkey_ext->sadb_ext_type) { ++ case SADB_EXT_SA: ++ ipsp = extr->ips; ++ break; ++ case SADB_X_EXT_SA2: ++ if(extr->ips2 == NULL) { ++ extr->ips2 = ipsec_sa_alloc(&error); /* pass error var by pointer */ ++ } ++ if(extr->ips2 == NULL) { ++ SENDERR(-error); ++ } ++ ipsp = extr->ips2; ++ break; ++ default: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sa_process: " ++ "invalid exttype=%d.\n", ++ pfkey_ext->sadb_ext_type); ++ SENDERR(EINVAL); ++ } ++ ++ ipsp->ips_said.spi = pfkey_sa->sadb_sa_spi; ++ ipsp->ips_replaywin = pfkey_sa->sadb_sa_replay; ++ ipsp->ips_state = pfkey_sa->sadb_sa_state; ++ ipsp->ips_flags = pfkey_sa->sadb_sa_flags; ++ ipsp->ips_replaywin_lastseq = ipsp->ips_replaywin_bitmap = 0; ++ ipsp->ips_ref_rel = pfkey_sa->sadb_x_sa_ref; ++ ++ switch(ipsp->ips_said.proto) { ++ case IPPROTO_AH: ++ ipsp->ips_authalg = pfkey_sa->sadb_sa_auth; ++ ipsp->ips_encalg = SADB_EALG_NONE; ++ break; ++ case IPPROTO_ESP: ++ ipsp->ips_authalg = pfkey_sa->sadb_sa_auth; ++ ipsp->ips_encalg = pfkey_sa->sadb_sa_encrypt; ++#ifdef CONFIG_KLIPS_ALG ++ ipsec_alg_sa_init(ipsp); ++#endif ++ break; ++ case IPPROTO_IPIP: ++ ipsp->ips_authalg = AH_NONE; ++ ipsp->ips_encalg = ESP_NONE; ++ break; ++#ifdef CONFIG_KLIPS_IPCOMP ++ case IPPROTO_COMP: ++ ipsp->ips_authalg = AH_NONE; ++ 
ipsp->ips_encalg = pfkey_sa->sadb_sa_encrypt; ++ break; ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ case IPPROTO_INT: ++ ipsp->ips_authalg = AH_NONE; ++ ipsp->ips_encalg = ESP_NONE; ++ break; ++ case 0: ++ break; ++ default: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sa_process: " ++ "unknown proto=%d.\n", ++ ipsp->ips_said.proto); ++ SENDERR(EINVAL); ++ } ++ ++errlab: ++ return error; ++} ++ ++int ++pfkey_lifetime_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ struct sadb_lifetime *pfkey_lifetime = (struct sadb_lifetime *)pfkey_ext; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_lifetime_process: .\n"); ++ ++ if(!extr || !extr->ips) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_lifetime_process: " ++ "extr or extr->ips is NULL, fatal\n"); ++ SENDERR(EINVAL); ++ } ++ ++ switch(pfkey_lifetime->sadb_lifetime_exttype) { ++ case SADB_EXT_LIFETIME_CURRENT: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_lifetime_process: " ++ "lifetime_current not supported yet.\n"); ++ SENDERR(EINVAL); ++ break; ++ case SADB_EXT_LIFETIME_HARD: ++ ipsec_lifetime_update_hard(&extr->ips->ips_life.ipl_allocations, ++ pfkey_lifetime->sadb_lifetime_allocations); ++ ++ ipsec_lifetime_update_hard(&extr->ips->ips_life.ipl_bytes, ++ pfkey_lifetime->sadb_lifetime_bytes); ++ ++ ipsec_lifetime_update_hard(&extr->ips->ips_life.ipl_addtime, ++ pfkey_lifetime->sadb_lifetime_addtime); ++ ++ ipsec_lifetime_update_hard(&extr->ips->ips_life.ipl_usetime, ++ pfkey_lifetime->sadb_lifetime_usetime); ++ ++ break; ++ ++ case SADB_EXT_LIFETIME_SOFT: ++ ipsec_lifetime_update_soft(&extr->ips->ips_life.ipl_allocations, ++ pfkey_lifetime->sadb_lifetime_allocations); ++ ++ ipsec_lifetime_update_soft(&extr->ips->ips_life.ipl_bytes, ++ pfkey_lifetime->sadb_lifetime_bytes); ++ ++ ipsec_lifetime_update_soft(&extr->ips->ips_life.ipl_addtime, ++ pfkey_lifetime->sadb_lifetime_addtime); ++ ++ ipsec_lifetime_update_soft(&extr->ips->ips_life.ipl_usetime, ++ 
pfkey_lifetime->sadb_lifetime_usetime); ++ ++ break; ++ default: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_lifetime_process: " ++ "invalid exttype=%d.\n", ++ pfkey_ext->sadb_ext_type); ++ SENDERR(EINVAL); ++ } ++ ++errlab: ++ return error; ++} ++ ++int ++pfkey_address_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ int saddr_len = 0; ++ char ipaddr_txt[ADDRTOA_BUF]; ++ unsigned char **sap; ++ unsigned short * portp = 0; ++ struct sadb_address *pfkey_address = (struct sadb_address *)pfkey_ext; ++ struct sockaddr* s = (struct sockaddr*)((char*)pfkey_address + sizeof(*pfkey_address)); ++ struct ipsec_sa* ipsp; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process:\n"); ++ ++ if(!extr || !extr->ips) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "extr or extr->ips is NULL, fatal\n"); ++ SENDERR(EINVAL); ++ } ++ ++ switch(s->sa_family) { ++ case AF_INET: ++ saddr_len = sizeof(struct sockaddr_in); ++ if (debug_pfkey) ++ addrtoa(((struct sockaddr_in*)s)->sin_addr, 0, ipaddr_txt, sizeof(ipaddr_txt)); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "found address family=%d, AF_INET, %s.\n", ++ s->sa_family, ++ ipaddr_txt); ++ break; ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ case AF_INET6: ++ saddr_len = sizeof(struct sockaddr_in6); ++ break; ++#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ ++ default: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "s->sa_family=%d not supported.\n", ++ s->sa_family); ++ SENDERR(EPFNOSUPPORT); ++ } ++ ++ switch(pfkey_address->sadb_address_exttype) { ++ case SADB_EXT_ADDRESS_SRC: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "found src address.\n"); ++ sap = (unsigned char **)&(extr->ips->ips_addr_s); ++ extr->ips->ips_addr_s_size = saddr_len; ++ break; ++ case SADB_EXT_ADDRESS_DST: ++ KLIPS_PRINT(debug_pfkey, ++ 
"klips_debug:pfkey_address_process: " ++ "found dst address.\n"); ++ sap = (unsigned char **)&(extr->ips->ips_addr_d); ++ extr->ips->ips_addr_d_size = saddr_len; ++ break; ++ case SADB_EXT_ADDRESS_PROXY: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "found proxy address.\n"); ++ sap = (unsigned char **)&(extr->ips->ips_addr_p); ++ extr->ips->ips_addr_p_size = saddr_len; ++ break; ++ case SADB_X_EXT_ADDRESS_DST2: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "found 2nd dst address.\n"); ++ if(extr->ips2 == NULL) { ++ extr->ips2 = ipsec_sa_alloc(&error); /* pass error var by pointer */ ++ } ++ if(extr->ips2 == NULL) { ++ SENDERR(-error); ++ } ++ sap = (unsigned char **)&(extr->ips2->ips_addr_d); ++ extr->ips2->ips_addr_d_size = saddr_len; ++ break; ++ case SADB_X_EXT_ADDRESS_SRC_FLOW: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "found src flow address.\n"); ++ if(pfkey_alloc_eroute(&(extr->eroute)) == ENOMEM) { ++ SENDERR(ENOMEM); ++ } ++ sap = (unsigned char **)&(extr->eroute->er_eaddr.sen_ip_src); ++ portp = &(extr->eroute->er_eaddr.sen_sport); ++ break; ++ case SADB_X_EXT_ADDRESS_DST_FLOW: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "found dst flow address.\n"); ++ if(pfkey_alloc_eroute(&(extr->eroute)) == ENOMEM) { ++ SENDERR(ENOMEM); ++ } ++ sap = (unsigned char **)&(extr->eroute->er_eaddr.sen_ip_dst); ++ portp = &(extr->eroute->er_eaddr.sen_dport); ++ break; ++ case SADB_X_EXT_ADDRESS_SRC_MASK: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "found src mask address.\n"); ++ if(pfkey_alloc_eroute(&(extr->eroute)) == ENOMEM) { ++ SENDERR(ENOMEM); ++ } ++ sap = (unsigned char **)&(extr->eroute->er_emask.sen_ip_src); ++ portp = &(extr->eroute->er_emask.sen_sport); ++ break; ++ case SADB_X_EXT_ADDRESS_DST_MASK: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "found dst mask address.\n"); ++ 
if(pfkey_alloc_eroute(&(extr->eroute)) == ENOMEM) { ++ SENDERR(ENOMEM); ++ } ++ sap = (unsigned char **)&(extr->eroute->er_emask.sen_ip_dst); ++ portp = &(extr->eroute->er_emask.sen_dport); ++ break; ++#ifdef NAT_TRAVERSAL ++ case SADB_X_EXT_NAT_T_OA: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "found NAT-OA address.\n"); ++ sap = (unsigned char **)&(extr->ips->ips_natt_oa); ++ extr->ips->ips_natt_oa_size = saddr_len; ++ break; ++#endif ++ default: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "unrecognised ext_type=%d.\n", ++ pfkey_address->sadb_address_exttype); ++ SENDERR(EINVAL); ++ } ++ ++ switch(pfkey_address->sadb_address_exttype) { ++ case SADB_EXT_ADDRESS_SRC: ++ case SADB_EXT_ADDRESS_DST: ++ case SADB_EXT_ADDRESS_PROXY: ++ case SADB_X_EXT_ADDRESS_DST2: ++#ifdef NAT_TRAVERSAL ++ case SADB_X_EXT_NAT_T_OA: ++#endif ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "allocating %d bytes for saddr.\n", ++ saddr_len); ++ if(!(*sap = kmalloc(saddr_len, GFP_KERNEL))) { ++ SENDERR(ENOMEM); ++ } ++ memcpy(*sap, s, saddr_len); ++ break; ++ default: ++ if(s->sa_family != AF_INET) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "s->sa_family=%d not supported.\n", ++ s->sa_family); ++ SENDERR(EPFNOSUPPORT); ++ } ++ { ++ *(struct in_addr *)sap = ((struct sockaddr_in *)s)->sin_addr; ++ } ++ ++ if (portp != 0) ++ *portp = ((struct sockaddr_in*)s)->sin_port; ++#ifdef CONFIG_KLIPS_DEBUG ++ if(extr->eroute) { ++ char buf1[64], buf2[64]; ++ if (debug_pfkey) { ++ subnettoa(extr->eroute->er_eaddr.sen_ip_src, ++ extr->eroute->er_emask.sen_ip_src, 0, buf1, sizeof(buf1)); ++ subnettoa(extr->eroute->er_eaddr.sen_ip_dst, ++ extr->eroute->er_emask.sen_ip_dst, 0, buf2, sizeof(buf2)); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_parse: " ++ "extr->eroute set to %s:%d->%s:%d\n", ++ buf1, ++ ntohs(extr->eroute->er_eaddr.sen_sport), ++ buf2, ++ 
ntohs(extr->eroute->er_eaddr.sen_dport)); ++ } ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ } ++ ++ ipsp = extr->ips; ++ switch(pfkey_address->sadb_address_exttype) { ++ case SADB_X_EXT_ADDRESS_DST2: ++ ipsp = extr->ips2; ++ case SADB_EXT_ADDRESS_DST: ++ if(s->sa_family == AF_INET) { ++ ipsp->ips_said.dst.u.v4.sin_addr.s_addr = ((struct sockaddr_in*)(ipsp->ips_addr_d))->sin_addr.s_addr; ++ ipsp->ips_said.dst.u.v4.sin_family = AF_INET; ++ if (debug_pfkey) ++ addrtoa(((struct sockaddr_in*)(ipsp->ips_addr_d))->sin_addr, ++ 0, ++ ipaddr_txt, ++ sizeof(ipaddr_txt)); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "ips_said.dst set to %s.\n", ++ ipaddr_txt); ++ } else { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: " ++ "uh, ips_said.dst doesn't do address family=%d yet, said will be invalid.\n", ++ s->sa_family); ++ } ++ default: ++ break; ++ } ++ ++ /* XXX check if port!=0 */ ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_address_process: successful.\n"); ++ errlab: ++ return error; ++} ++ ++int ++pfkey_key_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ struct sadb_key *pfkey_key = (struct sadb_key *)pfkey_ext; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_key_process: .\n"); ++ ++ if(!extr || !extr->ips) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_key_process: " ++ "extr or extr->ips is NULL, fatal\n"); ++ SENDERR(EINVAL); ++ } ++ ++ switch(pfkey_key->sadb_key_exttype) { ++ case SADB_EXT_KEY_AUTH: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_key_process: " ++ "allocating %d bytes for authkey.\n", ++ DIVUP(pfkey_key->sadb_key_bits, 8)); ++ if(!(extr->ips->ips_key_a = kmalloc(DIVUP(pfkey_key->sadb_key_bits, 8), GFP_KERNEL))) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_key_process: " ++ "memory allocation error.\n"); ++ SENDERR(ENOMEM); ++ } ++ extr->ips->ips_key_bits_a = pfkey_key->sadb_key_bits; ++ extr->ips->ips_key_a_size = 
DIVUP(pfkey_key->sadb_key_bits, 8); ++ memcpy(extr->ips->ips_key_a, ++ (char*)pfkey_key + sizeof(struct sadb_key), ++ extr->ips->ips_key_a_size); ++ break; ++ case SADB_EXT_KEY_ENCRYPT: /* Key(s) */ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_key_process: " ++ "allocating %d bytes for enckey.\n", ++ DIVUP(pfkey_key->sadb_key_bits, 8)); ++ if(!(extr->ips->ips_key_e = kmalloc(DIVUP(pfkey_key->sadb_key_bits, 8), GFP_KERNEL))) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_key_process: " ++ "memory allocation error.\n"); ++ SENDERR(ENOMEM); ++ } ++ extr->ips->ips_key_bits_e = pfkey_key->sadb_key_bits; ++ extr->ips->ips_key_e_size = DIVUP(pfkey_key->sadb_key_bits, 8); ++ memcpy(extr->ips->ips_key_e, ++ (char*)pfkey_key + sizeof(struct sadb_key), ++ extr->ips->ips_key_e_size); ++ break; ++ default: ++ SENDERR(EINVAL); ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_key_process: " ++ "success.\n"); ++errlab: ++ return error; ++} ++ ++int ++pfkey_ident_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ struct sadb_ident *pfkey_ident = (struct sadb_ident *)pfkey_ext; ++ int data_len; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_ident_process: .\n"); ++ ++ if(!extr || !extr->ips) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_ident_process: " ++ "extr or extr->ips is NULL, fatal\n"); ++ SENDERR(EINVAL); ++ } ++ ++ switch(pfkey_ident->sadb_ident_exttype) { ++ case SADB_EXT_IDENTITY_SRC: ++ data_len = pfkey_ident->sadb_ident_len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident); ++ ++ extr->ips->ips_ident_s.type = pfkey_ident->sadb_ident_type; ++ extr->ips->ips_ident_s.id = pfkey_ident->sadb_ident_id; ++ extr->ips->ips_ident_s.len = pfkey_ident->sadb_ident_len; ++ if(data_len) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_ident_process: " ++ "allocating %d bytes for ident_s.\n", ++ data_len); ++ if(!(extr->ips->ips_ident_s.data ++ = kmalloc(data_len, GFP_KERNEL))) { ++ SENDERR(ENOMEM); ++ } ++ 
memcpy(extr->ips->ips_ident_s.data, ++ (char*)pfkey_ident + sizeof(struct sadb_ident), ++ data_len); ++ } else { ++ extr->ips->ips_ident_s.data = NULL; ++ } ++ break; ++ case SADB_EXT_IDENTITY_DST: /* Identity(ies) */ ++ data_len = pfkey_ident->sadb_ident_len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident); ++ ++ extr->ips->ips_ident_d.type = pfkey_ident->sadb_ident_type; ++ extr->ips->ips_ident_d.id = pfkey_ident->sadb_ident_id; ++ extr->ips->ips_ident_d.len = pfkey_ident->sadb_ident_len; ++ if(data_len) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_ident_process: " ++ "allocating %d bytes for ident_d.\n", ++ data_len); ++ if(!(extr->ips->ips_ident_d.data ++ = kmalloc(data_len, GFP_KERNEL))) { ++ SENDERR(ENOMEM); ++ } ++ memcpy(extr->ips->ips_ident_d.data, ++ (char*)pfkey_ident + sizeof(struct sadb_ident), ++ data_len); ++ } else { ++ extr->ips->ips_ident_d.data = NULL; ++ } ++ break; ++ default: ++ SENDERR(EINVAL); ++ } ++errlab: ++ return error; ++} ++ ++int ++pfkey_sens_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_sens_process: " ++ "Sorry, I can't process exttype=%d yet.\n", ++ pfkey_ext->sadb_ext_type); ++ SENDERR(EINVAL); /* don't process these yet */ ++ errlab: ++ return error; ++} ++ ++int ++pfkey_prop_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_prop_process: " ++ "Sorry, I can't process exttype=%d yet.\n", ++ pfkey_ext->sadb_ext_type); ++ SENDERR(EINVAL); /* don't process these yet */ ++ ++ errlab: ++ return error; ++} ++ ++int ++pfkey_supported_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_supported_process: " ++ "Sorry, I can't process exttype=%d yet.\n", ++ pfkey_ext->sadb_ext_type); ++ SENDERR(EINVAL); /* don't process these yet */ ++ ++errlab: 
++ return error; ++} ++ ++int ++pfkey_spirange_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_spirange_process: .\n"); ++/* errlab: */ ++ return error; ++} ++ ++int ++pfkey_x_kmprivate_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_kmprivate_process: " ++ "Sorry, I can't process exttype=%d yet.\n", ++ pfkey_ext->sadb_ext_type); ++ SENDERR(EINVAL); /* don't process these yet */ ++ ++errlab: ++ return error; ++} ++ ++int ++pfkey_x_satype_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ struct sadb_x_satype *pfkey_x_satype = (struct sadb_x_satype *)pfkey_ext; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_satype_process: .\n"); ++ ++ if(!extr || !extr->ips) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_satype_process: " ++ "extr or extr->ips is NULL, fatal\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(extr->ips2 == NULL) { ++ extr->ips2 = ipsec_sa_alloc(&error); /* pass error var by pointer */ ++ } ++ if(extr->ips2 == NULL) { ++ SENDERR(-error); ++ } ++ if(!(extr->ips2->ips_said.proto = satype2proto(pfkey_x_satype->sadb_x_satype_satype))) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_satype_process: " ++ "proto lookup from satype=%d failed.\n", ++ pfkey_x_satype->sadb_x_satype_satype); ++ SENDERR(EINVAL); ++ } ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_satype_process: " ++ "protocol==%d decoded from satype==%d(%s).\n", ++ extr->ips2->ips_said.proto, ++ pfkey_x_satype->sadb_x_satype_satype, ++ satype2name(pfkey_x_satype->sadb_x_satype_satype)); ++ ++errlab: ++ return error; ++} ++ ++ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++int ++pfkey_x_nat_t_type_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ struct sadb_x_nat_t_type *pfkey_x_nat_t_type = (struct 
sadb_x_nat_t_type *)pfkey_ext; ++ ++ if(!pfkey_x_nat_t_type) { ++ printk("klips_debug:pfkey_x_nat_t_type_process: " ++ "null pointer passed in\n"); ++ SENDERR(EINVAL); ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_nat_t_type_process: %d.\n", ++ pfkey_x_nat_t_type->sadb_x_nat_t_type_type); ++ ++ if(!extr || !extr->ips) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_nat_t_type_process: " ++ "extr or extr->ips is NULL, fatal\n"); ++ SENDERR(EINVAL); ++ } ++ ++ switch(pfkey_x_nat_t_type->sadb_x_nat_t_type_type) { ++ case ESPINUDP_WITH_NON_IKE: /* with Non-IKE (older version) */ ++ case ESPINUDP_WITH_NON_ESP: /* with Non-ESP */ ++ ++ extr->ips->ips_natt_type = pfkey_x_nat_t_type->sadb_x_nat_t_type_type; ++ break; ++ default: ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_nat_t_type_process: " ++ "unknown type %d.\n", ++ pfkey_x_nat_t_type->sadb_x_nat_t_type_type); ++ SENDERR(EINVAL); ++ break; ++ } ++ ++errlab: ++ return error; ++} ++ ++int ++pfkey_x_nat_t_port_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ struct sadb_x_nat_t_port *pfkey_x_nat_t_port = (struct sadb_x_nat_t_port *)pfkey_ext; ++ ++ if(!pfkey_x_nat_t_port) { ++ printk("klips_debug:pfkey_x_nat_t_port_process: " ++ "null pointer passed in\n"); ++ SENDERR(EINVAL); ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_nat_t_port_process: %d/%d.\n", ++ pfkey_x_nat_t_port->sadb_x_nat_t_port_exttype, ++ pfkey_x_nat_t_port->sadb_x_nat_t_port_port); ++ ++ if(!extr || !extr->ips) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_nat_t_type_process: " ++ "extr or extr->ips is NULL, fatal\n"); ++ SENDERR(EINVAL); ++ } ++ ++ switch(pfkey_x_nat_t_port->sadb_x_nat_t_port_exttype) { ++ case SADB_X_EXT_NAT_T_SPORT: ++ extr->ips->ips_natt_sport = pfkey_x_nat_t_port->sadb_x_nat_t_port_port; ++ break; ++ case SADB_X_EXT_NAT_T_DPORT: ++ extr->ips->ips_natt_dport = pfkey_x_nat_t_port->sadb_x_nat_t_port_port; ++ break; ++ default: ++ 
KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_nat_t_port_process: " ++ "unknown exttype %d.\n", ++ pfkey_x_nat_t_port->sadb_x_nat_t_port_exttype); ++ SENDERR(EINVAL); ++ break; ++ } ++ ++errlab: ++ return error; ++} ++#endif ++ ++int ++pfkey_x_debug_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ struct sadb_x_debug *pfkey_x_debug = (struct sadb_x_debug *)pfkey_ext; ++ ++ if(!pfkey_x_debug) { ++ printk("klips_debug:pfkey_x_debug_process: " ++ "null pointer passed in\n"); ++ SENDERR(EINVAL); ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_debug_process: .\n"); ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if(pfkey_x_debug->sadb_x_debug_netlink >> ++ (sizeof(pfkey_x_debug->sadb_x_debug_netlink) * 8 - 1)) { ++ pfkey_x_debug->sadb_x_debug_netlink &= ++ ~(1 << (sizeof(pfkey_x_debug->sadb_x_debug_netlink) * 8 -1)); ++ debug_tunnel |= pfkey_x_debug->sadb_x_debug_tunnel; ++ debug_netlink |= pfkey_x_debug->sadb_x_debug_netlink; ++ debug_xform |= pfkey_x_debug->sadb_x_debug_xform; ++ debug_eroute |= pfkey_x_debug->sadb_x_debug_eroute; ++ debug_spi |= pfkey_x_debug->sadb_x_debug_spi; ++ debug_radij |= pfkey_x_debug->sadb_x_debug_radij; ++ debug_esp |= pfkey_x_debug->sadb_x_debug_esp; ++ debug_ah |= pfkey_x_debug->sadb_x_debug_ah; ++ debug_rcv |= pfkey_x_debug->sadb_x_debug_rcv; ++ debug_pfkey |= pfkey_x_debug->sadb_x_debug_pfkey; ++#ifdef CONFIG_KLIPS_IPCOMP ++ sysctl_ipsec_debug_ipcomp |= pfkey_x_debug->sadb_x_debug_ipcomp; ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ sysctl_ipsec_debug_verbose |= pfkey_x_debug->sadb_x_debug_verbose; ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_debug_process: " ++ "set\n"); ++ } else { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_debug_process: " ++ "unset\n"); ++ debug_tunnel &= pfkey_x_debug->sadb_x_debug_tunnel; ++ debug_netlink &= pfkey_x_debug->sadb_x_debug_netlink; ++ debug_xform &= pfkey_x_debug->sadb_x_debug_xform; ++ debug_eroute &= pfkey_x_debug->sadb_x_debug_eroute; ++ 
debug_spi &= pfkey_x_debug->sadb_x_debug_spi; ++ debug_radij &= pfkey_x_debug->sadb_x_debug_radij; ++ debug_esp &= pfkey_x_debug->sadb_x_debug_esp; ++ debug_ah &= pfkey_x_debug->sadb_x_debug_ah; ++ debug_rcv &= pfkey_x_debug->sadb_x_debug_rcv; ++ debug_pfkey &= pfkey_x_debug->sadb_x_debug_pfkey; ++#ifdef CONFIG_KLIPS_IPCOMP ++ sysctl_ipsec_debug_ipcomp &= pfkey_x_debug->sadb_x_debug_ipcomp; ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ sysctl_ipsec_debug_verbose &= pfkey_x_debug->sadb_x_debug_verbose; ++ } ++#else /* CONFIG_KLIPS_DEBUG */ ++ printk("klips_debug:pfkey_x_debug_process: " ++ "debugging not enabled\n"); ++ SENDERR(EINVAL); ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++errlab: ++ return error; ++} ++ ++/* ++ * $Log: pfkey_v2_ext_process.c,v $ ++ * Revision 1.20.2.4 2007-11-16 03:42:22 paul ++ * Fix for 64bit big endian machines where a cast for struct in_addr was ++ * wrong. This resulted in KLIPS dropping all NAT-T packets with the ++ * error: ++ * ++ * klips_debug:ipsec_xmit_SAlookup: checking for local udp/500 IKE packet ++ * saddr=a010c92, er=0p0000000000000000, daddr=a010f17, er_dst=0, proto=1 ++ * sport=0 dport=0 ++ * klips_debug:ipsec_xmit_encap_bundle: shunt SA of DROP or no eroute: ++ * dropping. ++ * ++ * Patch by [dhr] ++ * ++ * Revision 1.20.2.3 2007/09/05 02:56:10 paul ++ * Use the new ipsec_kversion macros by David to deal with 2.6.22 kernels. ++ * Fixes based on David McCullough patch. ++ * ++ * Revision 1.20.2.2 2006/10/06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.20.2.1 2006/04/20 16:33:07 mcr ++ * remove all of CONFIG_KLIPS_ALG --- one can no longer build without it. ++ * Fix in-kernel module compilation. Sub-makefiles do not work. ++ * ++ * Revision 1.20 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. 
++ * ++ * Revision 1.19 2004/12/04 07:14:18 mcr ++ * resolution to gcc3-ism was wrong. fixed to assign correct ++ * variable. ++ * ++ * Revision 1.18 2004/12/03 21:25:57 mcr ++ * compile time fixes for running on 2.6. ++ * still experimental. ++ * ++ * Revision 1.17 2004/08/21 00:45:04 mcr ++ * CONFIG_KLIPS_NAT was wrong, also need to include udp.h. ++ * ++ * Revision 1.16 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.15 2004/04/06 02:49:26 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.14 2004/02/03 03:13:59 mcr ++ * no longer #ifdef out NON_ESP mode. That was a mistake. ++ * ++ * Revision 1.13 2003/12/15 18:13:12 mcr ++ * when compiling with NAT traversal, don't assume that the ++ * kernel has been patched, unless CONFIG_IPSEC_NAT_NON_ESP ++ * is set. ++ * ++ * Revision 1.12.2.1 2003/12/22 15:25:52 jjo ++ * Merged algo-0.8.1-rc11-test1 into alg-branch ++ * ++ * Revision 1.12 2003/12/10 01:14:27 mcr ++ * NAT-traversal patches to KLIPS. ++ * ++ * Revision 1.11 2003/10/31 02:27:55 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.10.4.2 2003/10/29 01:30:41 mcr ++ * elimited "struct sa_id". ++ * ++ * Revision 1.10.4.1 2003/09/21 13:59:56 mcr ++ * pre-liminary X.509 patch - does not yet pass tests. ++ * ++ * Revision 1.10 2003/02/06 01:51:41 rgb ++ * Removed no longer relevant comment ++ * ++ * Revision 1.9 2003/01/30 02:32:44 rgb ++ * ++ * Transmit error code through to caller from callee for better diagnosis of problems. ++ * ++ * Revision 1.8 2002/12/13 22:42:22 mcr ++ * restored sa_ref code ++ * ++ * Revision 1.7 2002/12/13 22:40:48 mcr ++ * temporarily removed sadb_x_sa_ref reference for 2.xx ++ * ++ * Revision 1.6 2002/10/05 05:02:58 dhr ++ * ++ * C labels go on statements ++ * ++ * Revision 1.5 2002/09/20 15:41:08 rgb ++ * Switch from pfkey_alloc_ipsec_sa() to ipsec_sa_alloc(). ++ * Added sadb_x_sa_ref to struct sadb_sa. 
++ * ++ * Revision 1.4 2002/09/20 05:02:02 rgb ++ * Added memory allocation debugging. ++ * ++ * Revision 1.3 2002/07/24 18:44:54 rgb ++ * Type fiddling to tame ia64 compiler. ++ * ++ * Revision 1.2 2002/05/27 18:55:03 rgb ++ * Remove final vistiges of tdb references via IPSEC_KLIPS1_COMPAT. ++ * ++ * Revision 1.1 2002/05/14 02:33:51 rgb ++ * Moved all the extension processing functions to pfkey_v2_ext_process.c. ++ * ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/pfkey_v2_parse.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,1846 @@ ++/* ++ * RFC2367 PF_KEYv2 Key management API message parser ++ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: pfkey_v2_parse.c,v 1.65 2005-04-06 17:46:05 mcr Exp $ ++ */ ++ ++/* ++ * Template from klips/net/ipsec/ipsec/ipsec_parser.c. 
++ */ ++ ++char pfkey_v2_parse_c_version[] = "$Id: pfkey_v2_parse.c,v 1.65 2005-04-06 17:46:05 mcr Exp $"; ++ ++/* ++ * Some ugly stuff to allow consistent debugging code for use in the ++ * kernel and in user space ++*/ ++ ++#ifdef __KERNEL__ ++ ++# include /* for printk */ ++ ++#include "openswan/ipsec_kversion.h" /* for malloc switch */ ++ ++# ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++# else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++# endif /* MALLOC_SLAB */ ++# include /* error codes */ ++# include /* size_t */ ++# include /* mark_bh */ ++ ++# include /* struct device, and other headers */ ++# include /* eth_type_trans */ ++# include /* struct iphdr */ ++# if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++# include /* struct ipv6hdr */ ++# endif /* if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ ++extern int debug_pfkey; ++ ++# include ++ ++#include "openswan/ipsec_encap.h" ++ ++#else /* __KERNEL__ */ ++ ++# include ++# include ++# include ++ ++# include ++# include "constants.h" ++# include "programs/pluto/defs.h" /* for PRINTF_LIKE */ ++ ++#endif /* __KERNEL__ */ ++ ++ ++#include ++#include ++ ++#include "openswan/ipsec_sa.h" /* IPSEC_SAREF_NULL, IPSEC_SA_REF_TABLE_IDX_WIDTH */ ++ ++/* ++ * how to handle debugging for pfkey. ++ */ ++#include ++ ++unsigned int pfkey_lib_debug = PF_KEY_DEBUG_PARSE_NONE; ++void (*pfkey_debug_func)(const char *message, ...) PRINTF_LIKE(1); ++void (*pfkey_error_func)(const char *message, ...) 
PRINTF_LIKE(1); ++ ++ ++#define SENDERR(_x) do { error = -(_x); goto errlab; } while (0) ++ ++struct satype_tbl { ++ uint8_t proto; ++ uint8_t satype; ++ char* name; ++} static satype_tbl[] = { ++#ifdef __KERNEL__ ++ { IPPROTO_ESP, SADB_SATYPE_ESP, "ESP" }, ++ { IPPROTO_AH, SADB_SATYPE_AH, "AH" }, ++ { IPPROTO_IPIP, SADB_X_SATYPE_IPIP, "IPIP" }, ++#ifdef CONFIG_KLIPS_IPCOMP ++ { IPPROTO_COMP, SADB_X_SATYPE_COMP, "COMP" }, ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ { IPPROTO_INT, SADB_X_SATYPE_INT, "INT" }, ++#else /* __KERNEL__ */ ++ { SA_ESP, SADB_SATYPE_ESP, "ESP" }, ++ { SA_AH, SADB_SATYPE_AH, "AH" }, ++ { SA_IPIP, SADB_X_SATYPE_IPIP, "IPIP" }, ++ { SA_COMP, SADB_X_SATYPE_COMP, "COMP" }, ++ { SA_INT, SADB_X_SATYPE_INT, "INT" }, ++#endif /* __KERNEL__ */ ++ { 0, 0, "UNKNOWN" } ++}; ++ ++uint8_t ++satype2proto(uint8_t satype) ++{ ++ int i =0; ++ ++ while(satype_tbl[i].satype != satype && satype_tbl[i].satype != 0) { ++ i++; ++ } ++ return satype_tbl[i].proto; ++} ++ ++uint8_t ++proto2satype(uint8_t proto) ++{ ++ int i = 0; ++ ++ while(satype_tbl[i].proto != proto && satype_tbl[i].proto != 0) { ++ i++; ++ } ++ return satype_tbl[i].satype; ++} ++ ++char* ++satype2name(uint8_t satype) ++{ ++ int i = 0; ++ ++ while(satype_tbl[i].satype != satype && satype_tbl[i].satype != 0) { ++ i++; ++ } ++ return satype_tbl[i].name; ++} ++ ++char* ++proto2name(uint8_t proto) ++{ ++ int i = 0; ++ ++ while(satype_tbl[i].proto != proto && satype_tbl[i].proto != 0) { ++ i++; ++ } ++ return satype_tbl[i].name; ++} ++ ++/* Default extension parsers taken from the KLIPS code */ ++ ++DEBUG_NO_STATIC int ++pfkey_sa_parse(struct sadb_ext *pfkey_ext) ++{ ++ int error = 0; ++ struct sadb_sa *pfkey_sa = (struct sadb_sa *)pfkey_ext; ++#if 0 ++ struct sadb_sa sav2; ++#endif ++ ++ /* sanity checks... */ ++ if(!pfkey_sa) { ++ ERROR("pfkey_sa_parse: " ++ "NULL pointer passed in.\n"); ++ SENDERR(EINVAL); ++ } ++ ++#if 0 ++ /* check if this structure is short, and if so, fix it up. 
++ * XXX this is NOT the way to do things. ++ */ ++ if(pfkey_sa->sadb_sa_len == sizeof(struct sadb_sa_v1)/IPSEC_PFKEYv2_ALIGN) { ++ ++ /* yes, so clear out a temporary structure, and copy first */ ++ memset(&sav2, 0, sizeof(sav2)); ++ memcpy(&sav2, pfkey_sa, sizeof(struct sadb_sa_v1)); ++ sav2.sadb_x_sa_ref=-1; ++ sav2.sadb_sa_len = sizeof(struct sadb_sa) / IPSEC_PFKEYv2_ALIGN; ++ ++ pfkey_sa = &sav2; ++ } ++#endif ++ ++ ++ if(pfkey_sa->sadb_sa_len != sizeof(struct sadb_sa) / IPSEC_PFKEYv2_ALIGN) { ++ ERROR( ++ "pfkey_sa_parse: " ++ "length wrong pfkey_sa->sadb_sa_len=%d sizeof(struct sadb_sa)=%d.\n", ++ pfkey_sa->sadb_sa_len, ++ (int)sizeof(struct sadb_sa)); ++ SENDERR(EINVAL); ++ } ++ ++#if SADB_EALG_MAX < 255 ++ if(pfkey_sa->sadb_sa_encrypt > SADB_EALG_MAX) { ++ ERROR( ++ "pfkey_sa_parse: " ++ "pfkey_sa->sadb_sa_encrypt=%d > SADB_EALG_MAX=%d.\n", ++ pfkey_sa->sadb_sa_encrypt, ++ SADB_EALG_MAX); ++ SENDERR(EINVAL); ++ } ++#endif ++ ++#if SADB_AALG_MAX < 255 ++ if(pfkey_sa->sadb_sa_auth > SADB_AALG_MAX) { ++ ERROR( ++ "pfkey_sa_parse: " ++ "pfkey_sa->sadb_sa_auth=%d > SADB_AALG_MAX=%d.\n", ++ pfkey_sa->sadb_sa_auth, ++ SADB_AALG_MAX); ++ SENDERR(EINVAL); ++ } ++#endif ++ ++#if SADB_SASTATE_MAX < 255 ++ if(pfkey_sa->sadb_sa_state > SADB_SASTATE_MAX) { ++ ERROR( ++ "pfkey_sa_parse: " ++ "state=%d exceeds MAX=%d.\n", ++ pfkey_sa->sadb_sa_state, ++ SADB_SASTATE_MAX); ++ SENDERR(EINVAL); ++ } ++#endif ++ ++ if(pfkey_sa->sadb_sa_state == SADB_SASTATE_DEAD) { ++ ERROR( ++ "pfkey_sa_parse: " ++ "state=%d is DEAD=%d.\n", ++ pfkey_sa->sadb_sa_state, ++ SADB_SASTATE_DEAD); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_sa->sadb_sa_replay > 64) { ++ ERROR( ++ "pfkey_sa_parse: " ++ "replay window size: %d -- must be 0 <= size <= 64\n", ++ pfkey_sa->sadb_sa_replay); ++ SENDERR(EINVAL); ++ } ++ ++ if(! 
((pfkey_sa->sadb_sa_exttype == SADB_EXT_SA) || ++ (pfkey_sa->sadb_sa_exttype == SADB_X_EXT_SA2))) ++ { ++ ERROR( ++ "pfkey_sa_parse: " ++ "unknown exttype=%d, expecting SADB_EXT_SA=%d or SADB_X_EXT_SA2=%d.\n", ++ pfkey_sa->sadb_sa_exttype, ++ SADB_EXT_SA, ++ SADB_X_EXT_SA2); ++ SENDERR(EINVAL); ++ } ++ ++ if((IPSEC_SAREF_NULL != pfkey_sa->sadb_x_sa_ref) && (pfkey_sa->sadb_x_sa_ref >= (1 << IPSEC_SA_REF_TABLE_IDX_WIDTH))) { ++ ERROR( ++ "pfkey_sa_parse: " ++ "SAref=%d must be (SAref == IPSEC_SAREF_NULL(%d) || SAref < IPSEC_SA_REF_TABLE_NUM_ENTRIES(%d)).\n", ++ pfkey_sa->sadb_x_sa_ref, ++ IPSEC_SAREF_NULL, ++ IPSEC_SA_REF_TABLE_NUM_ENTRIES); ++ SENDERR(EINVAL); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, ++ "pfkey_sa_parse: " ++ "successfully found len=%d exttype=%d(%s) spi=%08lx replay=%d state=%d auth=%d encrypt=%d flags=%d ref=%d.\n", ++ pfkey_sa->sadb_sa_len, ++ pfkey_sa->sadb_sa_exttype, ++ pfkey_v2_sadb_ext_string(pfkey_sa->sadb_sa_exttype), ++ (long unsigned int)ntohl(pfkey_sa->sadb_sa_spi), ++ pfkey_sa->sadb_sa_replay, ++ pfkey_sa->sadb_sa_state, ++ pfkey_sa->sadb_sa_auth, ++ pfkey_sa->sadb_sa_encrypt, ++ pfkey_sa->sadb_sa_flags, ++ pfkey_sa->sadb_x_sa_ref); ++ ++ errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_lifetime_parse(struct sadb_ext *pfkey_ext) ++{ ++ int error = 0; ++ struct sadb_lifetime *pfkey_lifetime = (struct sadb_lifetime *)pfkey_ext; ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW, ++ "pfkey_lifetime_parse:enter\n"); ++ /* sanity checks... 
*/ ++ if(!pfkey_lifetime) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_lifetime_parse: " ++ "NULL pointer passed in.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_lifetime->sadb_lifetime_len != ++ sizeof(struct sadb_lifetime) / IPSEC_PFKEYv2_ALIGN) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_lifetime_parse: " ++ "length wrong pfkey_lifetime->sadb_lifetime_len=%d sizeof(struct sadb_lifetime)=%d.\n", ++ pfkey_lifetime->sadb_lifetime_len, ++ (int)sizeof(struct sadb_lifetime)); ++ SENDERR(EINVAL); ++ } ++ ++ if((pfkey_lifetime->sadb_lifetime_exttype != SADB_EXT_LIFETIME_HARD) && ++ (pfkey_lifetime->sadb_lifetime_exttype != SADB_EXT_LIFETIME_SOFT) && ++ (pfkey_lifetime->sadb_lifetime_exttype != SADB_EXT_LIFETIME_CURRENT)) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_lifetime_parse: " ++ "unexpected ext_type=%d.\n", ++ pfkey_lifetime->sadb_lifetime_exttype); ++ SENDERR(EINVAL); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, ++ "pfkey_lifetime_parse: " ++ "life_type=%d(%s) alloc=%u bytes=%u add=%u use=%u pkts=%u.\n", ++ pfkey_lifetime->sadb_lifetime_exttype, ++ pfkey_v2_sadb_ext_string(pfkey_lifetime->sadb_lifetime_exttype), ++ pfkey_lifetime->sadb_lifetime_allocations, ++ (unsigned)pfkey_lifetime->sadb_lifetime_bytes, ++ (unsigned)pfkey_lifetime->sadb_lifetime_addtime, ++ (unsigned)pfkey_lifetime->sadb_lifetime_usetime, ++ pfkey_lifetime->sadb_x_lifetime_packets); ++errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_address_parse(struct sadb_ext *pfkey_ext) ++{ ++ int error = 0; ++ int saddr_len = 0; ++ struct sadb_address *pfkey_address = (struct sadb_address *)pfkey_ext; ++ struct sockaddr* s = (struct sockaddr*)((char*)pfkey_address + sizeof(*pfkey_address)); ++ char ipaddr_txt[ADDRTOT_BUF]; ++ ++ /* sanity checks... 
*/ ++ if(!pfkey_address) { ++ ERROR( ++ "pfkey_address_parse: " ++ "NULL pointer passed in.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_address->sadb_address_len < ++ (sizeof(struct sadb_address) + sizeof(struct sockaddr))/ ++ IPSEC_PFKEYv2_ALIGN) { ++ ERROR("pfkey_address_parse: " ++ "size wrong 1 ext_len=%d, adr_ext_len=%d, saddr_len=%d.\n", ++ pfkey_address->sadb_address_len, ++ (int)sizeof(struct sadb_address), ++ (int)sizeof(struct sockaddr)); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_address->sadb_address_reserved) { ++ ERROR("pfkey_address_parse: " ++ "res=%d, must be zero.\n", ++ pfkey_address->sadb_address_reserved); ++ SENDERR(EINVAL); ++ } ++ ++ switch(pfkey_address->sadb_address_exttype) { ++ case SADB_EXT_ADDRESS_SRC: ++ case SADB_EXT_ADDRESS_DST: ++ case SADB_EXT_ADDRESS_PROXY: ++ case SADB_X_EXT_ADDRESS_DST2: ++ case SADB_X_EXT_ADDRESS_SRC_FLOW: ++ case SADB_X_EXT_ADDRESS_DST_FLOW: ++ case SADB_X_EXT_ADDRESS_SRC_MASK: ++ case SADB_X_EXT_ADDRESS_DST_MASK: ++#ifdef NAT_TRAVERSAL ++ case SADB_X_EXT_NAT_T_OA: ++#endif ++ break; ++ default: ++ ERROR( ++ "pfkey_address_parse: " ++ "unexpected ext_type=%d.\n", ++ pfkey_address->sadb_address_exttype); ++ SENDERR(ENOPKG); ++ } ++ ++ switch(s->sa_family) { ++ case AF_INET: ++ saddr_len = sizeof(struct sockaddr_in); ++ sprintf(ipaddr_txt, "%d.%d.%d.%d" ++ , (((struct sockaddr_in*)s)->sin_addr.s_addr >> 0) & 0xFF ++ , (((struct sockaddr_in*)s)->sin_addr.s_addr >> 8) & 0xFF ++ , (((struct sockaddr_in*)s)->sin_addr.s_addr >> 16) & 0xFF ++ , (((struct sockaddr_in*)s)->sin_addr.s_addr >> 24) & 0xFF); ++ DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, ++ "pfkey_address_parse: " ++ "found exttype=%u(%s) family=%d(AF_INET) address=%s proto=%u port=%u.\n", ++ pfkey_address->sadb_address_exttype, ++ pfkey_v2_sadb_ext_string(pfkey_address->sadb_address_exttype), ++ s->sa_family, ++ ipaddr_txt, ++ pfkey_address->sadb_address_proto, ++ ntohs(((struct sockaddr_in*)s)->sin_port)); ++ break; ++ case AF_INET6: ++ saddr_len = sizeof(struct 
sockaddr_in6); ++ sprintf(ipaddr_txt, "%x:%x:%x:%x:%x:%x:%x:%x" ++ , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[0]) ++ , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[1]) ++ , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[2]) ++ , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[3]) ++ , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[4]) ++ , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[5]) ++ , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[6]) ++ , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[7])); ++ DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, ++ "pfkey_address_parse: " ++ "found exttype=%u(%s) family=%d(AF_INET6) address=%s proto=%u port=%u.\n", ++ pfkey_address->sadb_address_exttype, ++ pfkey_v2_sadb_ext_string(pfkey_address->sadb_address_exttype), ++ s->sa_family, ++ ipaddr_txt, ++ pfkey_address->sadb_address_proto, ++ ((struct sockaddr_in6*)s)->sin6_port); ++ break; ++ default: ++ ERROR( ++ "pfkey_address_parse: " ++ "s->sa_family=%d not supported.\n", ++ s->sa_family); ++ SENDERR(EPFNOSUPPORT); ++ } ++ ++ if(pfkey_address->sadb_address_len != ++ DIVUP(sizeof(struct sadb_address) + saddr_len, IPSEC_PFKEYv2_ALIGN)) { ++ ERROR( ++ "pfkey_address_parse: " ++ "size wrong 2 ext_len=%d, adr_ext_len=%d, saddr_len=%d.\n", ++ pfkey_address->sadb_address_len, ++ (int)sizeof(struct sadb_address), ++ saddr_len); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_address->sadb_address_prefixlen != 0) { ++ ERROR( ++ "pfkey_address_parse: " ++ "address prefixes not supported yet.\n"); ++ SENDERR(EAFNOSUPPORT); /* not supported yet */ ++ } ++ ++ /* XXX check if port!=0 */ ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW, ++ "pfkey_address_parse: successful.\n"); ++ errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_key_parse(struct sadb_ext *pfkey_ext) ++{ ++ int error = 0; ++ struct sadb_key *pfkey_key = (struct sadb_key *)pfkey_ext; ++ ++ /* sanity checks... 
*/ ++ ++ if(!pfkey_key) { ++ ERROR( ++ "pfkey_key_parse: " ++ "NULL pointer passed in.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_key->sadb_key_len < sizeof(struct sadb_key) / IPSEC_PFKEYv2_ALIGN) { ++ ERROR( ++ "pfkey_key_parse: " ++ "size wrong ext_len=%d, key_ext_len=%d.\n", ++ pfkey_key->sadb_key_len, ++ (int)sizeof(struct sadb_key)); ++ SENDERR(EINVAL); ++ } ++ ++ if(!pfkey_key->sadb_key_bits) { ++ ERROR( ++ "pfkey_key_parse: " ++ "key length set to zero, must be non-zero.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_key->sadb_key_len != ++ DIVUP(sizeof(struct sadb_key) * OCTETBITS + pfkey_key->sadb_key_bits, ++ PFKEYBITS)) { ++ ERROR( ++ "pfkey_key_parse: " ++ "key length=%d does not agree with extension length=%d.\n", ++ pfkey_key->sadb_key_bits, ++ pfkey_key->sadb_key_len); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_key->sadb_key_reserved) { ++ ERROR( ++ "pfkey_key_parse: " ++ "res=%d, must be zero.\n", ++ pfkey_key->sadb_key_reserved); ++ SENDERR(EINVAL); ++ } ++ ++ if(! ( (pfkey_key->sadb_key_exttype == SADB_EXT_KEY_AUTH) || ++ (pfkey_key->sadb_key_exttype == SADB_EXT_KEY_ENCRYPT))) { ++ ERROR( ++ "pfkey_key_parse: " ++ "expecting extension type AUTH or ENCRYPT, got %d.\n", ++ pfkey_key->sadb_key_exttype); ++ SENDERR(EINVAL); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, ++ "pfkey_key_parse: " ++ "success, found len=%d exttype=%d(%s) bits=%d reserved=%d.\n", ++ pfkey_key->sadb_key_len, ++ pfkey_key->sadb_key_exttype, ++ pfkey_v2_sadb_ext_string(pfkey_key->sadb_key_exttype), ++ pfkey_key->sadb_key_bits, ++ pfkey_key->sadb_key_reserved); ++ ++errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_ident_parse(struct sadb_ext *pfkey_ext) ++{ ++ int error = 0; ++ struct sadb_ident *pfkey_ident = (struct sadb_ident *)pfkey_ext; ++ ++ /* sanity checks... 
*/ ++ if(pfkey_ident->sadb_ident_len < sizeof(struct sadb_ident) / IPSEC_PFKEYv2_ALIGN) { ++ ERROR( ++ "pfkey_ident_parse: " ++ "size wrong ext_len=%d, key_ext_len=%d.\n", ++ pfkey_ident->sadb_ident_len, ++ (int)sizeof(struct sadb_ident)); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_ident->sadb_ident_type > SADB_IDENTTYPE_MAX) { ++ ERROR( ++ "pfkey_ident_parse: " ++ "ident_type=%d out of range, must be less than %d.\n", ++ pfkey_ident->sadb_ident_type, ++ SADB_IDENTTYPE_MAX); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_ident->sadb_ident_reserved) { ++ ERROR( ++ "pfkey_ident_parse: " ++ "res=%d, must be zero.\n", ++ pfkey_ident->sadb_ident_reserved); ++ SENDERR(EINVAL); ++ } ++ ++ /* string terminator/padding must be zero */ ++ if(pfkey_ident->sadb_ident_len > sizeof(struct sadb_ident) / IPSEC_PFKEYv2_ALIGN) { ++ if(*((char*)pfkey_ident + pfkey_ident->sadb_ident_len * IPSEC_PFKEYv2_ALIGN - 1)) { ++ ERROR( ++ "pfkey_ident_parse: " ++ "string padding must be zero, last is 0x%02x.\n", ++ *((char*)pfkey_ident + ++ pfkey_ident->sadb_ident_len * IPSEC_PFKEYv2_ALIGN - 1)); ++ SENDERR(EINVAL); ++ } ++ } ++ ++ if( ! ((pfkey_ident->sadb_ident_exttype == SADB_EXT_IDENTITY_SRC) || ++ (pfkey_ident->sadb_ident_exttype == SADB_EXT_IDENTITY_DST))) { ++ ERROR( ++ "pfkey_key_parse: " ++ "expecting extension type IDENTITY_SRC or IDENTITY_DST, got %d.\n", ++ pfkey_ident->sadb_ident_exttype); ++ SENDERR(EINVAL); ++ } ++ ++errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_sens_parse(struct sadb_ext *pfkey_ext) ++{ ++ int error = 0; ++ struct sadb_sens *pfkey_sens = (struct sadb_sens *)pfkey_ext; ++ ++ /* sanity checks... 
*/ ++ if(pfkey_sens->sadb_sens_len < sizeof(struct sadb_sens) / IPSEC_PFKEYv2_ALIGN) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_sens_parse: " ++ "size wrong ext_len=%d, key_ext_len=%d.\n", ++ pfkey_sens->sadb_sens_len, ++ (int)sizeof(struct sadb_sens)); ++ SENDERR(EINVAL); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_sens_parse: " ++ "Sorry, I can't parse exttype=%d yet.\n", ++ pfkey_ext->sadb_ext_type); ++#if 0 ++ SENDERR(EINVAL); /* don't process these yet */ ++#endif ++ ++errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_prop_parse(struct sadb_ext *pfkey_ext) ++{ ++ int error = 0; ++ int i, num_comb; ++ struct sadb_prop *pfkey_prop = (struct sadb_prop *)pfkey_ext; ++ struct sadb_comb *pfkey_comb = (struct sadb_comb *)((char*)pfkey_ext + sizeof(struct sadb_prop)); ++ ++ /* sanity checks... */ ++ if((pfkey_prop->sadb_prop_len < sizeof(struct sadb_prop) / IPSEC_PFKEYv2_ALIGN) || ++ (((pfkey_prop->sadb_prop_len * IPSEC_PFKEYv2_ALIGN) - sizeof(struct sadb_prop)) % sizeof(struct sadb_comb))) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "size wrong ext_len=%d, prop_ext_len=%d comb_ext_len=%d.\n", ++ pfkey_prop->sadb_prop_len, ++ (int)sizeof(struct sadb_prop), ++ (int)sizeof(struct sadb_comb)); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_prop->sadb_prop_replay > 64) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "replay window size: %d -- must be 0 <= size <= 64\n", ++ pfkey_prop->sadb_prop_replay); ++ SENDERR(EINVAL); ++ } ++ ++ for(i=0; i<3; i++) { ++ if(pfkey_prop->sadb_prop_reserved[i]) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "res[%d]=%d, must be zero.\n", ++ i, pfkey_prop->sadb_prop_reserved[i]); ++ SENDERR(EINVAL); ++ } ++ } ++ ++ num_comb = ((pfkey_prop->sadb_prop_len * IPSEC_PFKEYv2_ALIGN) - sizeof(struct sadb_prop)) / sizeof(struct sadb_comb); ++ ++ for(i = 0; i < num_comb; i++) { ++ if(pfkey_comb->sadb_comb_auth > SADB_AALG_MAX) { ++ 
DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_auth=%d > SADB_AALG_MAX=%d.\n", ++ i, ++ pfkey_comb->sadb_comb_auth, ++ SADB_AALG_MAX); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_comb->sadb_comb_auth) { ++ if(!pfkey_comb->sadb_comb_auth_minbits) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_auth_minbits=0, fatal.\n", ++ i); ++ SENDERR(EINVAL); ++ } ++ if(!pfkey_comb->sadb_comb_auth_maxbits) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_auth_maxbits=0, fatal.\n", ++ i); ++ SENDERR(EINVAL); ++ } ++ if(pfkey_comb->sadb_comb_auth_minbits > pfkey_comb->sadb_comb_auth_maxbits) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_auth_minbits=%d > maxbits=%d, fatal.\n", ++ i, ++ pfkey_comb->sadb_comb_auth_minbits, ++ pfkey_comb->sadb_comb_auth_maxbits); ++ SENDERR(EINVAL); ++ } ++ } else { ++ if(pfkey_comb->sadb_comb_auth_minbits) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_auth_minbits=%d != 0, fatal.\n", ++ i, ++ pfkey_comb->sadb_comb_auth_minbits); ++ SENDERR(EINVAL); ++ } ++ if(pfkey_comb->sadb_comb_auth_maxbits) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_auth_maxbits=%d != 0, fatal.\n", ++ i, ++ pfkey_comb->sadb_comb_auth_maxbits); ++ SENDERR(EINVAL); ++ } ++ } ++ ++#if SADB_EALG_MAX < 255 ++ if(pfkey_comb->sadb_comb_encrypt > SADB_EALG_MAX) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_comb_parse: " ++ "pfkey_comb[%d]->sadb_comb_encrypt=%d > SADB_EALG_MAX=%d.\n", ++ i, ++ pfkey_comb->sadb_comb_encrypt, ++ SADB_EALG_MAX); ++ SENDERR(EINVAL); ++ } ++#endif ++ ++ if(pfkey_comb->sadb_comb_encrypt) { ++ if(!pfkey_comb->sadb_comb_encrypt_minbits) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_encrypt_minbits=0, fatal.\n", ++ 
i); ++ SENDERR(EINVAL); ++ } ++ if(!pfkey_comb->sadb_comb_encrypt_maxbits) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_encrypt_maxbits=0, fatal.\n", ++ i); ++ SENDERR(EINVAL); ++ } ++ if(pfkey_comb->sadb_comb_encrypt_minbits > pfkey_comb->sadb_comb_encrypt_maxbits) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_encrypt_minbits=%d > maxbits=%d, fatal.\n", ++ i, ++ pfkey_comb->sadb_comb_encrypt_minbits, ++ pfkey_comb->sadb_comb_encrypt_maxbits); ++ SENDERR(EINVAL); ++ } ++ } else { ++ if(pfkey_comb->sadb_comb_encrypt_minbits) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_encrypt_minbits=%d != 0, fatal.\n", ++ i, ++ pfkey_comb->sadb_comb_encrypt_minbits); ++ SENDERR(EINVAL); ++ } ++ if(pfkey_comb->sadb_comb_encrypt_maxbits) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_encrypt_maxbits=%d != 0, fatal.\n", ++ i, ++ pfkey_comb->sadb_comb_encrypt_maxbits); ++ SENDERR(EINVAL); ++ } ++ } ++ ++ /* XXX do sanity check on flags */ ++ ++ if(pfkey_comb->sadb_comb_hard_allocations && pfkey_comb->sadb_comb_soft_allocations > pfkey_comb->sadb_comb_hard_allocations) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_soft_allocations=%d > hard_allocations=%d, fatal.\n", ++ i, ++ pfkey_comb->sadb_comb_soft_allocations, ++ pfkey_comb->sadb_comb_hard_allocations); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_comb->sadb_comb_hard_bytes && pfkey_comb->sadb_comb_soft_bytes > pfkey_comb->sadb_comb_hard_bytes) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_soft_bytes=%Ld > hard_bytes=%Ld, fatal.\n", ++ i, ++ (unsigned long long int)pfkey_comb->sadb_comb_soft_bytes, ++ (unsigned long long int)pfkey_comb->sadb_comb_hard_bytes); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_comb->sadb_comb_hard_addtime && 
pfkey_comb->sadb_comb_soft_addtime > pfkey_comb->sadb_comb_hard_addtime) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_soft_addtime=%Ld > hard_addtime=%Ld, fatal.\n", ++ i, ++ (unsigned long long int)pfkey_comb->sadb_comb_soft_addtime, ++ (unsigned long long int)pfkey_comb->sadb_comb_hard_addtime); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_comb->sadb_comb_hard_usetime && pfkey_comb->sadb_comb_soft_usetime > pfkey_comb->sadb_comb_hard_usetime) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_comb_soft_usetime=%Ld > hard_usetime=%Ld, fatal.\n", ++ i, ++ (unsigned long long int)pfkey_comb->sadb_comb_soft_usetime, ++ (unsigned long long int)pfkey_comb->sadb_comb_hard_usetime); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_comb->sadb_x_comb_hard_packets && pfkey_comb->sadb_x_comb_soft_packets > pfkey_comb->sadb_x_comb_hard_packets) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "pfkey_comb[%d]->sadb_x_comb_soft_packets=%d > hard_packets=%d, fatal.\n", ++ i, ++ pfkey_comb->sadb_x_comb_soft_packets, ++ pfkey_comb->sadb_x_comb_hard_packets); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_comb->sadb_comb_reserved) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_prop_parse: " ++ "comb[%d].res=%d, must be zero.\n", ++ i, ++ pfkey_comb->sadb_comb_reserved); ++ SENDERR(EINVAL); ++ } ++ pfkey_comb++; ++ } ++ ++errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_supported_parse(struct sadb_ext *pfkey_ext) ++{ ++ int error = 0; ++ unsigned int i, num_alg; ++ struct sadb_supported *pfkey_supported = (struct sadb_supported *)pfkey_ext; ++ struct sadb_alg *pfkey_alg = (struct sadb_alg*)((char*)pfkey_ext + sizeof(struct sadb_supported)); ++ ++ /* sanity checks... 
*/ ++ if((pfkey_supported->sadb_supported_len < ++ sizeof(struct sadb_supported) / IPSEC_PFKEYv2_ALIGN) || ++ (((pfkey_supported->sadb_supported_len * IPSEC_PFKEYv2_ALIGN) - ++ sizeof(struct sadb_supported)) % sizeof(struct sadb_alg))) { ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_supported_parse: " ++ "size wrong ext_len=%d, supported_ext_len=%d alg_ext_len=%d.\n", ++ pfkey_supported->sadb_supported_len, ++ (int)sizeof(struct sadb_supported), ++ (int)sizeof(struct sadb_alg)); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_supported->sadb_supported_reserved) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_supported_parse: " ++ "res=%d, must be zero.\n", ++ pfkey_supported->sadb_supported_reserved); ++ SENDERR(EINVAL); ++ } ++ ++ num_alg = ((pfkey_supported->sadb_supported_len * IPSEC_PFKEYv2_ALIGN) - sizeof(struct sadb_supported)) / sizeof(struct sadb_alg); ++ ++ for(i = 0; i < num_alg; i++) { ++ /* process algo description */ ++ if(pfkey_alg->sadb_alg_reserved) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_supported_parse: " ++ "alg[%d], id=%d, ivlen=%d, minbits=%d, maxbits=%d, res=%d, must be zero.\n", ++ i, ++ pfkey_alg->sadb_alg_id, ++ pfkey_alg->sadb_alg_ivlen, ++ pfkey_alg->sadb_alg_minbits, ++ pfkey_alg->sadb_alg_maxbits, ++ pfkey_alg->sadb_alg_reserved); ++ SENDERR(EINVAL); ++ } ++ ++ /* XXX can alg_id auth/enc be determined from info given? ++ Yes, but OpenBSD's method does not iteroperate with rfc2367. 
++ rgb, 2000-04-06 */ ++ ++ switch(pfkey_supported->sadb_supported_exttype) { ++ case SADB_EXT_SUPPORTED_AUTH: ++ if(pfkey_alg->sadb_alg_id > SADB_AALG_MAX) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_supported_parse: " ++ "alg[%d], alg_id=%d > SADB_AALG_MAX=%d, fatal.\n", ++ i, ++ pfkey_alg->sadb_alg_id, ++ SADB_AALG_MAX); ++ SENDERR(EINVAL); ++ } ++ break; ++ case SADB_EXT_SUPPORTED_ENCRYPT: ++#if SADB_EALG_MAX < 255 ++ if(pfkey_alg->sadb_alg_id > SADB_EALG_MAX) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_supported_parse: " ++ "alg[%d], alg_id=%d > SADB_EALG_MAX=%d, fatal.\n", ++ i, ++ pfkey_alg->sadb_alg_id, ++ SADB_EALG_MAX); ++ SENDERR(EINVAL); ++ } ++#endif ++ break; ++ default: ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_supported_parse: " ++ "alg[%d], alg_id=%d > SADB_EALG_MAX=%d, fatal.\n", ++ i, ++ pfkey_alg->sadb_alg_id, ++ SADB_EALG_MAX); ++ SENDERR(EINVAL); ++ } ++ pfkey_alg++; ++ } ++ ++ errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_spirange_parse(struct sadb_ext *pfkey_ext) ++{ ++ int error = 0; ++ struct sadb_spirange *pfkey_spirange = (struct sadb_spirange *)pfkey_ext; ++ ++ /* sanity checks... 
*/ ++ if(pfkey_spirange->sadb_spirange_len != ++ sizeof(struct sadb_spirange) / IPSEC_PFKEYv2_ALIGN) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_spirange_parse: " ++ "size wrong ext_len=%d, key_ext_len=%d.\n", ++ pfkey_spirange->sadb_spirange_len, ++ (int)sizeof(struct sadb_spirange)); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_spirange->sadb_spirange_reserved) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_spirange_parse: " ++ "reserved=%d must be set to zero.\n", ++ pfkey_spirange->sadb_spirange_reserved); ++ SENDERR(EINVAL); ++ } ++ ++ if(ntohl(pfkey_spirange->sadb_spirange_max) < ntohl(pfkey_spirange->sadb_spirange_min)) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_spirange_parse: " ++ "minspi=%08x must be < maxspi=%08x.\n", ++ ntohl(pfkey_spirange->sadb_spirange_min), ++ ntohl(pfkey_spirange->sadb_spirange_max)); ++ SENDERR(EINVAL); ++ } ++ ++ if(ntohl(pfkey_spirange->sadb_spirange_min) <= 255) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_spirange_parse: " ++ "minspi=%08x must be > 255.\n", ++ ntohl(pfkey_spirange->sadb_spirange_min)); ++ SENDERR(EEXIST); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, ++ "pfkey_spirange_parse: " ++ "ext_len=%u ext_type=%u(%s) min=%u max=%u res=%u.\n", ++ pfkey_spirange->sadb_spirange_len, ++ pfkey_spirange->sadb_spirange_exttype, ++ pfkey_v2_sadb_ext_string(pfkey_spirange->sadb_spirange_exttype), ++ pfkey_spirange->sadb_spirange_min, ++ pfkey_spirange->sadb_spirange_max, ++ pfkey_spirange->sadb_spirange_reserved); ++ errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_x_kmprivate_parse(struct sadb_ext *pfkey_ext) ++{ ++ int error = 0; ++ struct sadb_x_kmprivate *pfkey_x_kmprivate = (struct sadb_x_kmprivate *)pfkey_ext; ++ ++ /* sanity checks... 
*/ ++ if(pfkey_x_kmprivate->sadb_x_kmprivate_len < ++ sizeof(struct sadb_x_kmprivate) / IPSEC_PFKEYv2_ALIGN) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_x_kmprivate_parse: " ++ "size wrong ext_len=%d, key_ext_len=%d.\n", ++ pfkey_x_kmprivate->sadb_x_kmprivate_len, ++ (int)sizeof(struct sadb_x_kmprivate)); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_x_kmprivate->sadb_x_kmprivate_reserved) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_x_kmprivate_parse: " ++ "reserved=%d must be set to zero.\n", ++ pfkey_x_kmprivate->sadb_x_kmprivate_reserved); ++ SENDERR(EINVAL); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_x_kmprivate_parse: " ++ "Sorry, I can't parse exttype=%d yet.\n", ++ pfkey_ext->sadb_ext_type); ++ SENDERR(EINVAL); /* don't process these yet */ ++ ++errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_x_satype_parse(struct sadb_ext *pfkey_ext) ++{ ++ int error = 0; ++ int i; ++ struct sadb_x_satype *pfkey_x_satype = (struct sadb_x_satype *)pfkey_ext; ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW, ++ "pfkey_x_satype_parse: enter\n"); ++ /* sanity checks... 
*/ ++ if(pfkey_x_satype->sadb_x_satype_len != ++ sizeof(struct sadb_x_satype) / IPSEC_PFKEYv2_ALIGN) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_x_satype_parse: " ++ "size wrong ext_len=%d, key_ext_len=%d.\n", ++ pfkey_x_satype->sadb_x_satype_len, ++ (int)sizeof(struct sadb_x_satype)); ++ SENDERR(EINVAL); ++ } ++ ++ if(!pfkey_x_satype->sadb_x_satype_satype) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_x_satype_parse: " ++ "satype is zero, must be non-zero.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_x_satype->sadb_x_satype_satype > SADB_SATYPE_MAX) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_x_satype_parse: " ++ "satype %d > max %d, invalid.\n", ++ pfkey_x_satype->sadb_x_satype_satype, SADB_SATYPE_MAX); ++ SENDERR(EINVAL); ++ } ++ ++ if(!(satype2proto(pfkey_x_satype->sadb_x_satype_satype))) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_x_satype_parse: " ++ "proto lookup from satype=%d failed.\n", ++ pfkey_x_satype->sadb_x_satype_satype); ++ SENDERR(EINVAL); ++ } ++ ++ for(i = 0; i < 3; i++) { ++ if(pfkey_x_satype->sadb_x_satype_reserved[i]) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_x_satype_parse: " ++ "reserved[%d]=%d must be set to zero.\n", ++ i, pfkey_x_satype->sadb_x_satype_reserved[i]); ++ SENDERR(EINVAL); ++ } ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, ++ "pfkey_x_satype_parse: " ++ "len=%u ext=%u(%s) satype=%u(%s) res=%u,%u,%u.\n", ++ pfkey_x_satype->sadb_x_satype_len, ++ pfkey_x_satype->sadb_x_satype_exttype, ++ pfkey_v2_sadb_ext_string(pfkey_x_satype->sadb_x_satype_exttype), ++ pfkey_x_satype->sadb_x_satype_satype, ++ satype2name(pfkey_x_satype->sadb_x_satype_satype), ++ pfkey_x_satype->sadb_x_satype_reserved[0], ++ pfkey_x_satype->sadb_x_satype_reserved[1], ++ pfkey_x_satype->sadb_x_satype_reserved[2]); ++errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_x_ext_debug_parse(struct sadb_ext *pfkey_ext) ++{ ++ int error = 0; ++ int i; ++ struct sadb_x_debug *pfkey_x_debug = (struct sadb_x_debug 
*)pfkey_ext; ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW, ++ "pfkey_x_debug_parse: enter\n"); ++ /* sanity checks... */ ++ if(pfkey_x_debug->sadb_x_debug_len != ++ sizeof(struct sadb_x_debug) / IPSEC_PFKEYv2_ALIGN) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_x_debug_parse: " ++ "size wrong ext_len=%d, key_ext_len=%d.\n", ++ pfkey_x_debug->sadb_x_debug_len, ++ (int)sizeof(struct sadb_x_debug)); ++ SENDERR(EINVAL); ++ } ++ ++ for(i = 0; i < 4; i++) { ++ if(pfkey_x_debug->sadb_x_debug_reserved[i]) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_x_debug_parse: " ++ "reserved[%d]=%d must be set to zero.\n", ++ i, pfkey_x_debug->sadb_x_debug_reserved[i]); ++ SENDERR(EINVAL); ++ } ++ } ++ ++errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_x_ext_protocol_parse(struct sadb_ext *pfkey_ext) ++{ ++ int error = 0; ++ struct sadb_protocol *p = (struct sadb_protocol *)pfkey_ext; ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_x_protocol_parse:\n"); ++ /* sanity checks... */ ++ ++ if (p->sadb_protocol_len != sizeof(*p)/IPSEC_PFKEYv2_ALIGN) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_x_protocol_parse: size wrong ext_len=%d, key_ext_len=%d.\n", ++ p->sadb_protocol_len, (int)sizeof(*p)); ++ SENDERR(EINVAL); ++ } ++ ++ if (p->sadb_protocol_reserved2 != 0) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_protocol_parse: res=%d, must be zero.\n", ++ p->sadb_protocol_reserved2); ++ SENDERR(EINVAL); ++ } ++ ++ errlab: ++ return error; ++} ++ ++#ifdef NAT_TRAVERSAL ++DEBUG_NO_STATIC int ++pfkey_x_ext_nat_t_type_parse(struct sadb_ext *pfkey_ext) ++{ ++ return 0; ++} ++DEBUG_NO_STATIC int ++pfkey_x_ext_nat_t_port_parse(struct sadb_ext *pfkey_ext) ++{ ++ return 0; ++} ++#endif ++ ++#define DEFINEPARSER(NAME) static struct pf_key_ext_parsers_def NAME##_def={NAME, #NAME}; ++ ++DEFINEPARSER(pfkey_sa_parse); ++DEFINEPARSER(pfkey_lifetime_parse); ++DEFINEPARSER(pfkey_address_parse); ++DEFINEPARSER(pfkey_key_parse); ++DEFINEPARSER(pfkey_ident_parse); 
++DEFINEPARSER(pfkey_sens_parse); ++DEFINEPARSER(pfkey_prop_parse); ++DEFINEPARSER(pfkey_supported_parse); ++DEFINEPARSER(pfkey_spirange_parse); ++DEFINEPARSER(pfkey_x_kmprivate_parse); ++DEFINEPARSER(pfkey_x_satype_parse); ++DEFINEPARSER(pfkey_x_ext_debug_parse); ++DEFINEPARSER(pfkey_x_ext_protocol_parse); ++#ifdef NAT_TRAVERSAL ++DEFINEPARSER(pfkey_x_ext_nat_t_type_parse); ++DEFINEPARSER(pfkey_x_ext_nat_t_port_parse); ++#endif ++ ++struct pf_key_ext_parsers_def *ext_default_parsers[]= ++{ ++ NULL, /* pfkey_msg_parse, */ ++ &pfkey_sa_parse_def, ++ &pfkey_lifetime_parse_def, ++ &pfkey_lifetime_parse_def, ++ &pfkey_lifetime_parse_def, ++ &pfkey_address_parse_def, ++ &pfkey_address_parse_def, ++ &pfkey_address_parse_def, ++ &pfkey_key_parse_def, ++ &pfkey_key_parse_def, ++ &pfkey_ident_parse_def, ++ &pfkey_ident_parse_def, ++ &pfkey_sens_parse_def, ++ &pfkey_prop_parse_def, ++ &pfkey_supported_parse_def, ++ &pfkey_supported_parse_def, ++ &pfkey_spirange_parse_def, ++ &pfkey_x_kmprivate_parse_def, ++ &pfkey_x_satype_parse_def, ++ &pfkey_sa_parse_def, ++ &pfkey_address_parse_def, ++ &pfkey_address_parse_def, ++ &pfkey_address_parse_def, ++ &pfkey_address_parse_def, ++ &pfkey_address_parse_def, ++ &pfkey_x_ext_debug_parse_def, ++ &pfkey_x_ext_protocol_parse_def ++#ifdef NAT_TRAVERSAL ++ , ++ &pfkey_x_ext_nat_t_type_parse_def, ++ &pfkey_x_ext_nat_t_port_parse_def, ++ &pfkey_x_ext_nat_t_port_parse_def, ++ &pfkey_address_parse_def ++#endif ++}; ++ ++int ++pfkey_msg_parse(struct sadb_msg *pfkey_msg, ++ struct pf_key_ext_parsers_def *ext_parsers[], ++ struct sadb_ext *extensions[], ++ int dir) ++{ ++ int error = 0; ++ int remain; ++ struct sadb_ext *pfkey_ext; ++ int extensions_seen = 0; ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, ++ "pfkey_msg_parse: " ++ "parsing message ver=%d, type=%d(%s), errno=%d, satype=%d(%s), len=%d, res=%d, seq=%d, pid=%d.\n", ++ pfkey_msg->sadb_msg_version, ++ pfkey_msg->sadb_msg_type, ++ pfkey_v2_sadb_type_string(pfkey_msg->sadb_msg_type), ++ 
pfkey_msg->sadb_msg_errno, ++ pfkey_msg->sadb_msg_satype, ++ satype2name(pfkey_msg->sadb_msg_satype), ++ pfkey_msg->sadb_msg_len, ++ pfkey_msg->sadb_msg_reserved, ++ pfkey_msg->sadb_msg_seq, ++ pfkey_msg->sadb_msg_pid); ++ ++ if(ext_parsers == NULL) ext_parsers = ext_default_parsers; ++ ++ pfkey_extensions_init(extensions); ++ ++ remain = pfkey_msg->sadb_msg_len; ++ remain -= sizeof(struct sadb_msg) / IPSEC_PFKEYv2_ALIGN; ++ ++ pfkey_ext = (struct sadb_ext*)((char*)pfkey_msg + ++ sizeof(struct sadb_msg)); ++ ++ extensions[0] = (struct sadb_ext *) pfkey_msg; ++ ++ ++ if(pfkey_msg->sadb_msg_version != PF_KEY_V2) { ++ ERROR("pfkey_msg_parse: " ++ "not PF_KEY_V2 msg, found %d, should be %d.\n", ++ pfkey_msg->sadb_msg_version, ++ PF_KEY_V2); ++ SENDERR(EINVAL); ++ } ++ ++ if(!pfkey_msg->sadb_msg_type) { ++ ERROR("pfkey_msg_parse: " ++ "msg type not set, must be non-zero..\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(pfkey_msg->sadb_msg_type > SADB_MAX) { ++ ERROR("pfkey_msg_parse: " ++ "msg type=%d > max=%d.\n", ++ pfkey_msg->sadb_msg_type, ++ SADB_MAX); ++ SENDERR(EINVAL); ++ } ++ ++ switch(pfkey_msg->sadb_msg_type) { ++ case SADB_GETSPI: ++ case SADB_UPDATE: ++ case SADB_ADD: ++ case SADB_DELETE: ++ case SADB_GET: ++ case SADB_X_GRPSA: ++ case SADB_X_ADDFLOW: ++ if(!satype2proto(pfkey_msg->sadb_msg_satype)) { ++ ERROR("pfkey_msg_parse: " ++ "satype %d conversion to proto failed for msg_type %d (%s).\n", ++ pfkey_msg->sadb_msg_satype, ++ pfkey_msg->sadb_msg_type, ++ pfkey_v2_sadb_type_string(pfkey_msg->sadb_msg_type)); ++ SENDERR(EINVAL); ++ } else { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "satype %d(%s) conversion to proto gives %d for msg_type %d(%s).\n", ++ pfkey_msg->sadb_msg_satype, ++ satype2name(pfkey_msg->sadb_msg_satype), ++ satype2proto(pfkey_msg->sadb_msg_satype), ++ pfkey_msg->sadb_msg_type, ++ pfkey_v2_sadb_type_string(pfkey_msg->sadb_msg_type)); ++ } ++ case SADB_ACQUIRE: ++ case SADB_REGISTER: ++ case SADB_EXPIRE: ++ 
if(!pfkey_msg->sadb_msg_satype) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "satype is zero, must be non-zero for msg_type %d(%s).\n", ++ pfkey_msg->sadb_msg_type, ++ pfkey_v2_sadb_type_string(pfkey_msg->sadb_msg_type)); ++ SENDERR(EINVAL); ++ } ++ default: ++ break; ++ } ++ ++ /* errno must not be set in downward messages */ ++ /* this is not entirely true... a response to an ACQUIRE could return an error */ ++ if((dir == EXT_BITS_IN) && (pfkey_msg->sadb_msg_type != SADB_ACQUIRE) && pfkey_msg->sadb_msg_errno) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "errno set to %d.\n", ++ pfkey_msg->sadb_msg_errno); ++ SENDERR(EINVAL); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW, ++ "pfkey_msg_parse: " ++ "remain=%d\n", ++ remain ++ ); ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW, ++ "pfkey_msg_parse: " ++ "extensions permitted=%08x, required=%08x.\n", ++ extensions_bitmaps[dir][EXT_BITS_PERM][pfkey_msg->sadb_msg_type], ++ extensions_bitmaps[dir][EXT_BITS_REQ][pfkey_msg->sadb_msg_type]); ++ ++ extensions_seen = 1; ++ ++ while( (remain * IPSEC_PFKEYv2_ALIGN) >= sizeof(struct sadb_ext) ) { ++ /* Is there enough message left to support another extension header? */ ++ if(remain < pfkey_ext->sadb_ext_len) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "remain %d less than ext len %d.\n", ++ remain, pfkey_ext->sadb_ext_len); ++ SENDERR(EINVAL); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW, ++ "pfkey_msg_parse: " ++ "parsing ext type=%d(%s) remain=%d.\n", ++ pfkey_ext->sadb_ext_type, ++ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type), ++ remain); ++ ++ /* Is the extension header type valid? 
*/ ++ if((pfkey_ext->sadb_ext_type > SADB_EXT_MAX) || (!pfkey_ext->sadb_ext_type)) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "ext type %d(%s) invalid, SADB_EXT_MAX=%d.\n", ++ pfkey_ext->sadb_ext_type, ++ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type), ++ SADB_EXT_MAX); ++ SENDERR(EINVAL); ++ } ++ ++ /* Have we already seen this type of extension? */ ++ if((extensions_seen & ( 1 << pfkey_ext->sadb_ext_type )) != 0) ++ { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "ext type %d(%s) already seen.\n", ++ pfkey_ext->sadb_ext_type, ++ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type)); ++ SENDERR(EINVAL); ++ } ++ ++ /* Do I even know about this type of extension? */ ++ if(ext_parsers[pfkey_ext->sadb_ext_type]==NULL) { ++ ERROR("pfkey_msg_parse: " ++ "ext type %d(%s) unknown, ignoring.\n", ++ pfkey_ext->sadb_ext_type, ++ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type)); ++ goto next_ext; ++ } ++ ++ /* Is this type of extension permitted for this type of message? 
*/ ++ if(!(extensions_bitmaps[dir][EXT_BITS_PERM][pfkey_msg->sadb_msg_type] & ++ 1<sadb_ext_type)) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "ext type %d(%s) not permitted, exts_perm_in=%08x, 1<sadb_ext_type, ++ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type), ++ extensions_bitmaps[dir][EXT_BITS_PERM][pfkey_msg->sadb_msg_type], ++ 1<sadb_ext_type); ++ SENDERR(EINVAL); ++ } ++ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, ++ "pfkey_msg_parse: " ++ "remain=%d ext_type=%d(%s) ext_len=%d parsing ext 0p%p with parser %s.\n", ++ remain, ++ pfkey_ext->sadb_ext_type, ++ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type), ++ pfkey_ext->sadb_ext_len, ++ pfkey_ext, ++ ext_parsers[pfkey_ext->sadb_ext_type]->parser_name); ++ ++ /* Parse the extension */ ++ if((error = ++ (*ext_parsers[pfkey_ext->sadb_ext_type]->parser)(pfkey_ext))) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "extension parsing for type %d(%s) failed with error %d.\n", ++ pfkey_ext->sadb_ext_type, ++ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type), ++ error); ++ SENDERR(-error); ++ } ++ DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW, ++ "pfkey_msg_parse: " ++ "Extension %d(%s) parsed.\n", ++ pfkey_ext->sadb_ext_type, ++ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type)); ++ ++ /* Mark that we have seen this extension and remember the header location */ ++ extensions_seen |= ( 1 << pfkey_ext->sadb_ext_type ); ++ extensions[pfkey_ext->sadb_ext_type] = pfkey_ext; ++ ++ next_ext: ++ /* Calculate how much message remains */ ++ remain -= pfkey_ext->sadb_ext_len; ++ ++ if(!remain) { ++ break; ++ } ++ /* Find the next extension header */ ++ pfkey_ext = (struct sadb_ext*)((char*)pfkey_ext + ++ pfkey_ext->sadb_ext_len * IPSEC_PFKEYv2_ALIGN); ++ } ++ ++ if(remain) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "unexpected remainder of %d.\n", ++ remain); ++ /* why is there still something remaining? 
*/ ++ SENDERR(EINVAL); ++ } ++ ++ /* check required extensions */ ++ DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, ++ "pfkey_msg_parse: " ++ "extensions permitted=%08x, seen=%08x, required=%08x.\n", ++ extensions_bitmaps[dir][EXT_BITS_PERM][pfkey_msg->sadb_msg_type], ++ extensions_seen, ++ extensions_bitmaps[dir][EXT_BITS_REQ][pfkey_msg->sadb_msg_type]); ++ ++ /* don't check further if it is an error return message since it ++ may not have a body */ ++ if(pfkey_msg->sadb_msg_errno) { ++ SENDERR(-error); ++ } ++ ++ if((extensions_seen & ++ extensions_bitmaps[dir][EXT_BITS_REQ][pfkey_msg->sadb_msg_type]) != ++ extensions_bitmaps[dir][EXT_BITS_REQ][pfkey_msg->sadb_msg_type]) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "required extensions missing:%08x.\n", ++ extensions_bitmaps[dir][EXT_BITS_REQ][pfkey_msg->sadb_msg_type] - ++ (extensions_seen & ++ extensions_bitmaps[dir][EXT_BITS_REQ][pfkey_msg->sadb_msg_type])); ++ SENDERR(EINVAL); ++ } ++ ++ if((dir == EXT_BITS_IN) && (pfkey_msg->sadb_msg_type == SADB_X_DELFLOW) ++ && ((extensions_seen & SADB_X_EXT_ADDRESS_DELFLOW) ++ != SADB_X_EXT_ADDRESS_DELFLOW) ++ && (((extensions_seen & (1<sadb_sa_flags ++ & SADB_X_SAFLAGS_CLEARFLOW) ++ != SADB_X_SAFLAGS_CLEARFLOW))) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "required SADB_X_DELFLOW extensions missing: either %08x must be present or %08x must be present with SADB_X_SAFLAGS_CLEARFLOW set.\n", ++ SADB_X_EXT_ADDRESS_DELFLOW ++ - (extensions_seen & SADB_X_EXT_ADDRESS_DELFLOW), ++ (1<sadb_msg_type) { ++ case SADB_ADD: ++ case SADB_UPDATE: ++ /* check maturity */ ++ if(((struct sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_state != ++ SADB_SASTATE_MATURE) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "state=%d for add or update should be MATURE=%d.\n", ++ ((struct sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_state, ++ SADB_SASTATE_MATURE); ++ SENDERR(EINVAL); ++ } ++ ++ /* check AH and ESP */ ++ switch(((struct 
sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype) { ++ case SADB_SATYPE_AH: ++ if(!(((struct sadb_sa*)extensions[SADB_EXT_SA]) && ++ ((struct sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_auth != ++ SADB_AALG_NONE)) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "auth alg is zero, must be non-zero for AH SAs.\n"); ++ SENDERR(EINVAL); ++ } ++ if(((struct sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_encrypt != ++ SADB_EALG_NONE) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "AH handed encalg=%d, must be zero.\n", ++ ((struct sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_encrypt); ++ SENDERR(EINVAL); ++ } ++ break; ++ case SADB_SATYPE_ESP: ++ if(!(((struct sadb_sa*)extensions[SADB_EXT_SA]) && ++ ((struct sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_encrypt != ++ SADB_EALG_NONE)) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "encrypt alg=%d is zero, must be non-zero for ESP=%d SAs.\n", ++ ((struct sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_encrypt, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype); ++ SENDERR(EINVAL); ++ } ++ if((((struct sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_encrypt == ++ SADB_EALG_NULL) && ++ (((struct sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_auth == ++ SADB_AALG_NONE) ) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "ESP handed encNULL+authNONE, illegal combination.\n"); ++ SENDERR(EINVAL); ++ } ++ break; ++ case SADB_X_SATYPE_COMP: ++ if(!(((struct sadb_sa*)extensions[SADB_EXT_SA]) && ++ ((struct sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_encrypt != ++ SADB_EALG_NONE)) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "encrypt alg=%d is zero, must be non-zero for COMP=%d SAs.\n", ++ ((struct sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_encrypt, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype); ++ SENDERR(EINVAL); ++ } ++ if(((struct sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_auth != ++ 
SADB_AALG_NONE) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "COMP handed auth=%d, must be zero.\n", ++ ((struct sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_auth); ++ SENDERR(EINVAL); ++ } ++ break; ++ default: ++ break; ++ } ++ if(ntohl(((struct sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_spi) <= 255) { ++ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, ++ "pfkey_msg_parse: " ++ "spi=%08x must be > 255.\n", ++ ntohl(((struct sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_spi)); ++ SENDERR(EINVAL); ++ } ++ default: ++ break; ++ } ++errlab: ++ ++ return error; ++} ++ ++/* ++ * $Log: pfkey_v2_parse.c,v $ ++ * Revision 1.65 2005-04-06 17:46:05 mcr ++ * failure to recognize an extension is considered an error. ++ * This could be a problem in the future, but we need some kind ++ * of logging. This should be rate limited, probably. ++ * ++ * Revision 1.64 2005/01/26 00:50:35 mcr ++ * adjustment of confusion of CONFIG_IPSEC_NAT vs CONFIG_KLIPS_NAT, ++ * and make sure that NAT_TRAVERSAL is set as well to match ++ * userspace compiles of code. ++ * ++ * Revision 1.63 2004/10/28 22:54:10 mcr ++ * results from valgrind, thanks to: Harald Hoyer ++ * ++ * Revision 1.62 2004/10/03 01:26:36 mcr ++ * fixes for gcc 3.4 compilation. ++ * ++ * Revision 1.61 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.59 2004/04/18 03:03:49 mcr ++ * renamed common include files from pluto directory. ++ * ++ * Revision 1.58 2004/03/08 01:59:08 ken ++ * freeswan.h -> openswan.h ++ * ++ * Revision 1.57 2003/12/10 01:20:19 mcr ++ * NAT-traversal patches to KLIPS. ++ * ++ * Revision 1.56 2003/12/04 23:01:12 mcr ++ * removed ipsec_netlink.h ++ * ++ * Revision 1.55 2003/11/07 01:30:37 ken ++ * Cast sizeof() to int to keep things 64bit clean ++ * ++ * Revision 1.54 2003/10/31 02:27:12 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.53.20.2 2003/10/29 01:11:32 mcr ++ * added debugging for pfkey library. 
++ * ++ * Revision 1.53.20.1 2003/09/21 13:59:44 mcr ++ * pre-liminary X.509 patch - does not yet pass tests. ++ * ++ * Revision 1.53 2003/01/30 02:32:09 rgb ++ * ++ * Rename SAref table macro names for clarity. ++ * Convert IPsecSAref_t from signed to unsigned to fix apparent SAref exhaustion bug. ++ * ++ * Revision 1.52 2002/12/30 06:53:07 mcr ++ * deal with short SA structures... #if 0 out for now. Probably ++ * not quite the right way. ++ * ++ * Revision 1.51 2002/12/13 18:16:02 mcr ++ * restored sa_ref code ++ * ++ * Revision 1.50 2002/12/13 18:06:52 mcr ++ * temporarily removed sadb_x_sa_ref reference for 2.xx ++ * ++ * Revision 1.49 2002/10/05 05:02:58 dhr ++ * ++ * C labels go on statements ++ * ++ * Revision 1.48 2002/09/20 15:40:45 rgb ++ * Added sadb_x_sa_ref to struct sadb_sa. ++ * ++ * Revision 1.47 2002/09/20 05:01:31 rgb ++ * Fixed usage of pfkey_lib_debug. ++ * Format for function declaration style consistency. ++ * Added text labels to elucidate numeric values presented. ++ * Re-organised debug output to reduce noise in output. ++ * ++ * Revision 1.46 2002/07/24 18:44:54 rgb ++ * Type fiddling to tame ia64 compiler. ++ * ++ * Revision 1.45 2002/05/23 07:14:11 rgb ++ * Cleaned up %p variants to 0p%p for test suite cleanup. ++ * ++ * Revision 1.44 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.43 2002/04/24 07:36:40 mcr ++ * Moved from ./lib/pfkey_v2_parse.c,v ++ * ++ * Revision 1.42 2002/01/29 22:25:36 rgb ++ * Re-add ipsec_kversion.h to keep MALLOC happy. ++ * ++ * Revision 1.41 2002/01/29 01:59:10 mcr ++ * removal of kversions.h - sources that needed it now use ipsec_param.h. ++ * updating of IPv6 structures to match latest in6.h version. ++ * removed dead code from openswan.h that also duplicated kversions.h ++ * code. ++ * ++ * Revision 1.40 2002/01/20 20:34:50 mcr ++ * added pfkey_v2_sadb_type_string to decode sadb_type to string. 
++ * ++ * Revision 1.39 2001/11/27 05:29:22 mcr ++ * pfkey parses are now maintained by a structure ++ * that includes their name for debug purposes. ++ * DEBUGGING() macro changed so that it takes a debug ++ * level so that pf_key() can use this to decode the ++ * structures without innundanting humans. ++ * Also uses pfkey_v2_sadb_ext_string() in messages. ++ * ++ * Revision 1.38 2001/11/06 19:47:47 rgb ++ * Added packet parameter to lifetime and comb structures. ++ * ++ * Revision 1.37 2001/10/18 04:45:24 rgb ++ * 2.4.9 kernel deprecates linux/malloc.h in favour of linux/slab.h, ++ * lib/openswan.h version macros moved to lib/kversions.h. ++ * Other compiler directive cleanups. ++ * ++ * Revision 1.36 2001/06/14 19:35:16 rgb ++ * Update copyright date. ++ * ++ * Revision 1.35 2001/05/03 19:44:51 rgb ++ * Standardise on SENDERR() macro. ++ * ++ * Revision 1.34 2001/03/16 07:41:51 rgb ++ * Put openswan.h include before pluto includes. ++ * ++ * Revision 1.33 2001/02/27 07:13:51 rgb ++ * Added satype2name() function. ++ * Added text to default satype_tbl entry. ++ * Added satype2name() conversions for most satype debug output. ++ * ++ * Revision 1.32 2001/02/26 20:01:09 rgb ++ * Added internal IP protocol 61 for magic SAs. ++ * Ditch unused sadb_satype2proto[], replaced by satype2proto(). ++ * Re-formatted debug output (split lines, consistent spacing). ++ * Removed acquire, register and expire requirements for a known satype. ++ * Changed message type checking to a switch structure. ++ * Verify expected NULL auth for IPCOMP. ++ * Enforced spi > 0x100 requirement, now that pass uses a magic SA for ++ * appropriate message types. ++ * ++ * Revision 1.31 2000/12/01 07:09:00 rgb ++ * Added ipcomp sanity check to require encalgo is set. ++ * ++ * Revision 1.30 2000/11/17 18:10:30 rgb ++ * Fixed bugs mostly relating to spirange, to treat all spi variables as ++ * network byte order since this is the way PF_KEYv2 stored spis. 
++ * ++ * Revision 1.29 2000/10/12 00:02:39 rgb ++ * Removed 'format, ##' nonsense from debug macros for RH7.0. ++ * ++ * Revision 1.28 2000/09/20 16:23:04 rgb ++ * Remove over-paranoid extension check in the presence of sadb_msg_errno. ++ * ++ * Revision 1.27 2000/09/20 04:04:21 rgb ++ * Changed static functions to DEBUG_NO_STATIC to reveal function names in ++ * oopsen. ++ * ++ * Revision 1.26 2000/09/15 11:37:02 rgb ++ * Merge in heavily modified Svenning Soerensen's ++ * IPCOMP zlib deflate code. ++ * ++ * Revision 1.25 2000/09/12 22:35:37 rgb ++ * Restructured to remove unused extensions from CLEARFLOW messages. ++ * ++ * Revision 1.24 2000/09/12 18:59:54 rgb ++ * Added Gerhard's IPv6 support to pfkey parts of libopenswan. ++ * ++ * Revision 1.23 2000/09/12 03:27:00 rgb ++ * Moved DEBUGGING definition to compile kernel with debug off. ++ * ++ * Revision 1.22 2000/09/09 06:39:27 rgb ++ * Restrict pfkey errno check to downward messages only. ++ * ++ * Revision 1.21 2000/09/08 19:22:34 rgb ++ * Enabled pfkey_sens_parse(). ++ * Added check for errno on downward acquire messages only. ++ * ++ * Revision 1.20 2000/09/01 18:48:23 rgb ++ * Fixed reserved check bug and added debug output in ++ * pfkey_supported_parse(). ++ * Fixed debug output label bug in pfkey_ident_parse(). ++ * ++ * Revision 1.19 2000/08/27 01:55:26 rgb ++ * Define OCTETBITS and PFKEYBITS to avoid using 'magic' numbers in code. ++ * ++ * Revision 1.18 2000/08/24 17:00:36 rgb ++ * Ignore unknown extensions instead of failing. ++ * ++ * Revision 1.17 2000/06/02 22:54:14 rgb ++ * Added Gerhard Gessler's struct sockaddr_storage mods for IPv6 support. ++ * ++ * Revision 1.16 2000/05/10 19:25:11 rgb ++ * Fleshed out proposal and supported extensions. ++ * ++ * Revision 1.15 2000/01/24 21:15:31 rgb ++ * Added disabled pluto pfkey lib debug flag. ++ * Added algo debugging reporting. 
++ * ++ * Revision 1.14 2000/01/22 23:24:29 rgb ++ * Added new functions proto2satype() and satype2proto() and lookup ++ * table satype_tbl. Also added proto2name() since it was easy. ++ * ++ * Revision 1.13 2000/01/21 09:43:59 rgb ++ * Cast ntohl(spi) as (unsigned long int) to shut up compiler. ++ * ++ * Revision 1.12 2000/01/21 06:28:19 rgb ++ * Added address cases for eroute flows. ++ * Indented compiler directives for readability. ++ * Added klipsdebug switching capability. ++ * ++ * Revision 1.11 1999/12/29 21:14:59 rgb ++ * Fixed debug text cut and paste typo. ++ * ++ * Revision 1.10 1999/12/10 17:45:24 rgb ++ * Added address debugging. ++ * ++ * Revision 1.9 1999/12/09 23:11:42 rgb ++ * Ditched include since we no longer use memset(). ++ * Use new pfkey_extensions_init() instead of memset(). ++ * Added check for SATYPE in pfkey_msg_build(). ++ * Tidy up comments and debugging comments. ++ * ++ * Revision 1.8 1999/12/07 19:55:26 rgb ++ * Removed unused first argument from extension parsers. ++ * Removed static pluto debug flag. ++ * Moved message type and state checking to pfkey_msg_parse(). ++ * Changed print[fk] type from lx to x to quiet compiler. ++ * Removed redundant remain check. ++ * Changed __u* types to uint* to avoid use of asm/types.h and ++ * sys/types.h in userspace code. ++ * ++ * Revision 1.7 1999/12/01 22:20:51 rgb ++ * Moved pfkey_lib_debug variable into the library. ++ * Added pfkey version check into header parsing. ++ * Added check for SATYPE only for those extensions that require a ++ * non-zero value. ++ * ++ * Revision 1.6 1999/11/27 11:58:05 rgb ++ * Added ipv6 headers. ++ * Moved sadb_satype2proto protocol lookup table from ++ * klips/net/ipsec/pfkey_v2_parser.c. ++ * Enable lifetime_current checking. ++ * Debugging error messages added. ++ * Add argument to pfkey_msg_parse() for direction. ++ * Consolidated the 4 1-d extension bitmap arrays into one 4-d array. ++ * Add CVS log entry to bottom of file. 
++ * Moved auth and enc alg check to pfkey_msg_parse(). ++ * Enable accidentally disabled spirange parsing. ++ * Moved protocol/algorithm checks from klips/net/ipsec/pfkey_v2_parser.c ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/pfkey_v2_parser.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,3529 @@ ++/* ++ * @(#) RFC2367 PF_KEYv2 Key management API message parser ++ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * RCSID $Id: pfkey_v2_parser.c,v 1.134.2.4 2007-10-30 21:40:36 paul Exp $ ++ */ ++ ++/* ++ * Template from klips/net/ipsec/ipsec/ipsec_netlink.c. 
++ */ ++ ++char pfkey_v2_parser_c_version[] = "$Id: pfkey_v2_parser.c,v 1.134.2.4 2007-10-30 21:40:36 paul Exp $"; ++ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include ++ ++#include ++ ++#include ++ ++#ifdef SPINLOCK ++# ifdef SPINLOCK_23 ++# include /* *lock* */ ++# else /* SPINLOCK_23 */ ++# include /* *lock* */ ++# endif /* SPINLOCK_23 */ ++#endif /* SPINLOCK */ ++ ++#include ++#include ++ ++#include ++#ifdef NETLINK_SOCK ++# include ++#else ++# include ++#endif ++ ++#include /* get_random_bytes() */ ++ ++#include "openswan/radij.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_sa.h" ++ ++#include "openswan/ipsec_radij.h" ++#include "openswan/ipsec_xform.h" ++#include "openswan/ipsec_ah.h" ++#include "openswan/ipsec_esp.h" ++#include "openswan/ipsec_tunnel.h" ++#include "openswan/ipsec_rcv.h" ++#include "openswan/ipcomp.h" ++ ++#include ++#include ++ ++#include "openswan/ipsec_proto.h" ++#include "openswan/ipsec_alg.h" ++ ++#include "openswan/ipsec_kern24.h" ++ ++#define SENDERR(_x) do { error = -(_x); goto errlab; } while (0) ++ ++struct sklist_t { ++ struct socket *sk; ++ struct sklist_t* next; ++} pfkey_sklist_head, *pfkey_sklist, *pfkey_sklist_prev; ++ ++__u32 pfkey_msg_seq = 0; ++ ++ ++#if 0 ++#define DUMP_SAID dump_said(&extr->ips->ips_said, __LINE__) ++#define DUMP_SAID2 dump_said(&extr.ips->ips_said, __LINE__) ++static void dump_said(ip_said *s, int line) ++{ ++ char msa[SATOT_BUF]; ++ size_t msa_len; ++ ++ msa_len = satot(s, 0, msa, sizeof(msa)); ++ ++ printk("line: %d msa: %s\n", line, msa); ++} ++#endif ++ ++ ++int 
++pfkey_alloc_eroute(struct eroute** eroute) ++{ ++ int error = 0; ++ if(*eroute) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_alloc_eroute: " ++ "eroute struct already allocated\n"); ++ SENDERR(EEXIST); ++ } ++ ++ if((*eroute = kmalloc(sizeof(**eroute), GFP_ATOMIC) ) == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_alloc_eroute: " ++ "memory allocation error\n"); ++ SENDERR(ENOMEM); ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_alloc_eroute: " ++ "allocating %lu bytes for an eroute at 0p%p\n", ++ (unsigned long) sizeof(**eroute), *eroute); ++ ++ memset((caddr_t)*eroute, 0, sizeof(**eroute)); ++ (*eroute)->er_eaddr.sen_len = ++ (*eroute)->er_emask.sen_len = sizeof(struct sockaddr_encap); ++ (*eroute)->er_eaddr.sen_family = ++ (*eroute)->er_emask.sen_family = AF_ENCAP; ++ (*eroute)->er_eaddr.sen_type = SENT_IP4; ++ (*eroute)->er_emask.sen_type = 255; ++ (*eroute)->er_pid = 0; ++ (*eroute)->er_count = 0; ++ (*eroute)->er_lasttime = jiffies/HZ; ++ ++ errlab: ++ return(error); ++} ++ ++DEBUG_NO_STATIC int ++pfkey_x_protocol_process(struct sadb_ext *pfkey_ext, ++ struct pfkey_extracted_data *extr) ++{ ++ int error = 0; ++ struct sadb_protocol * p = (struct sadb_protocol *)pfkey_ext; ++ ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_protocol_process: %p\n", extr); ++ ++ if (extr == 0) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_protocol_process:" ++ "extr is NULL, fatal\n"); ++ SENDERR(EINVAL); ++ } ++ if (extr->eroute == 0) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_protocol_process:" ++ "extr->eroute is NULL, fatal\n"); ++ SENDERR(EINVAL); ++ } ++ ++ extr->eroute->er_eaddr.sen_proto = p->sadb_protocol_proto; ++ extr->eroute->er_emask.sen_proto = p->sadb_protocol_proto ? 
~0:0; ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_protocol_process: protocol = %d.\n", ++ p->sadb_protocol_proto); ++ errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_ipsec_sa_init(struct ipsec_sa *ipsp) ++{ ++ ++ return ipsec_sa_init(ipsp); ++} ++ ++int ++pfkey_safe_build(int error, struct sadb_ext *extensions[SADB_MAX+1]) ++{ ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_safe_build: " ++ "error=%d\n", ++ error); ++ if (!error) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_safe_build:" ++ "success.\n"); ++ return 1; ++ } else { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_safe_build:" ++ "caught error %d\n", ++ error); ++ pfkey_extensions_free(extensions); ++ return 0; ++ } ++} ++ ++ ++DEBUG_NO_STATIC int ++pfkey_getspi_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ ipsec_spi_t minspi = htonl(256), maxspi = htonl(-1L); ++ int found_avail = 0; ++ struct ipsec_sa *ipsq; ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ struct sadb_ext *extensions_reply[SADB_EXT_MAX+1]; ++ struct sadb_msg *pfkey_reply = NULL; ++ struct socket_list *pfkey_socketsp; ++ uint8_t satype = ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_getspi_parse: .\n"); ++ ++ pfkey_extensions_init(extensions_reply); ++ ++ if(extr == NULL || extr->ips == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_getspi_parse: " ++ "error, extr or extr->ipsec_sa pointer NULL\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(extensions[SADB_EXT_SPIRANGE]) { ++ minspi = ((struct sadb_spirange *)extensions[SADB_EXT_SPIRANGE])->sadb_spirange_min; ++ maxspi = ((struct sadb_spirange *)extensions[SADB_EXT_SPIRANGE])->sadb_spirange_max; ++ } ++ ++ if(maxspi == minspi) { ++ extr->ips->ips_said.spi = maxspi; ++ ipsq = ipsec_sa_getbyid(&(extr->ips->ips_said)); ++ if(ipsq != NULL) { ++ sa_len = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa, sizeof(sa)); ++ 
ipsec_sa_put(ipsq); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_getspi_parse: " ++ "EMT_GETSPI found an old ipsec_sa for SA: %s, delete it first.\n", ++ sa_len ? sa : " (error)"); ++ SENDERR(EEXIST); ++ } else { ++ found_avail = 1; ++ } ++ } else { ++ int i = 0; ++ __u32 rand_val; ++ __u32 spi_diff; ++ while( ( i < (spi_diff = (ntohl(maxspi) - ntohl(minspi)))) && !found_avail ) { ++ prng_bytes(&ipsec_prng, (char *) &(rand_val), ++ ( (spi_diff < (2^8)) ? 1 : ++ ( (spi_diff < (2^16)) ? 2 : ++ ( (spi_diff < (2^24)) ? 3 : ++ 4 ) ) ) ); ++ extr->ips->ips_said.spi = htonl(ntohl(minspi) + ++ (rand_val % ++ (spi_diff + 1))); ++ i++; ++ ipsq = ipsec_sa_getbyid(&(extr->ips->ips_said)); ++ if(ipsq == NULL) { ++ found_avail = 1; ++ } else { ++ ipsec_sa_put(ipsq); ++ } ++ } ++ } ++ ++ sa_len = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa, sizeof(sa)); ++ ++ if (!found_avail) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_getspi_parse: " ++ "found an old ipsec_sa for SA: %s, delete it first.\n", ++ sa_len ? sa : " (error)"); ++ SENDERR(EEXIST); ++ } ++ ++ if(inet_addr_type((unsigned long)extr->ips->ips_said.dst.u.v4.sin_addr.s_addr) == RTN_LOCAL) { ++ extr->ips->ips_flags |= EMT_INBOUND; ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_getspi_parse: " ++ "existing ipsec_sa not found (this is good) for SA: %s, %s-bound, allocating.\n", ++ sa_len ? sa : " (error)", ++ extr->ips->ips_flags & EMT_INBOUND ? 
"in" : "out"); ++ ++ /* XXX extr->ips->ips_rcvif = &(enc_softc[em->em_if].enc_if);*/ ++ extr->ips->ips_rcvif = NULL; ++ extr->ips->ips_life.ipl_addtime.ipl_count = jiffies/HZ; ++ ++ extr->ips->ips_state = SADB_SASTATE_LARVAL; ++ ++ if(!extr->ips->ips_life.ipl_allocations.ipl_count) { ++ extr->ips->ips_life.ipl_allocations.ipl_count += 1; ++ } ++ ++ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0], ++ SADB_GETSPI, ++ satype, ++ 0, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_seq, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_pid), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_sa_ref_build(&extensions_reply[SADB_EXT_SA], ++ SADB_EXT_SA, ++ extr->ips->ips_said.spi, ++ 0, ++ SADB_SASTATE_LARVAL, ++ 0, ++ 0, ++ 0, ++ extr->ips->ips_ref), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_EXT_ADDRESS_SRC], ++ SADB_EXT_ADDRESS_SRC, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips->ips_addr_s), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_EXT_ADDRESS_DST], ++ SADB_EXT_ADDRESS_DST, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips->ips_addr_d), ++ extensions_reply) )) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_getspi_parse: " ++ "failed to build the getspi reply message extensions\n"); ++ goto errlab; ++ } ++ ++ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_getspi_parse: " ++ "failed to build the getspi reply message\n"); ++ SENDERR(-error); ++ } ++ for(pfkey_socketsp = pfkey_open_sockets; ++ pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_getspi_parse: " ++ "sending up getspi reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n", ++ satype, ++ satype2name(satype), ++ 
pfkey_socketsp->socketp, ++ error); ++ SENDERR(-error); ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_getspi_parse: " ++ "sending up getspi reply message for satype=%d(%s) to socket=0p%p succeeded.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp); ++ } ++ ++ if((error = ipsec_sa_add(extr->ips))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_getspi_parse: " ++ "failed to add the larval SA=%s with error=%d.\n", ++ sa_len ? sa : " (error)", ++ error); ++ SENDERR(-error); ++ } ++ extr->ips = NULL; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_getspi_parse: " ++ "successful for SA: %s\n", ++ sa_len ? sa : " (error)"); ++ ++ errlab: ++ if (pfkey_reply) { ++ pfkey_msg_free(&pfkey_reply); ++ } ++ pfkey_extensions_free(extensions_reply); ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_update_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ struct ipsec_sa* ipsq; ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ struct sadb_ext *extensions_reply[SADB_EXT_MAX+1]; ++ struct sadb_msg *pfkey_reply = NULL; ++ struct socket_list *pfkey_socketsp; ++ uint8_t satype = ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype; ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ struct ipsec_sa *nat_t_ips_saved = NULL; ++#endif ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_update_parse: .\n"); ++ ++ pfkey_extensions_init(extensions_reply); ++ ++ if(((struct sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_state != SADB_SASTATE_MATURE) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_update_parse: " ++ "error, sa_state=%d must be MATURE=%d\n", ++ ((struct sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_state, ++ SADB_SASTATE_MATURE); ++ SENDERR(EINVAL); ++ } ++ ++ if(extr == NULL || extr->ips == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_update_parse: " ++ "error, extr or extr->ips pointer NULL\n"); ++ SENDERR(EINVAL); ++ } ++ ++ sa_len = KLIPS_SATOT(debug_pfkey, 
&extr->ips->ips_said, 0, sa, sizeof(sa)); ++ ++ spin_lock_bh(&tdb_lock); ++ ++ ipsq = ipsec_sa_getbyid(&(extr->ips->ips_said)); ++ if (ipsq == NULL) { ++ spin_unlock_bh(&tdb_lock); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_update_parse: " ++ "reserved ipsec_sa for SA: %s not found. Call SADB_GETSPI first or call SADB_ADD instead.\n", ++ sa_len ? sa : " (error)"); ++ SENDERR(ENOENT); ++ } ++ ++ if(inet_addr_type((unsigned long)extr->ips->ips_said.dst.u.v4.sin_addr.s_addr) == RTN_LOCAL) { ++ extr->ips->ips_flags |= EMT_INBOUND; ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_update_parse: " ++ "existing ipsec_sa found (this is good) for SA: %s, %s-bound, updating.\n", ++ sa_len ? sa : " (error)", ++ extr->ips->ips_flags & EMT_INBOUND ? "in" : "out"); ++ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ if (extr->ips->ips_natt_sport || extr->ips->ips_natt_dport) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_update_parse: only updating NAT-T ports " ++ "(%u:%u -> %u:%u)\n", ++ ipsq->ips_natt_sport, ipsq->ips_natt_dport, ++ extr->ips->ips_natt_sport, extr->ips->ips_natt_dport); ++ ++ if (extr->ips->ips_natt_sport) { ++ ipsq->ips_natt_sport = extr->ips->ips_natt_sport; ++ if (ipsq->ips_addr_s->sa_family == AF_INET) { ++ ((struct sockaddr_in *)(ipsq->ips_addr_s))->sin_port = htons(extr->ips->ips_natt_sport); ++ } ++ } ++ ++ if (extr->ips->ips_natt_dport) { ++ ipsq->ips_natt_dport = extr->ips->ips_natt_dport; ++ if (ipsq->ips_addr_d->sa_family == AF_INET) { ++ ((struct sockaddr_in *)(ipsq->ips_addr_d))->sin_port = htons(extr->ips->ips_natt_dport); ++ } ++ } ++ ++ nat_t_ips_saved = extr->ips; ++ extr->ips = ipsq; ++ } ++ else { ++#endif ++ ++ /* XXX extr->ips->ips_rcvif = &(enc_softc[em->em_if].enc_if);*/ ++ extr->ips->ips_rcvif = NULL; ++ if ((error = pfkey_ipsec_sa_init(extr->ips))) { ++ ipsec_sa_put(ipsq); ++ spin_unlock_bh(&tdb_lock); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_update_parse: " ++ "not successful for SA: %s, deleting.\n", ++ sa_len ? 
sa : " (error)"); ++ SENDERR(-error); ++ } ++ ++ extr->ips->ips_life.ipl_addtime.ipl_count = ipsq->ips_life.ipl_addtime.ipl_count; ++ ipsec_sa_put(ipsq); ++ if((error = ipsec_sa_delchain(ipsq))) { ++ spin_unlock_bh(&tdb_lock); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_update_parse: " ++ "error=%d, trouble deleting intermediate ipsec_sa for SA=%s.\n", ++ error, ++ sa_len ? sa : " (error)"); ++ SENDERR(-error); ++ } ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ } ++#endif ++ ++ spin_unlock_bh(&tdb_lock); ++ ++ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0], ++ SADB_UPDATE, ++ satype, ++ 0, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_seq, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_pid), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_sa_ref_build(&extensions_reply[SADB_EXT_SA], ++ SADB_EXT_SA, ++ extr->ips->ips_said.spi, ++ extr->ips->ips_replaywin, ++ extr->ips->ips_state, ++ extr->ips->ips_authalg, ++ extr->ips->ips_encalg, ++ extr->ips->ips_flags, ++ extr->ips->ips_ref), ++ extensions_reply) ++ /* The 3 lifetime extentions should only be sent if non-zero. */ ++ && (extensions[SADB_EXT_LIFETIME_HARD] ++ ? pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[SADB_EXT_LIFETIME_HARD], ++ SADB_EXT_LIFETIME_HARD, ++ extr->ips->ips_life.ipl_allocations.ipl_hard, ++ extr->ips->ips_life.ipl_bytes.ipl_hard, ++ extr->ips->ips_life.ipl_addtime.ipl_hard, ++ extr->ips->ips_life.ipl_usetime.ipl_hard, ++ extr->ips->ips_life.ipl_packets.ipl_hard), ++ extensions_reply) : 1) ++ && (extensions[SADB_EXT_LIFETIME_SOFT] ++ ? 
pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[SADB_EXT_LIFETIME_SOFT], ++ SADB_EXT_LIFETIME_SOFT, ++ extr->ips->ips_life.ipl_allocations.ipl_count, ++ extr->ips->ips_life.ipl_bytes.ipl_count, ++ extr->ips->ips_life.ipl_addtime.ipl_count, ++ extr->ips->ips_life.ipl_usetime.ipl_count, ++ extr->ips->ips_life.ipl_packets.ipl_count), ++ extensions_reply) : 1) ++ && (extr->ips->ips_life.ipl_allocations.ipl_count ++ || extr->ips->ips_life.ipl_bytes.ipl_count ++ || extr->ips->ips_life.ipl_addtime.ipl_count ++ || extr->ips->ips_life.ipl_usetime.ipl_count ++ || extr->ips->ips_life.ipl_packets.ipl_count ++ ++ ? pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[SADB_EXT_LIFETIME_CURRENT], ++ SADB_EXT_LIFETIME_CURRENT, ++ extr->ips->ips_life.ipl_allocations.ipl_count, ++ extr->ips->ips_life.ipl_bytes.ipl_count, ++ extr->ips->ips_life.ipl_addtime.ipl_count, ++ extr->ips->ips_life.ipl_usetime.ipl_count, ++ extr->ips->ips_life.ipl_packets.ipl_count), ++ extensions_reply) : 1) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_EXT_ADDRESS_SRC], ++ SADB_EXT_ADDRESS_SRC, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips->ips_addr_s), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_EXT_ADDRESS_DST], ++ SADB_EXT_ADDRESS_DST, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips->ips_addr_d), ++ extensions_reply) ++ && (extr->ips->ips_ident_s.data ++ ? pfkey_safe_build(error = pfkey_ident_build(&extensions_reply[SADB_EXT_IDENTITY_SRC], ++ SADB_EXT_IDENTITY_SRC, ++ extr->ips->ips_ident_s.type, ++ extr->ips->ips_ident_s.id, ++ extr->ips->ips_ident_s.len, ++ extr->ips->ips_ident_s.data), ++ extensions_reply) : 1) ++ && (extr->ips->ips_ident_d.data ++ ? 
pfkey_safe_build(error = pfkey_ident_build(&extensions_reply[SADB_EXT_IDENTITY_DST], ++ SADB_EXT_IDENTITY_DST, ++ extr->ips->ips_ident_d.type, ++ extr->ips->ips_ident_d.id, ++ extr->ips->ips_ident_d.len, ++ extr->ips->ips_ident_d.data), ++ extensions_reply) : 1) ++#if 0 ++ /* FIXME: This won't work yet because I have not finished ++ it. */ ++ && (extr->ips->ips_sens_ ++ ? pfkey_safe_build(error = pfkey_sens_build(&extensions_reply[SADB_EXT_SENSITIVITY], ++ extr->ips->ips_sens_dpd, ++ extr->ips->ips_sens_sens_level, ++ extr->ips->ips_sens_sens_len, ++ extr->ips->ips_sens_sens_bitmap, ++ extr->ips->ips_sens_integ_level, ++ extr->ips->ips_sens_integ_len, ++ extr->ips->ips_sens_integ_bitmap), ++ extensions_reply) : 1) ++#endif ++ )) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_update_parse: " ++ "failed to build the update reply message extensions\n"); ++ SENDERR(-error); ++ } ++ ++ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_update_parse: " ++ "failed to build the update reply message\n"); ++ SENDERR(-error); ++ } ++ for(pfkey_socketsp = pfkey_open_sockets; ++ pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_update_parse: " ++ "sending up update reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp, ++ error); ++ SENDERR(-error); ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_update_parse: " ++ "sending up update reply message for satype=%d(%s) to socket=0p%p succeeded.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp); ++ } ++ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ if (nat_t_ips_saved) { ++ /** ++ * As we _really_ update existing SA, we keep tdbq and need to delete ++ * parsed ips (nat_t_ips_saved, was extr->ips). 
++ * ++ * goto errlab with extr->ips = nat_t_ips_saved will free it. ++ */ ++ ++ extr->ips = nat_t_ips_saved; ++ ++ error = 0; ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_update_parse (NAT-T ports): " ++ "successful for SA: %s\n", ++ sa_len ? sa : " (error)"); ++ ++ goto errlab; ++ } ++#endif ++ ++ if((error = ipsec_sa_add(extr->ips))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_update_parse: " ++ "failed to update the mature SA=%s with error=%d.\n", ++ sa_len ? sa : " (error)", ++ error); ++ SENDERR(-error); ++ } ++ extr->ips = NULL; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_update_parse: " ++ "successful for SA: %s\n", ++ sa_len ? sa : " (error)"); ++ ++ errlab: ++ if (pfkey_reply) { ++ pfkey_msg_free(&pfkey_reply); ++ } ++ pfkey_extensions_free(extensions_reply); ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_add_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ struct ipsec_sa* ipsq; ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ struct sadb_ext *extensions_reply[SADB_EXT_MAX+1]; ++ struct sadb_msg *pfkey_reply = NULL; ++ struct socket_list *pfkey_socketsp; ++ uint8_t satype = ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_add_parse: .\n"); ++ ++ pfkey_extensions_init(extensions_reply); ++ ++ if(((struct sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_state != SADB_SASTATE_MATURE) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_add_parse: " ++ "error, sa_state=%d must be MATURE=%d\n", ++ ((struct sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_state, ++ SADB_SASTATE_MATURE); ++ SENDERR(EINVAL); ++ } ++ ++ if(!extr || !extr->ips) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_add_parse: " ++ "extr or extr->ips pointer NULL\n"); ++ SENDERR(EINVAL); ++ } ++ ++ sa_len = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa, sizeof(sa)); ++ ++ ipsq = ipsec_sa_getbyid(&(extr->ips->ips_said)); ++ if(ipsq != 
NULL) { ++ ipsec_sa_put(ipsq); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_add_parse: " ++ "found an old ipsec_sa for SA%s, delete it first.\n", ++ sa_len ? sa : " (error)"); ++ SENDERR(EEXIST); ++ } ++ ++ if(inet_addr_type((unsigned long)extr->ips->ips_said.dst.u.v4.sin_addr.s_addr) == RTN_LOCAL) { ++ extr->ips->ips_flags |= EMT_INBOUND; ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_add_parse: " ++ "existing ipsec_sa not found (this is good) for SA%s, %s-bound, allocating.\n", ++ sa_len ? sa : " (error)", ++ extr->ips->ips_flags & EMT_INBOUND ? "in" : "out"); ++ ++ /* XXX extr->ips->ips_rcvif = &(enc_softc[em->em_if].enc_if);*/ ++ extr->ips->ips_rcvif = NULL; ++ ++ if ((error = pfkey_ipsec_sa_init(extr->ips))) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_add_parse: " ++ "not successful for SA: %s, deleting.\n", ++ sa_len ? sa : " (error)"); ++ SENDERR(-error); ++ } ++ ++ extr->ips->ips_life.ipl_addtime.ipl_count = jiffies / HZ; ++ if(!extr->ips->ips_life.ipl_allocations.ipl_count) { ++ extr->ips->ips_life.ipl_allocations.ipl_count += 1; ++ } ++ ++ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0], ++ SADB_ADD, ++ satype, ++ 0, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_seq, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_pid), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_sa_ref_build(&extensions_reply[SADB_EXT_SA], ++ SADB_EXT_SA, ++ extr->ips->ips_said.spi, ++ extr->ips->ips_replaywin, ++ extr->ips->ips_state, ++ extr->ips->ips_authalg, ++ extr->ips->ips_encalg, ++ extr->ips->ips_flags, ++ extr->ips->ips_ref), ++ extensions_reply) ++ /* The 3 lifetime extentions should only be sent if non-zero. */ ++ && (extensions[SADB_EXT_LIFETIME_HARD] ++ ? 
pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[SADB_EXT_LIFETIME_HARD], ++ SADB_EXT_LIFETIME_HARD, ++ extr->ips->ips_life.ipl_allocations.ipl_hard, ++ extr->ips->ips_life.ipl_bytes.ipl_hard, ++ extr->ips->ips_life.ipl_addtime.ipl_hard, ++ extr->ips->ips_life.ipl_usetime.ipl_hard, ++ extr->ips->ips_life.ipl_packets.ipl_hard), ++ extensions_reply) : 1) ++ && (extensions[SADB_EXT_LIFETIME_SOFT] ++ ? pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[SADB_EXT_LIFETIME_SOFT], ++ SADB_EXT_LIFETIME_SOFT, ++ extr->ips->ips_life.ipl_allocations.ipl_soft, ++ extr->ips->ips_life.ipl_bytes.ipl_soft, ++ extr->ips->ips_life.ipl_addtime.ipl_soft, ++ extr->ips->ips_life.ipl_usetime.ipl_soft, ++ extr->ips->ips_life.ipl_packets.ipl_soft), ++ extensions_reply) : 1) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_EXT_ADDRESS_SRC], ++ SADB_EXT_ADDRESS_SRC, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips->ips_addr_s), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_EXT_ADDRESS_DST], ++ SADB_EXT_ADDRESS_DST, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips->ips_addr_d), ++ extensions_reply) ++ && (extr->ips->ips_ident_s.data ++ ? pfkey_safe_build(error = pfkey_ident_build(&extensions_reply[SADB_EXT_IDENTITY_SRC], ++ SADB_EXT_IDENTITY_SRC, ++ extr->ips->ips_ident_s.type, ++ extr->ips->ips_ident_s.id, ++ extr->ips->ips_ident_s.len, ++ extr->ips->ips_ident_s.data), ++ extensions_reply) : 1) ++ && (extr->ips->ips_ident_d.data ++ ? pfkey_safe_build(error = pfkey_ident_build(&extensions_reply[SADB_EXT_IDENTITY_DST], ++ SADB_EXT_IDENTITY_DST, ++ extr->ips->ips_ident_d.type, ++ extr->ips->ips_ident_d.id, ++ extr->ips->ips_ident_d.len, ++ extr->ips->ips_ident_d.data), ++ extensions_reply) : 1) ++#if 0 ++ /* FIXME: This won't work yet because I have not finished ++ it. */ ++ && (extr->ips->ips_sens_ ++ ? 
pfkey_safe_build(error = pfkey_sens_build(&extensions_reply[SADB_EXT_SENSITIVITY], ++ extr->ips->ips_sens_dpd, ++ extr->ips->ips_sens_sens_level, ++ extr->ips->ips_sens_sens_len, ++ extr->ips->ips_sens_sens_bitmap, ++ extr->ips->ips_sens_integ_level, ++ extr->ips->ips_sens_integ_len, ++ extr->ips->ips_sens_integ_bitmap), ++ extensions_reply) : 1) ++#endif ++ )) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_add_parse: " ++ "failed to build the add reply message extensions\n"); ++ SENDERR(-error); ++ } ++ ++ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_add_parse: " ++ "failed to build the add reply message\n"); ++ SENDERR(-error); ++ } ++ for(pfkey_socketsp = pfkey_open_sockets; ++ pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_add_parse: " ++ "sending up add reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp, ++ error); ++ SENDERR(-error); ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_add_parse: " ++ "sending up add reply message for satype=%d(%s) to socket=0p%p succeeded.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp); ++ } ++ ++ if((error = ipsec_sa_add(extr->ips))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_add_parse: " ++ "failed to add the mature SA=%s with error=%d.\n", ++ sa_len ? sa : " (error)", ++ error); ++ SENDERR(-error); ++ } ++ extr->ips = NULL; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_add_parse: " ++ "successful for SA: %s\n", ++ sa_len ? 
sa : " (error)"); ++ ++ errlab: ++ if (pfkey_reply) { ++ pfkey_msg_free(&pfkey_reply); ++ } ++ pfkey_extensions_free(extensions_reply); ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_delete_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ struct ipsec_sa *ipsp; ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ int error = 0; ++ struct sadb_ext *extensions_reply[SADB_EXT_MAX+1]; ++ struct sadb_msg *pfkey_reply = NULL; ++ struct socket_list *pfkey_socketsp; ++ uint8_t satype = ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_delete_parse: .\n"); ++ ++ pfkey_extensions_init(extensions_reply); ++ ++ if(!extr || !extr->ips) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_delete_parse: " ++ "extr or extr->ips pointer NULL, fatal\n"); ++ SENDERR(EINVAL); ++ } ++ ++ sa_len = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa, sizeof(sa)); ++ ++ spin_lock_bh(&tdb_lock); ++ ++ ipsp = ipsec_sa_getbyid(&(extr->ips->ips_said)); ++ if (ipsp == NULL) { ++ spin_unlock_bh(&tdb_lock); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_delete_parse: " ++ "ipsec_sa not found for SA:%s, could not delete.\n", ++ sa_len ? sa : " (error)"); ++ SENDERR(ESRCH); ++ } ++ ++ ipsec_sa_put(ipsp); ++ if((error = ipsec_sa_delchain(ipsp))) { ++ spin_unlock_bh(&tdb_lock); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_delete_parse: " ++ "error=%d returned trying to delete ipsec_sa for SA:%s.\n", ++ error, ++ sa_len ? 
sa : " (error)"); ++ SENDERR(-error); ++ } ++ spin_unlock_bh(&tdb_lock); ++ ++ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0], ++ SADB_DELETE, ++ satype, ++ 0, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_seq, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_pid), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_sa_ref_build(&extensions_reply[SADB_EXT_SA], ++ SADB_EXT_SA, ++ extr->ips->ips_said.spi, ++ 0, ++ 0, ++ 0, ++ 0, ++ 0, ++ extr->ips->ips_ref), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_EXT_ADDRESS_SRC], ++ SADB_EXT_ADDRESS_SRC, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips->ips_addr_s), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_EXT_ADDRESS_DST], ++ SADB_EXT_ADDRESS_DST, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips->ips_addr_d), ++ extensions_reply) ++ )) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_delete_parse: " ++ "failed to build the delete reply message extensions\n"); ++ SENDERR(-error); ++ } ++ ++ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_delete_parse: " ++ "failed to build the delete reply message\n"); ++ SENDERR(-error); ++ } ++ for(pfkey_socketsp = pfkey_open_sockets; ++ pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_delete_parse: " ++ "sending up delete reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp, ++ error); ++ SENDERR(-error); ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_delete_parse: " ++ "sending up delete reply message for satype=%d(%s) to socket=0p%p succeeded.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp); ++ } ++ ++ 
errlab: ++ if (pfkey_reply) { ++ pfkey_msg_free(&pfkey_reply); ++ } ++ pfkey_extensions_free(extensions_reply); ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_get_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ struct ipsec_sa *ipsp; ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ struct sadb_ext *extensions_reply[SADB_EXT_MAX+1]; ++ struct sadb_msg *pfkey_reply = NULL; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_get_parse: .\n"); ++ ++ pfkey_extensions_init(extensions_reply); ++ ++ if(!extr || !extr->ips) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_get_parse: " ++ "extr or extr->ips pointer NULL, fatal\n"); ++ SENDERR(EINVAL); ++ } ++ ++ sa_len = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa, sizeof(sa)); ++ ++ spin_lock_bh(&tdb_lock); ++ ++ ipsp = ipsec_sa_getbyid(&(extr->ips->ips_said)); ++ if (ipsp == NULL) { ++ spin_unlock_bh(&tdb_lock); ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_get_parse: " ++ "ipsec_sa not found for SA=%s, could not get.\n", ++ sa_len ? sa : " (error)"); ++ SENDERR(ESRCH); ++ } ++ ++ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0], ++ SADB_GET, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype, ++ 0, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_seq, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_pid), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_sa_ref_build(&extensions_reply[SADB_EXT_SA], ++ SADB_EXT_SA, ++ extr->ips->ips_said.spi, ++ extr->ips->ips_replaywin, ++ extr->ips->ips_state, ++ extr->ips->ips_authalg, ++ extr->ips->ips_encalg, ++ extr->ips->ips_flags, ++ extr->ips->ips_ref), ++ extensions_reply) ++ /* The 3 lifetime extentions should only be sent if non-zero. 
*/ ++ && (ipsp->ips_life.ipl_allocations.ipl_count ++ || ipsp->ips_life.ipl_bytes.ipl_count ++ || ipsp->ips_life.ipl_addtime.ipl_count ++ || ipsp->ips_life.ipl_usetime.ipl_count ++ || ipsp->ips_life.ipl_packets.ipl_count ++ ? pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[SADB_EXT_LIFETIME_CURRENT], ++ SADB_EXT_LIFETIME_CURRENT, ++ ipsp->ips_life.ipl_allocations.ipl_count, ++ ipsp->ips_life.ipl_bytes.ipl_count, ++ ipsp->ips_life.ipl_addtime.ipl_count, ++ ipsp->ips_life.ipl_usetime.ipl_count, ++ ipsp->ips_life.ipl_packets.ipl_count), ++ extensions_reply) : 1) ++ && (ipsp->ips_life.ipl_allocations.ipl_hard ++ || ipsp->ips_life.ipl_bytes.ipl_hard ++ || ipsp->ips_life.ipl_addtime.ipl_hard ++ || ipsp->ips_life.ipl_usetime.ipl_hard ++ || ipsp->ips_life.ipl_packets.ipl_hard ++ ? pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[SADB_EXT_LIFETIME_HARD], ++ SADB_EXT_LIFETIME_HARD, ++ ipsp->ips_life.ipl_allocations.ipl_hard, ++ ipsp->ips_life.ipl_bytes.ipl_hard, ++ ipsp->ips_life.ipl_addtime.ipl_hard, ++ ipsp->ips_life.ipl_usetime.ipl_hard, ++ ipsp->ips_life.ipl_packets.ipl_hard), ++ extensions_reply) : 1) ++ && (ipsp->ips_life.ipl_allocations.ipl_soft ++ || ipsp->ips_life.ipl_bytes.ipl_soft ++ || ipsp->ips_life.ipl_addtime.ipl_soft ++ || ipsp->ips_life.ipl_usetime.ipl_soft ++ || ipsp->ips_life.ipl_packets.ipl_soft ++ ? 
pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[SADB_EXT_LIFETIME_SOFT], ++ SADB_EXT_LIFETIME_SOFT, ++ ipsp->ips_life.ipl_allocations.ipl_soft, ++ ipsp->ips_life.ipl_bytes.ipl_soft, ++ ipsp->ips_life.ipl_addtime.ipl_soft, ++ ipsp->ips_life.ipl_usetime.ipl_soft, ++ ipsp->ips_life.ipl_packets.ipl_soft), ++ extensions_reply) : 1) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_EXT_ADDRESS_SRC], ++ SADB_EXT_ADDRESS_SRC, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips->ips_addr_s), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_EXT_ADDRESS_DST], ++ SADB_EXT_ADDRESS_DST, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips->ips_addr_d), ++ extensions_reply) ++ && (extr->ips->ips_addr_p ++ ? pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_EXT_ADDRESS_PROXY], ++ SADB_EXT_ADDRESS_PROXY, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips->ips_addr_p), ++ extensions_reply) : 1) ++#if 0 ++ /* FIXME: This won't work yet because the keys are not ++ stored directly in the ipsec_sa. They are stored as ++ contexts. */ ++ && (extr->ips->ips_key_a_size ++ ? pfkey_safe_build(error = pfkey_key_build(&extensions_reply[SADB_EXT_KEY_AUTH], ++ SADB_EXT_KEY_AUTH, ++ extr->ips->ips_key_a_size * 8, ++ extr->ips->ips_key_a), ++ extensions_reply) : 1) ++ /* FIXME: This won't work yet because the keys are not ++ stored directly in the ipsec_sa. They are stored as ++ key schedules. */ ++ && (extr->ips->ips_key_e_size ++ ? pfkey_safe_build(error = pfkey_key_build(&extensions_reply[SADB_EXT_KEY_ENCRYPT], ++ SADB_EXT_KEY_ENCRYPT, ++ extr->ips->ips_key_e_size * 8, ++ extr->ips->ips_key_e), ++ extensions_reply) : 1) ++#endif ++ && (extr->ips->ips_ident_s.data ++ ? 
pfkey_safe_build(error = pfkey_ident_build(&extensions_reply[SADB_EXT_IDENTITY_SRC], ++ SADB_EXT_IDENTITY_SRC, ++ extr->ips->ips_ident_s.type, ++ extr->ips->ips_ident_s.id, ++ extr->ips->ips_ident_s.len, ++ extr->ips->ips_ident_s.data), ++ extensions_reply) : 1) ++ && (extr->ips->ips_ident_d.data ++ ? pfkey_safe_build(error = pfkey_ident_build(&extensions_reply[SADB_EXT_IDENTITY_DST], ++ SADB_EXT_IDENTITY_DST, ++ extr->ips->ips_ident_d.type, ++ extr->ips->ips_ident_d.id, ++ extr->ips->ips_ident_d.len, ++ extr->ips->ips_ident_d.data), ++ extensions_reply) : 1) ++#if 0 ++ /* FIXME: This won't work yet because I have not finished ++ it. */ ++ && (extr->ips->ips_sens_ ++ ? pfkey_safe_build(error = pfkey_sens_build(&extensions_reply[SADB_EXT_SENSITIVITY], ++ extr->ips->ips_sens_dpd, ++ extr->ips->ips_sens_sens_level, ++ extr->ips->ips_sens_sens_len, ++ extr->ips->ips_sens_sens_bitmap, ++ extr->ips->ips_sens_integ_level, ++ extr->ips->ips_sens_integ_len, ++ extr->ips->ips_sens_integ_bitmap), ++ extensions_reply) : 1) ++#endif ++ )) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_get_parse: " ++ "failed to build the get reply message extensions\n"); ++ ipsec_sa_put(ipsp); ++ spin_unlock_bh(&tdb_lock); ++ SENDERR(-error); ++ } ++ ++ ipsec_sa_put(ipsp); ++ spin_unlock_bh(&tdb_lock); ++ ++ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_get_parse: " ++ "failed to build the get reply message\n"); ++ SENDERR(-error); ++ } ++ ++ if((error = pfkey_upmsg(sk->sk_socket, pfkey_reply))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_get_parse: " ++ "failed to send the get reply message\n"); ++ SENDERR(-error); ++ } ++ ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_get_parse: " ++ "succeeded in sending get reply message.\n"); ++ ++ errlab: ++ if (pfkey_reply) { ++ pfkey_msg_free(&pfkey_reply); ++ } ++ pfkey_extensions_free(extensions_reply); ++ return error; ++} ++ ++DEBUG_NO_STATIC int 
++pfkey_acquire_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ struct socket_list *pfkey_socketsp; ++ uint8_t satype = ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_acquire_parse: .\n"); ++ ++ /* XXX I don't know if we want an upper bound, since userspace may ++ want to register itself for an satype > SADB_SATYPE_MAX. */ ++ if((satype == 0) || (satype > SADB_SATYPE_MAX)) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_acquire_parse: " ++ "SATYPE=%d invalid.\n", ++ satype); ++ SENDERR(EINVAL); ++ } ++ ++ if(!(pfkey_registered_sockets[satype])) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_acquire_parse: " ++ "no sockets registered for SAtype=%d(%s).\n", ++ satype, ++ satype2name(satype)); ++ SENDERR(EPROTONOSUPPORT); ++ } ++ ++ for(pfkey_socketsp = pfkey_registered_sockets[satype]; ++ pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ if((error = pfkey_upmsg(pfkey_socketsp->socketp, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_acquire_parse: " ++ "sending up acquire reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp, ++ error); ++ SENDERR(-error); ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_acquire_parse: " ++ "sending up acquire reply message for satype=%d(%s) to socket=0p%p succeeded.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp); ++ } ++ ++ errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_register_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ uint8_t satype = ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_register_parse: .\n"); ++ ++ /* XXX I don't know if we want an upper bound, since 
userspace may ++ want to register itself for an satype > SADB_SATYPE_MAX. */ ++ if((satype == 0) || (satype > SADB_SATYPE_MAX)) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_register_parse: " ++ "SATYPE=%d invalid.\n", ++ satype); ++ SENDERR(EINVAL); ++ } ++ ++ if(!pfkey_list_insert_socket(sk->sk_socket, ++ &(pfkey_registered_sockets[satype]))) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_register_parse: " ++ "SATYPE=%02d(%s) successfully registered by KMd (pid=%d).\n", ++ satype, ++ satype2name(satype), ++ key_pid(sk)); ++ }; ++ ++ /* send up register msg with supported SATYPE algos */ ++ ++ error=pfkey_register_reply(satype, (struct sadb_msg*)extensions[SADB_EXT_RESERVED]); ++ errlab: ++ return error; ++} ++ ++int ++pfkey_register_reply(int satype, struct sadb_msg *sadb_msg) ++{ ++ struct sadb_ext *extensions_reply[SADB_EXT_MAX+1]; ++ struct sadb_msg *pfkey_reply = NULL; ++ struct socket_list *pfkey_socketsp; ++ struct supported_list *pfkey_supported_listp; ++ unsigned int alg_num_a = 0, alg_num_e = 0; ++ struct sadb_alg *alg_a = NULL, *alg_e = NULL, *alg_ap = NULL, *alg_ep = NULL; ++ int error = 0; ++ ++ pfkey_extensions_init(extensions_reply); ++ ++ if((satype == 0) || (satype > SADB_SATYPE_MAX)) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_register_reply: " ++ "SAtype=%d unspecified or unknown.\n", ++ satype); ++ SENDERR(EINVAL); ++ } ++ if(!(pfkey_registered_sockets[satype])) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_register_reply: " ++ "no sockets registered for SAtype=%d(%s).\n", ++ satype, ++ satype2name(satype)); ++ SENDERR(EPROTONOSUPPORT); ++ } ++ /* send up register msg with supported SATYPE algos */ ++ pfkey_supported_listp = pfkey_supported_list[satype]; ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_register_reply: " ++ "pfkey_supported_list[%d]=0p%p\n", ++ satype, ++ pfkey_supported_list[satype]); ++ while(pfkey_supported_listp) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_register_reply: " ++ "checking 
supported=0p%p\n", ++ pfkey_supported_listp); ++ if(pfkey_supported_listp->supportedp->ias_exttype == SADB_EXT_SUPPORTED_AUTH) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_register_reply: " ++ "adding auth alg.\n"); ++ alg_num_a++; ++ } ++ if(pfkey_supported_listp->supportedp->ias_exttype == SADB_EXT_SUPPORTED_ENCRYPT) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_register_reply: " ++ "adding encrypt alg.\n"); ++ alg_num_e++; ++ } ++ pfkey_supported_listp = pfkey_supported_listp->next; ++ } ++ ++ if(alg_num_a) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_register_reply: " ++ "allocating %lu bytes for auth algs.\n", ++ (unsigned long) (alg_num_a * sizeof(struct sadb_alg))); ++ if((alg_a = kmalloc(alg_num_a * sizeof(struct sadb_alg), GFP_ATOMIC) ) == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_register_reply: " ++ "auth alg memory allocation error\n"); ++ SENDERR(ENOMEM); ++ } ++ alg_ap = alg_a; ++ } ++ ++ if(alg_num_e) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_register_reply: " ++ "allocating %lu bytes for enc algs.\n", ++ (unsigned long) (alg_num_e * sizeof(struct sadb_alg))); ++ if((alg_e = kmalloc(alg_num_e * sizeof(struct sadb_alg), GFP_ATOMIC) ) == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_register_reply: " ++ "enc alg memory allocation error\n"); ++ SENDERR(ENOMEM); ++ } ++ alg_ep = alg_e; ++ } ++ ++ pfkey_supported_listp = pfkey_supported_list[satype]; ++ while(pfkey_supported_listp) { ++ if(alg_num_a) { ++ if(pfkey_supported_listp->supportedp->ias_exttype == SADB_EXT_SUPPORTED_AUTH) { ++ alg_ap->sadb_alg_id = pfkey_supported_listp->supportedp->ias_id; ++ alg_ap->sadb_alg_ivlen = pfkey_supported_listp->supportedp->ias_ivlen; ++ alg_ap->sadb_alg_minbits = pfkey_supported_listp->supportedp->ias_keyminbits; ++ alg_ap->sadb_alg_maxbits = pfkey_supported_listp->supportedp->ias_keymaxbits; ++ alg_ap->sadb_alg_reserved = 0; ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ 
"klips_debug:pfkey_register_reply: " ++ "adding auth=0p%p\n", ++ alg_ap); ++ alg_ap++; ++ } ++ } ++ if(alg_num_e) { ++ if(pfkey_supported_listp->supportedp->ias_exttype == SADB_EXT_SUPPORTED_ENCRYPT) { ++ alg_ep->sadb_alg_id = pfkey_supported_listp->supportedp->ias_id; ++ alg_ep->sadb_alg_ivlen = pfkey_supported_listp->supportedp->ias_ivlen; ++ alg_ep->sadb_alg_minbits = pfkey_supported_listp->supportedp->ias_keyminbits; ++ alg_ep->sadb_alg_maxbits = pfkey_supported_listp->supportedp->ias_keymaxbits; ++ alg_ep->sadb_alg_reserved = 0; ++ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, ++ "klips_debug:pfkey_register_reply: " ++ "adding encrypt=0p%p\n", ++ alg_ep); ++ alg_ep++; ++ } ++ } ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_register_reply: " ++ "found satype=%d(%s) exttype=%d id=%d ivlen=%d minbits=%d maxbits=%d.\n", ++ satype, ++ satype2name(satype), ++ pfkey_supported_listp->supportedp->ias_exttype, ++ pfkey_supported_listp->supportedp->ias_id, ++ pfkey_supported_listp->supportedp->ias_ivlen, ++ pfkey_supported_listp->supportedp->ias_keyminbits, ++ pfkey_supported_listp->supportedp->ias_keymaxbits); ++ pfkey_supported_listp = pfkey_supported_listp->next; ++ } ++ ++ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0], ++ SADB_REGISTER, ++ satype, ++ 0, ++ sadb_msg? sadb_msg->sadb_msg_seq : ++pfkey_msg_seq, ++ sadb_msg? sadb_msg->sadb_msg_pid: current->pid), ++ extensions_reply) && ++ (alg_num_a ? pfkey_safe_build(error = pfkey_supported_build(&extensions_reply[SADB_EXT_SUPPORTED_AUTH], ++ SADB_EXT_SUPPORTED_AUTH, ++ alg_num_a, ++ alg_a), ++ extensions_reply) : 1) && ++ (alg_num_e ? 
pfkey_safe_build(error = pfkey_supported_build(&extensions_reply[SADB_EXT_SUPPORTED_ENCRYPT], ++ SADB_EXT_SUPPORTED_ENCRYPT, ++ alg_num_e, ++ alg_e), ++ extensions_reply) : 1))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_register_reply: " ++ "failed to build the register message extensions_reply\n"); ++ SENDERR(-error); ++ } ++ ++ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_register_reply: " ++ "failed to build the register message\n"); ++ SENDERR(-error); ++ } ++ /* this should go to all registered sockets for that satype only */ ++ for(pfkey_socketsp = pfkey_registered_sockets[satype]; ++ pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_register_reply: " ++ "sending up acquire message for satype=%d(%s) to socket=0p%p failed with error=%d.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp, ++ error); ++ SENDERR(-error); ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_register_reply: " ++ "sending up register message for satype=%d(%s) to socket=0p%p succeeded.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp); ++ } ++ ++ errlab: ++ if(alg_a) { ++ kfree(alg_a); ++ } ++ if(alg_e) { ++ kfree(alg_e); ++ } ++ ++ if (pfkey_reply) { ++ pfkey_msg_free(&pfkey_reply); ++ } ++ pfkey_extensions_free(extensions_reply); ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_expire_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ struct socket_list *pfkey_socketsp; ++#ifdef CONFIG_KLIPS_DEBUG ++ uint8_t satype = ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_expire_parse: .\n"); ++ ++ if(pfkey_open_sockets) { ++ for(pfkey_socketsp = pfkey_open_sockets; ++ 
pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ if((error = pfkey_upmsg(pfkey_socketsp->socketp, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_expire_parse: " ++ "sending up expire reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp, ++ error); ++ SENDERR(-error); ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_expire_parse: " ++ "sending up expire reply message for satype=%d(%s) to socket=0p%p succeeded.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp); ++ } ++ } ++ ++ errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_flush_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ struct socket_list *pfkey_socketsp; ++ uint8_t satype = ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype; ++ uint8_t proto = 0; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_flush_parse: " ++ "flushing type %d SAs\n", ++ satype); ++ ++ if(satype && !(proto = satype2proto(satype))) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_flush_parse: " ++ "satype %d lookup failed.\n", ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype); ++ SENDERR(EINVAL); ++ } ++ ++ if ((error = ipsec_sadb_cleanup(proto))) { ++ SENDERR(-error); ++ } ++ ++ if(pfkey_open_sockets) { ++ for(pfkey_socketsp = pfkey_open_sockets; ++ pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ if((error = pfkey_upmsg(pfkey_socketsp->socketp, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_flush_parse: " ++ "sending up flush reply message for satype=%d(%s) (proto=%d) to socket=0p%p failed with error=%d.\n", ++ satype, ++ satype2name(satype), ++ proto, ++ pfkey_socketsp->socketp, ++ error); ++ SENDERR(-error); ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_flush_parse: " 
++ "sending up flush reply message for satype=%d(%s) to socket=0p%p succeeded.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp); ++ } ++ } ++ ++ errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_dump_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_dump_parse: .\n"); ++ ++ SENDERR(ENOSYS); ++ errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_x_promisc_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_promisc_parse: .\n"); ++ ++ SENDERR(ENOSYS); ++ errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_x_pchange_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_pchange_parse: .\n"); ++ ++ SENDERR(ENOSYS); ++ errlab: ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_x_grpsa_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ struct ipsec_sa *ips1p, *ips2p, *ipsp; ++ struct sadb_ext *extensions_reply[SADB_EXT_MAX+1]; ++ struct sadb_msg *pfkey_reply = NULL; ++ struct socket_list *pfkey_socketsp; ++ uint8_t satype = ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype; ++ char sa1[SATOT_BUF], sa2[SATOT_BUF]; ++ size_t sa_len1, sa_len2 = 0; ++ int error = 0; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_grpsa_parse: .\n"); ++ ++ pfkey_extensions_init(extensions_reply); ++ ++ if(extr == NULL || extr->ips == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_grpsa_parse: " ++ "extr or extr->ips is NULL, fatal.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ sa_len1 = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa1, sizeof(sa1)); ++ if(extr->ips2 != NULL) { ++ sa_len2 = KLIPS_SATOT(debug_pfkey, &extr->ips2->ips_said, 
0, sa2, sizeof(sa2)); ++ } ++ ++ spin_lock_bh(&tdb_lock); ++ ++ ips1p = ipsec_sa_getbyid(&(extr->ips->ips_said)); ++ if(ips1p == NULL) { ++ spin_unlock_bh(&tdb_lock); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_grpsa_parse: " ++ "reserved ipsec_sa for SA1: %s not found. Call SADB_ADD/UPDATE first.\n", ++ sa_len1 ? sa1 : " (error)"); ++ SENDERR(ENOENT); ++ } ++ if(extr->ips2) { /* GRPSA */ ++ ips2p = ipsec_sa_getbyid(&(extr->ips2->ips_said)); ++ if(ips2p == NULL) { ++ ipsec_sa_put(ips1p); ++ spin_unlock_bh(&tdb_lock); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_grpsa_parse: " ++ "reserved ipsec_sa for SA2: %s not found. Call SADB_ADD/UPDATE first.\n", ++ sa_len2 ? sa2 : " (error)"); ++ SENDERR(ENOENT); ++ } ++ ++ /* Is either one already linked? */ ++ if(ips1p->ips_onext) { ++ ipsec_sa_put(ips1p); ++ ipsec_sa_put(ips2p); ++ spin_unlock_bh(&tdb_lock); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_grpsa_parse: " ++ "ipsec_sa for SA: %s is already linked.\n", ++ sa_len1 ? sa1 : " (error)"); ++ SENDERR(EEXIST); ++ } ++ if(ips2p->ips_inext) { ++ ipsec_sa_put(ips1p); ++ ipsec_sa_put(ips2p); ++ spin_unlock_bh(&tdb_lock); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_grpsa_parse: " ++ "ipsec_sa for SA: %s is already linked.\n", ++ sa_len2 ? sa2 : " (error)"); ++ SENDERR(EEXIST); ++ } ++ ++ /* Is extr->ips already linked to extr->ips2? */ ++ ipsp = ips2p; ++ while(ipsp) { ++ if(ipsp == ips1p) { ++ ipsec_sa_put(ips1p); ++ ipsec_sa_put(ips2p); ++ spin_unlock_bh(&tdb_lock); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_grpsa_parse: " ++ "ipsec_sa for SA: %s is already linked to %s.\n", ++ sa_len1 ? sa1 : " (error)", ++ sa_len2 ? sa2 : " (error)"); ++ SENDERR(EEXIST); ++ } ++ ipsp = ipsp->ips_onext; ++ } ++ ++ /* link 'em */ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_grpsa_parse: " ++ "linking ipsec_sa SA: %s with %s.\n", ++ sa_len1 ? sa1 : " (error)", ++ sa_len2 ? 
sa2 : " (error)"); ++ ips1p->ips_onext = ips2p; ++ ips2p->ips_inext = ips1p; ++ } else { /* UNGRPSA */ ++ ipsec_sa_put(ips1p); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_grpsa_parse: " ++ "unlinking ipsec_sa SA: %s.\n", ++ sa_len1 ? sa1 : " (error)"); ++ while(ips1p->ips_onext) { ++ ips1p = ips1p->ips_onext; ++ } ++ while(ips1p->ips_inext) { ++ ipsp = ips1p; ++ ips1p = ips1p->ips_inext; ++ ipsec_sa_put(ips1p); ++ ipsp->ips_inext = NULL; ++ ipsec_sa_put(ipsp); ++ ips1p->ips_onext = NULL; ++ } ++ } ++ ++ spin_unlock_bh(&tdb_lock); ++ ++ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0], ++ SADB_X_GRPSA, ++ satype, ++ 0, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_seq, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_pid), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_sa_ref_build(&extensions_reply[SADB_EXT_SA], ++ SADB_EXT_SA, ++ extr->ips->ips_said.spi, ++ extr->ips->ips_replaywin, ++ extr->ips->ips_state, ++ extr->ips->ips_authalg, ++ extr->ips->ips_encalg, ++ extr->ips->ips_flags, ++ extr->ips->ips_ref), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_EXT_ADDRESS_DST], ++ SADB_EXT_ADDRESS_DST, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips->ips_addr_d), ++ extensions_reply) ++ && (extr->ips2 ++ ? 
(pfkey_safe_build(error = pfkey_x_satype_build(&extensions_reply[SADB_X_EXT_SATYPE2], ++ ((struct sadb_x_satype*)extensions[SADB_X_EXT_SATYPE2])->sadb_x_satype_satype ++ /* proto2satype(extr->ips2->ips_said.proto) */), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_sa_ref_build(&extensions_reply[SADB_X_EXT_SA2], ++ SADB_X_EXT_SA2, ++ extr->ips2->ips_said.spi, ++ extr->ips2->ips_replaywin, ++ extr->ips2->ips_state, ++ extr->ips2->ips_authalg, ++ extr->ips2->ips_encalg, ++ extr->ips2->ips_flags, ++ extr->ips2->ips_ref), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_X_EXT_ADDRESS_DST2], ++ SADB_X_EXT_ADDRESS_DST2, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips2->ips_addr_d), ++ extensions_reply) ) : 1 ) ++ )) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_grpsa_parse: " ++ "failed to build the x_grpsa reply message extensions\n"); ++ SENDERR(-error); ++ } ++ ++ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_grpsa_parse: " ++ "failed to build the x_grpsa reply message\n"); ++ SENDERR(-error); ++ } ++ ++ for(pfkey_socketsp = pfkey_open_sockets; ++ pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_grpsa_parse: " ++ "sending up x_grpsa reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp, ++ error); ++ SENDERR(-error); ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_grpsa_parse: " ++ "sending up x_grpsa reply message for satype=%d(%s) to socket=0p%p succeeded.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp); ++ } ++ ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_grpsa_parse: " ++ "succeeded in sending x_grpsa reply message.\n"); ++ ++ errlab: ++ if (pfkey_reply) { ++ 
pfkey_msg_free(&pfkey_reply); ++ } ++ pfkey_extensions_free(extensions_reply); ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_x_addflow_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++#ifdef CONFIG_KLIPS_DEBUG ++ char buf1[64], buf2[64]; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ struct sadb_ext *extensions_reply[SADB_EXT_MAX+1]; ++ struct sadb_msg *pfkey_reply = NULL; ++ struct socket_list *pfkey_socketsp; ++ uint8_t satype = ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype; ++ ip_address srcflow, dstflow, srcmask, dstmask; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_addflow_parse: .\n"); ++ ++ pfkey_extensions_init(extensions_reply); ++ ++ memset((caddr_t)&srcflow, 0, sizeof(srcflow)); ++ memset((caddr_t)&dstflow, 0, sizeof(dstflow)); ++ memset((caddr_t)&srcmask, 0, sizeof(srcmask)); ++ memset((caddr_t)&dstmask, 0, sizeof(dstmask)); ++ ++ if(!extr || !(extr->ips) || !(extr->eroute)) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_addflow_parse: " ++ "missing extr, ipsec_sa or eroute data.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ srcflow.u.v4.sin_family = AF_INET; ++ dstflow.u.v4.sin_family = AF_INET; ++ srcmask.u.v4.sin_family = AF_INET; ++ dstmask.u.v4.sin_family = AF_INET; ++ srcflow.u.v4.sin_addr = extr->eroute->er_eaddr.sen_ip_src; ++ dstflow.u.v4.sin_addr = extr->eroute->er_eaddr.sen_ip_dst; ++ srcmask.u.v4.sin_addr = extr->eroute->er_emask.sen_ip_src; ++ dstmask.u.v4.sin_addr = extr->eroute->er_emask.sen_ip_dst; ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if (debug_pfkey) { ++ subnettoa(extr->eroute->er_eaddr.sen_ip_src, ++ extr->eroute->er_emask.sen_ip_src, 0, buf1, sizeof(buf1)); ++ subnettoa(extr->eroute->er_eaddr.sen_ip_dst, ++ extr->eroute->er_emask.sen_ip_dst, 0, buf2, sizeof(buf2)); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_addflow_parse: " ++ "calling breakeroute and/or makeroute for %s->%s\n", ++ buf1, buf2); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++ 
if(extr->ips->ips_flags & SADB_X_SAFLAGS_INFLOW) { ++ struct ipsec_sa *ipsp, *ipsq; ++ char sa[SATOT_BUF]; ++ size_t sa_len; ++ ++ ipsq = ipsec_sa_getbyid(&(extr->ips->ips_said)); ++ if(ipsq == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_addflow_parse: " ++ "ipsec_sa not found, cannot set incoming policy.\n"); ++ SENDERR(ENOENT); ++ } ++ ++ ipsp = ipsq; ++ while(ipsp && ipsp->ips_said.proto != IPPROTO_IPIP) { ++ ipsp = ipsp->ips_inext; ++ } ++ ++ if(ipsp == NULL) { ++ ipsec_sa_put(ipsq); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_addflow_parse: " ++ "SA chain does not have an IPIP SA, cannot set incoming policy.\n"); ++ SENDERR(ENOENT); ++ } ++ ++ sa_len = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa, sizeof(sa)); ++ ++ ipsp->ips_flags |= SADB_X_SAFLAGS_INFLOW; ++ ipsp->ips_flow_s = srcflow; ++ ipsp->ips_flow_d = dstflow; ++ ipsp->ips_mask_s = srcmask; ++ ipsp->ips_mask_d = dstmask; ++ ++ ipsec_sa_put(ipsq); ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_addflow_parse: " ++ "inbound eroute, setting incoming policy information in IPIP ipsec_sa for SA: %s.\n", ++ sa_len ? sa : " (error)"); ++ } else { ++ struct sk_buff *first = NULL, *last = NULL; ++ ++ if(extr->ips->ips_flags & SADB_X_SAFLAGS_REPLACEFLOW) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_addflow_parse: " ++ "REPLACEFLOW flag set, calling breakeroute.\n"); ++ if ((error = ipsec_breakroute(&(extr->eroute->er_eaddr), ++ &(extr->eroute->er_emask), ++ &first, &last))) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_addflow_parse: " ++ "breakeroute returned %d. 
first=0p%p, last=0p%p\n", ++ error, ++ first, ++ last); ++ if(first != NULL) { ++ ipsec_kfree_skb(first); ++ } ++ if(last != NULL) { ++ ipsec_kfree_skb(last); ++ } ++ SENDERR(-error); ++ } ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_addflow_parse: " ++ "calling makeroute.\n"); ++ ++ if ((error = ipsec_makeroute(&(extr->eroute->er_eaddr), ++ &(extr->eroute->er_emask), ++ extr->ips->ips_said, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_pid, ++ NULL, ++ &(extr->ips->ips_ident_s), ++ &(extr->ips->ips_ident_d)))) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_addflow_parse: " ++ "makeroute returned %d.\n", error); ++ SENDERR(-error); ++ } ++ if(first != NULL) { ++ KLIPS_PRINT(debug_eroute, ++ "klips_debug:pfkey_x_addflow_parse: " ++ "first=0p%p HOLD packet re-injected.\n", ++ first); ++ DEV_QUEUE_XMIT(first, first->dev, SOPRI_NORMAL); ++ } ++ if(last != NULL) { ++ KLIPS_PRINT(debug_eroute, ++ "klips_debug:pfkey_x_addflow_parse: " ++ "last=0p%p HOLD packet re-injected.\n", ++ last); ++ DEV_QUEUE_XMIT(last, last->dev, SOPRI_NORMAL); ++ } ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_addflow_parse: " ++ "makeroute call successful.\n"); ++ ++ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0], ++ SADB_X_ADDFLOW, ++ satype, ++ 0, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_seq, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_pid), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_sa_ref_build(&extensions_reply[SADB_EXT_SA], ++ SADB_EXT_SA, ++ extr->ips->ips_said.spi, ++ extr->ips->ips_replaywin, ++ extr->ips->ips_state, ++ extr->ips->ips_authalg, ++ extr->ips->ips_encalg, ++ extr->ips->ips_flags, ++ extr->ips->ips_ref), ++ extensions_reply) ++ && (extensions[SADB_EXT_ADDRESS_SRC] ++ ? 
pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_EXT_ADDRESS_SRC], ++ SADB_EXT_ADDRESS_SRC, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips->ips_addr_s), ++ extensions_reply) : 1) ++ && (extensions[SADB_EXT_ADDRESS_DST] ++ ? pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_EXT_ADDRESS_DST], ++ SADB_EXT_ADDRESS_DST, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ extr->ips->ips_addr_d), ++ extensions_reply) : 1) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_X_EXT_ADDRESS_SRC_FLOW], ++ SADB_X_EXT_ADDRESS_SRC_FLOW, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ (struct sockaddr*)&srcflow), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_X_EXT_ADDRESS_DST_FLOW], ++ SADB_X_EXT_ADDRESS_DST_FLOW, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ (struct sockaddr*)&dstflow), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_X_EXT_ADDRESS_SRC_MASK], ++ SADB_X_EXT_ADDRESS_SRC_MASK, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ (struct sockaddr*)&srcmask), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_X_EXT_ADDRESS_DST_MASK], ++ SADB_X_EXT_ADDRESS_DST_MASK, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ (struct sockaddr*)&dstmask), ++ extensions_reply) ++ )) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_addflow_parse: " ++ "failed to build the x_addflow reply message extensions\n"); ++ SENDERR(-error); ++ } ++ ++ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_addflow_parse: " ++ "failed to build the x_addflow reply message\n"); ++ SENDERR(-error); ++ } ++ ++ for(pfkey_socketsp = pfkey_open_sockets; ++ pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) { ++ KLIPS_PRINT(debug_pfkey, 
"klips_debug:pfkey_x_addflow_parse: " ++ "sending up x_addflow reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp, ++ error); ++ SENDERR(-error); ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_addflow_parse: " ++ "sending up x_addflow reply message for satype=%d(%s) (proto=%d) to socket=0p%p succeeded.\n", ++ satype, ++ satype2name(satype), ++ extr->ips->ips_said.proto, ++ pfkey_socketsp->socketp); ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_addflow_parse: " ++ "extr->ips cleaned up and freed.\n"); ++ ++ errlab: ++ if (pfkey_reply) { ++ pfkey_msg_free(&pfkey_reply); ++ } ++ pfkey_extensions_free(extensions_reply); ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_x_delflow_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++#ifdef CONFIG_KLIPS_DEBUG ++ char buf1[64], buf2[64]; ++#endif /* CONFIG_KLIPS_DEBUG */ ++ struct sadb_ext *extensions_reply[SADB_EXT_MAX+1]; ++ struct sadb_msg *pfkey_reply = NULL; ++ struct socket_list *pfkey_socketsp; ++ uint8_t satype = ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype; ++ ip_address srcflow, dstflow, srcmask, dstmask; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_delflow_parse: .\n"); ++ ++ pfkey_extensions_init(extensions_reply); ++ ++ memset((caddr_t)&srcflow, 0, sizeof(srcflow)); ++ memset((caddr_t)&dstflow, 0, sizeof(dstflow)); ++ memset((caddr_t)&srcmask, 0, sizeof(srcmask)); ++ memset((caddr_t)&dstmask, 0, sizeof(dstmask)); ++ ++ if(!extr || !(extr->ips)) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_delflow_parse: " ++ "extr, or extr->ips is NULL, fatal\n"); ++ SENDERR(EINVAL); ++ } ++ ++ if(extr->ips->ips_flags & SADB_X_SAFLAGS_CLEARFLOW) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_delflow_parse: " ++ "CLEARFLOW flag set, calling cleareroutes.\n"); ++ if ((error = ipsec_cleareroutes())) ++ 
KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_delflow_parse: " ++ "cleareroutes returned %d.\n", error); ++ SENDERR(-error); ++ } else { ++ struct sk_buff *first = NULL, *last = NULL; ++ ++ if(!(extr->eroute)) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_delflow_parse: " ++ "extr->eroute is NULL, fatal.\n"); ++ SENDERR(EINVAL); ++ } ++ ++ srcflow.u.v4.sin_family = AF_INET; ++ dstflow.u.v4.sin_family = AF_INET; ++ srcmask.u.v4.sin_family = AF_INET; ++ dstmask.u.v4.sin_family = AF_INET; ++ srcflow.u.v4.sin_addr = extr->eroute->er_eaddr.sen_ip_src; ++ dstflow.u.v4.sin_addr = extr->eroute->er_eaddr.sen_ip_dst; ++ srcmask.u.v4.sin_addr = extr->eroute->er_emask.sen_ip_src; ++ dstmask.u.v4.sin_addr = extr->eroute->er_emask.sen_ip_dst; ++ ++#ifdef CONFIG_KLIPS_DEBUG ++ if (debug_pfkey) { ++ subnettoa(extr->eroute->er_eaddr.sen_ip_src, ++ extr->eroute->er_emask.sen_ip_src, 0, buf1, sizeof(buf1)); ++ subnettoa(extr->eroute->er_eaddr.sen_ip_dst, ++ extr->eroute->er_emask.sen_ip_dst, 0, buf2, sizeof(buf2)); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_delflow_parse: " ++ "calling breakeroute for %s->%s\n", ++ buf1, buf2); ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ error = ipsec_breakroute(&(extr->eroute->er_eaddr), ++ &(extr->eroute->er_emask), ++ &first, &last); ++ if(error) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_delflow_parse: " ++ "breakeroute returned %d. 
first=0p%p, last=0p%p\n", ++ error, ++ first, ++ last); ++ } ++ if(first != NULL) { ++ ipsec_kfree_skb(first); ++ } ++ if(last != NULL) { ++ ipsec_kfree_skb(last); ++ } ++ if(error) { ++ SENDERR(-error); ++ } ++ } ++ ++ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0], ++ SADB_X_DELFLOW, ++ satype, ++ 0, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_seq, ++ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_pid), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_sa_ref_build(&extensions_reply[SADB_EXT_SA], ++ SADB_EXT_SA, ++ extr->ips->ips_said.spi, ++ extr->ips->ips_replaywin, ++ extr->ips->ips_state, ++ extr->ips->ips_authalg, ++ extr->ips->ips_encalg, ++ extr->ips->ips_flags, ++ extr->ips->ips_ref), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_X_EXT_ADDRESS_SRC_FLOW], ++ SADB_X_EXT_ADDRESS_SRC_FLOW, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ (struct sockaddr*)&srcflow), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_X_EXT_ADDRESS_DST_FLOW], ++ SADB_X_EXT_ADDRESS_DST_FLOW, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ (struct sockaddr*)&dstflow), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_X_EXT_ADDRESS_SRC_MASK], ++ SADB_X_EXT_ADDRESS_SRC_MASK, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ (struct sockaddr*)&srcmask), ++ extensions_reply) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[SADB_X_EXT_ADDRESS_DST_MASK], ++ SADB_X_EXT_ADDRESS_DST_MASK, ++ 0, /*extr->ips->ips_said.proto,*/ ++ 0, ++ (struct sockaddr*)&dstmask), ++ extensions_reply) ++ )) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_delflow_parse: " ++ "failed to build the x_delflow reply message extensions\n"); ++ SENDERR(-error); ++ } ++ ++ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) { ++ KLIPS_PRINT(debug_pfkey, 
"klips_debug:pfkey_x_delflow_parse: " ++ "failed to build the x_delflow reply message\n"); ++ SENDERR(-error); ++ } ++ ++ for(pfkey_socketsp = pfkey_open_sockets; ++ pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_delflow_parse: " ++ "sending up x_delflow reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp, ++ error); ++ SENDERR(-error); ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_delflow_parse: " ++ "sending up x_delflow reply message for satype=%d(%s) to socket=0p%p succeeded.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp); ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_delflow_parse: " ++ "extr->ips cleaned up and freed.\n"); ++ ++ errlab: ++ if (pfkey_reply) { ++ pfkey_msg_free(&pfkey_reply); ++ } ++ pfkey_extensions_free(extensions_reply); ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_x_msg_debug_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ int error = 0; ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_x_msg_debug_parse: .\n"); ++ ++/* errlab:*/ ++ return error; ++} ++ ++/* pfkey_expire expects the ipsec_sa table to be locked before being called. 
*/ ++int ++pfkey_expire(struct ipsec_sa *ipsp, int hard) ++{ ++ struct sadb_ext *extensions[SADB_EXT_MAX+1]; ++ struct sadb_msg *pfkey_msg = NULL; ++ struct socket_list *pfkey_socketsp; ++ int error = 0; ++ uint8_t satype; ++ ++ pfkey_extensions_init(extensions); ++ ++ if(!(satype = proto2satype(ipsp->ips_said.proto))) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_expire: " ++ "satype lookup for protocol %d lookup failed.\n", ++ ipsp->ips_said.proto); ++ SENDERR(EINVAL); ++ } ++ ++ if(!pfkey_open_sockets) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_expire: " ++ "no sockets listening.\n"); ++ SENDERR(EPROTONOSUPPORT); ++ } ++ ++ if (!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions[0], ++ SADB_EXPIRE, ++ satype, ++ 0, ++ ++pfkey_msg_seq, ++ 0), ++ extensions) ++ && pfkey_safe_build(error = pfkey_sa_ref_build(&extensions[SADB_EXT_SA], ++ SADB_EXT_SA, ++ ipsp->ips_said.spi, ++ ipsp->ips_replaywin, ++ ipsp->ips_state, ++ ipsp->ips_authalg, ++ ipsp->ips_encalg, ++ ipsp->ips_flags, ++ ipsp->ips_ref), ++ extensions) ++ && pfkey_safe_build(error = pfkey_lifetime_build(&extensions[SADB_EXT_LIFETIME_CURRENT], ++ SADB_EXT_LIFETIME_CURRENT, ++ ipsp->ips_life.ipl_allocations.ipl_count, ++ ipsp->ips_life.ipl_bytes.ipl_count, ++ ipsp->ips_life.ipl_addtime.ipl_count, ++ ipsp->ips_life.ipl_usetime.ipl_count, ++ ipsp->ips_life.ipl_packets.ipl_count), ++ extensions) ++ && (hard ? 
++ pfkey_safe_build(error = pfkey_lifetime_build(&extensions[SADB_EXT_LIFETIME_HARD], ++ SADB_EXT_LIFETIME_HARD, ++ ipsp->ips_life.ipl_allocations.ipl_hard, ++ ipsp->ips_life.ipl_bytes.ipl_hard, ++ ipsp->ips_life.ipl_addtime.ipl_hard, ++ ipsp->ips_life.ipl_usetime.ipl_hard, ++ ipsp->ips_life.ipl_packets.ipl_hard), ++ extensions) ++ : pfkey_safe_build(error = pfkey_lifetime_build(&extensions[SADB_EXT_LIFETIME_SOFT], ++ SADB_EXT_LIFETIME_SOFT, ++ ipsp->ips_life.ipl_allocations.ipl_soft, ++ ipsp->ips_life.ipl_bytes.ipl_soft, ++ ipsp->ips_life.ipl_addtime.ipl_soft, ++ ipsp->ips_life.ipl_usetime.ipl_soft, ++ ipsp->ips_life.ipl_packets.ipl_soft), ++ extensions)) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions[SADB_EXT_ADDRESS_SRC], ++ SADB_EXT_ADDRESS_SRC, ++ 0, /* ipsp->ips_said.proto, */ ++ 0, ++ ipsp->ips_addr_s), ++ extensions) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions[SADB_EXT_ADDRESS_DST], ++ SADB_EXT_ADDRESS_DST, ++ 0, /* ipsp->ips_said.proto, */ ++ 0, ++ ipsp->ips_addr_d), ++ extensions))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_expire: " ++ "failed to build the expire message extensions\n"); ++ spin_unlock(&tdb_lock); ++ goto errlab; ++ } ++ ++ if ((error = pfkey_msg_build(&pfkey_msg, extensions, EXT_BITS_OUT))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_expire: " ++ "failed to build the expire message\n"); ++ SENDERR(-error); ++ } ++ ++ for(pfkey_socketsp = pfkey_open_sockets; ++ pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_msg))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_expire: " ++ "sending up expire message for satype=%d(%s) to socket=0p%p failed with error=%d.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp, ++ error); ++ SENDERR(-error); ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_expire: " ++ "sending up expire message for satype=%d(%s) (proto=%d) to socket=0p%p succeeded.\n", ++ satype, ++ 
satype2name(satype), ++ ipsp->ips_said.proto, ++ pfkey_socketsp->socketp); ++ } ++ ++ errlab: ++ if (pfkey_msg) { ++ pfkey_msg_free(&pfkey_msg); ++ } ++ pfkey_extensions_free(extensions); ++ return error; ++} ++ ++int ++pfkey_acquire(struct ipsec_sa *ipsp) ++{ ++ struct sadb_ext *extensions[SADB_EXT_MAX+1]; ++ struct sadb_msg *pfkey_msg = NULL; ++ struct socket_list *pfkey_socketsp; ++ int error = 0; ++ struct sadb_comb comb[] = { ++ /* auth; encrypt; flags; */ ++ /* auth_minbits; auth_maxbits; encrypt_minbits; encrypt_maxbits; */ ++ /* reserved; soft_allocations; hard_allocations; soft_bytes; hard_bytes; */ ++ /* soft_addtime; hard_addtime; soft_usetime; hard_usetime; */ ++ /* soft_packets; hard_packets; */ ++ { SADB_AALG_MD5HMAC, SADB_EALG_3DESCBC, SADB_SAFLAGS_PFS, ++ 128, 128, 168, 168, ++ 0, 0, 0, 0, 0, ++ 57600, 86400, 57600, 86400, ++ 0, 0 }, ++ { SADB_AALG_SHA1HMAC, SADB_EALG_3DESCBC, SADB_SAFLAGS_PFS, ++ 160, 160, 168, 168, ++ 0, 0, 0, 0, 0, ++ 57600, 86400, 57600, 86400, ++ 0, 0 } ++ }; ++ ++ /* XXX This should not be hard-coded. 
It should be taken from the spdb */ ++ uint8_t satype = SADB_SATYPE_ESP; ++ ++ pfkey_extensions_init(extensions); ++ ++ if((satype == 0) || (satype > SADB_SATYPE_MAX)) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_acquire: " ++ "SAtype=%d unspecified or unknown.\n", ++ satype); ++ SENDERR(EINVAL); ++ } ++ ++ if(!(pfkey_registered_sockets[satype])) { ++ KLIPS_PRINT(1|debug_pfkey, "klips_debug:pfkey_acquire: " ++ "no sockets registered for SAtype=%d(%s).\n", ++ satype, ++ satype2name(satype)); ++ SENDERR(EPROTONOSUPPORT); ++ } ++ ++ if (!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions[0], ++ SADB_ACQUIRE, ++ satype, ++ 0, ++ ++pfkey_msg_seq, ++ 0), ++ extensions) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions[SADB_EXT_ADDRESS_SRC], ++ SADB_EXT_ADDRESS_SRC, ++ ipsp->ips_transport_protocol, ++ 0, ++ ipsp->ips_addr_s), ++ extensions) ++ && pfkey_safe_build(error = pfkey_address_build(&extensions[SADB_EXT_ADDRESS_DST], ++ SADB_EXT_ADDRESS_DST, ++ ipsp->ips_transport_protocol, ++ 0, ++ ipsp->ips_addr_d), ++ extensions) ++#if 0 ++ && (ipsp->ips_addr_p ++ ? pfkey_safe_build(error = pfkey_address_build(&extensions[SADB_EXT_ADDRESS_PROXY], ++ SADB_EXT_ADDRESS_PROXY, ++ ipsp->ips_transport_protocol, ++ 0, ++ ipsp->ips_addr_p), ++ extensions) : 1) ++#endif ++ && (ipsp->ips_ident_s.type != SADB_IDENTTYPE_RESERVED ++ ? pfkey_safe_build(error = pfkey_ident_build(&extensions[SADB_EXT_IDENTITY_SRC], ++ SADB_EXT_IDENTITY_SRC, ++ ipsp->ips_ident_s.type, ++ ipsp->ips_ident_s.id, ++ ipsp->ips_ident_s.len, ++ ipsp->ips_ident_s.data), ++ extensions) : 1) ++ ++ && (ipsp->ips_ident_d.type != SADB_IDENTTYPE_RESERVED ++ ? pfkey_safe_build(error = pfkey_ident_build(&extensions[SADB_EXT_IDENTITY_DST], ++ SADB_EXT_IDENTITY_DST, ++ ipsp->ips_ident_d.type, ++ ipsp->ips_ident_d.id, ++ ipsp->ips_ident_d.len, ++ ipsp->ips_ident_d.data), ++ extensions) : 1) ++#if 0 ++ /* FIXME: This won't work yet because I have not finished ++ it. */ ++ && (ipsp->ips_sens_ ++ ? 
pfkey_safe_build(error = pfkey_sens_build(&extensions[SADB_EXT_SENSITIVITY], ++ ipsp->ips_sens_dpd, ++ ipsp->ips_sens_sens_level, ++ ipsp->ips_sens_sens_len, ++ ipsp->ips_sens_sens_bitmap, ++ ipsp->ips_sens_integ_level, ++ ipsp->ips_sens_integ_len, ++ ipsp->ips_sens_integ_bitmap), ++ extensions) : 1) ++#endif ++ && pfkey_safe_build(error = pfkey_prop_build(&extensions[SADB_EXT_PROPOSAL], ++ 64, /* replay */ ++ sizeof(comb)/sizeof(struct sadb_comb), ++ &(comb[0])), ++ extensions) ++ )) { ++ KLIPS_PRINT(1|debug_pfkey, "klips_debug:pfkey_acquire: " ++ "failed to build the acquire message extensions\n"); ++ SENDERR(-error); ++ } ++ ++ if ((error = pfkey_msg_build(&pfkey_msg, extensions, EXT_BITS_OUT))) { ++ KLIPS_PRINT(1|debug_pfkey, "klips_debug:pfkey_acquire: " ++ "failed to build the acquire message\n"); ++ SENDERR(-error); ++ } ++ ++#ifdef KLIPS_PFKEY_ACQUIRE_LOSSAGE ++# if KLIPS_PFKEY_ACQUIRE_LOSSAGE > 0 ++ if(sysctl_ipsec_regress_pfkey_lossage) { ++ return(0); ++ } ++# endif ++#endif ++ ++ /* this should go to all registered sockets for that satype only */ ++ for(pfkey_socketsp = pfkey_registered_sockets[satype]; ++ pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_msg))) { ++ KLIPS_PRINT(1|debug_pfkey, "klips_debug:pfkey_acquire: " ++ "sending up acquire message for satype=%d(%s) to socket=0p%p failed with error=%d.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp, ++ error); ++ SENDERR(-error); ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_acquire: " ++ "sending up acquire message for satype=%d(%s) to socket=0p%p succeeded.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp); ++ } ++ ++ errlab: ++ if (pfkey_msg) { ++ pfkey_msg_free(&pfkey_msg); ++ } ++ pfkey_extensions_free(extensions); ++ return error; ++} ++ ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++int ++pfkey_nat_t_new_mapping(struct ipsec_sa *ipsp, struct sockaddr *ipaddr, ++ __u16 sport) ++{ ++ struct 
sadb_ext *extensions[SADB_EXT_MAX+1]; ++ struct sadb_msg *pfkey_msg = NULL; ++ struct socket_list *pfkey_socketsp; ++ int error = 0; ++ uint8_t satype = (ipsp->ips_said.proto==IPPROTO_ESP) ? SADB_SATYPE_ESP : 0; ++ ++ /* Construct SADB_X_NAT_T_NEW_MAPPING message */ ++ ++ pfkey_extensions_init(extensions); ++ ++ if((satype == 0) || (satype > SADB_SATYPE_MAX)) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_nat_t_new_mapping: " ++ "SAtype=%d unspecified or unknown.\n", ++ satype); ++ SENDERR(EINVAL); ++ } ++ ++ if(!(pfkey_registered_sockets[satype])) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_nat_t_new_mapping: " ++ "no sockets registered for SAtype=%d(%s).\n", ++ satype, ++ satype2name(satype)); ++ SENDERR(EPROTONOSUPPORT); ++ } ++ ++ if (!(pfkey_safe_build ++ (error = pfkey_msg_hdr_build(&extensions[0], SADB_X_NAT_T_NEW_MAPPING, ++ satype, 0, ++pfkey_msg_seq, 0), extensions) ++ /* SA */ ++ && pfkey_safe_build ++ (error = pfkey_sa_build(&extensions[SADB_EXT_SA], ++ SADB_EXT_SA, ipsp->ips_said.spi, 0, 0, 0, 0, 0), extensions) ++ /* ADDRESS_SRC = old addr */ ++ && pfkey_safe_build ++ (error = pfkey_address_build(&extensions[SADB_EXT_ADDRESS_SRC], ++ SADB_EXT_ADDRESS_SRC, ipsp->ips_said.proto, 0, ipsp->ips_addr_s), ++ extensions) ++ /* NAT_T_SPORT = old port */ ++ && pfkey_safe_build ++ (error = pfkey_x_nat_t_port_build(&extensions[SADB_X_EXT_NAT_T_SPORT], ++ SADB_X_EXT_NAT_T_SPORT, ipsp->ips_natt_sport), extensions) ++ /* ADDRESS_DST = new addr */ ++ && pfkey_safe_build ++ (error = pfkey_address_build(&extensions[SADB_EXT_ADDRESS_DST], ++ SADB_EXT_ADDRESS_DST, ipsp->ips_said.proto, 0, ipaddr), extensions) ++ /* NAT_T_DPORT = new port */ ++ && pfkey_safe_build ++ (error = pfkey_x_nat_t_port_build(&extensions[SADB_X_EXT_NAT_T_DPORT], ++ SADB_X_EXT_NAT_T_DPORT, sport), extensions) ++ )) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_nat_t_new_mapping: " ++ "failed to build the nat_t_new_mapping message extensions\n"); ++ SENDERR(-error); ++ } ++ ++ if ((error = 
pfkey_msg_build(&pfkey_msg, extensions, EXT_BITS_OUT))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_nat_t_new_mapping: " ++ "failed to build the nat_t_new_mapping message\n"); ++ SENDERR(-error); ++ } ++ ++ /* this should go to all registered sockets for that satype only */ ++ for(pfkey_socketsp = pfkey_registered_sockets[satype]; ++ pfkey_socketsp; ++ pfkey_socketsp = pfkey_socketsp->next) { ++ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_msg))) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_nat_t_new_mapping: " ++ "sending up nat_t_new_mapping message for satype=%d(%s) to socket=%p failed with error=%d.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp, ++ error); ++ SENDERR(-error); ++ } ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_nat_t_new_mapping: " ++ "sending up nat_t_new_mapping message for satype=%d(%s) to socket=%p succeeded.\n", ++ satype, ++ satype2name(satype), ++ pfkey_socketsp->socketp); ++ } ++ ++ errlab: ++ if (pfkey_msg) { ++ pfkey_msg_free(&pfkey_msg); ++ } ++ pfkey_extensions_free(extensions); ++ return error; ++} ++ ++DEBUG_NO_STATIC int ++pfkey_x_nat_t_new_mapping_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr) ++{ ++ /* SADB_X_NAT_T_NEW_MAPPING not used in kernel */ ++ return -EINVAL; ++} ++#endif ++ ++DEBUG_NO_STATIC int (*ext_processors[SADB_EXT_MAX+1])(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) = ++{ ++ NULL, /* pfkey_msg_process, */ ++ pfkey_sa_process, ++ pfkey_lifetime_process, ++ pfkey_lifetime_process, ++ pfkey_lifetime_process, ++ pfkey_address_process, ++ pfkey_address_process, ++ pfkey_address_process, ++ pfkey_key_process, ++ pfkey_key_process, ++ pfkey_ident_process, ++ pfkey_ident_process, ++ pfkey_sens_process, ++ pfkey_prop_process, ++ pfkey_supported_process, ++ pfkey_supported_process, ++ pfkey_spirange_process, ++ pfkey_x_kmprivate_process, ++ pfkey_x_satype_process, ++ pfkey_sa_process, ++ pfkey_address_process, ++ 
pfkey_address_process, ++ pfkey_address_process, ++ pfkey_address_process, ++ pfkey_address_process, ++ pfkey_x_debug_process, ++ pfkey_x_protocol_process ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ , ++ pfkey_x_nat_t_type_process, ++ pfkey_x_nat_t_port_process, ++ pfkey_x_nat_t_port_process, ++ pfkey_address_process ++#endif ++}; ++ ++ ++DEBUG_NO_STATIC int (*msg_parsers[SADB_MAX +1])(struct sock *sk, struct sadb_ext *extensions[], struct pfkey_extracted_data* extr) ++ = ++{ ++ NULL, /* RESERVED */ ++ pfkey_getspi_parse, ++ pfkey_update_parse, ++ pfkey_add_parse, ++ pfkey_delete_parse, ++ pfkey_get_parse, ++ pfkey_acquire_parse, ++ pfkey_register_parse, ++ pfkey_expire_parse, ++ pfkey_flush_parse, ++ pfkey_dump_parse, ++ pfkey_x_promisc_parse, ++ pfkey_x_pchange_parse, ++ pfkey_x_grpsa_parse, ++ pfkey_x_addflow_parse, ++ pfkey_x_delflow_parse, ++ pfkey_x_msg_debug_parse ++#ifdef CONFIG_IPSEC_NAT_TRAVERSAL ++ , pfkey_x_nat_t_new_mapping_parse ++#endif ++}; ++ ++int ++pfkey_build_reply(struct sadb_msg *pfkey_msg, ++ struct pfkey_extracted_data *extr, ++ struct sadb_msg **pfkey_reply) ++{ ++ struct sadb_ext *extensions[SADB_EXT_MAX+1]; ++ int error = 0; ++ int msg_type = pfkey_msg->sadb_msg_type; ++ int seq = pfkey_msg->sadb_msg_seq; ++ ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_build_reply: " ++ "building reply with type: %d\n", ++ msg_type); ++ pfkey_extensions_init(extensions); ++ if (!extr || !extr->ips) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_build_reply: " ++ "bad ipsec_sa passed\n"); ++ return EINVAL; ++ } ++ error = pfkey_safe_build(pfkey_msg_hdr_build(&extensions[0], ++ msg_type, ++ proto2satype(extr->ips->ips_said.proto), ++ 0, ++ seq, ++ pfkey_msg->sadb_msg_pid), ++ extensions) && ++ (!(extensions_bitmaps[EXT_BITS_OUT][EXT_BITS_REQ][msg_type] & ++ 1 << SADB_EXT_SA) ++ || pfkey_safe_build(pfkey_sa_ref_build(&extensions[SADB_EXT_SA], ++ SADB_EXT_SA, ++ extr->ips->ips_said.spi, ++ extr->ips->ips_replaywin, ++ extr->ips->ips_state, ++ 
extr->ips->ips_authalg, ++ extr->ips->ips_encalg, ++ extr->ips->ips_flags, ++ extr->ips->ips_ref), ++ extensions)) && ++ (!(extensions_bitmaps[EXT_BITS_OUT][EXT_BITS_REQ][msg_type] & ++ 1 << SADB_EXT_LIFETIME_CURRENT) ++ || pfkey_safe_build(pfkey_lifetime_build(&extensions ++ [SADB_EXT_LIFETIME_CURRENT], ++ SADB_EXT_LIFETIME_CURRENT, ++ extr->ips->ips_life.ipl_allocations.ipl_count, ++ extr->ips->ips_life.ipl_bytes.ipl_count, ++ extr->ips->ips_life.ipl_addtime.ipl_count, ++ extr->ips->ips_life.ipl_usetime.ipl_count, ++ extr->ips->ips_life.ipl_packets.ipl_count), ++ extensions)) && ++ (!(extensions_bitmaps[EXT_BITS_OUT][EXT_BITS_REQ][msg_type] & ++ 1 << SADB_EXT_ADDRESS_SRC) ++ || pfkey_safe_build(pfkey_address_build(&extensions[SADB_EXT_ADDRESS_SRC], ++ SADB_EXT_ADDRESS_SRC, ++ extr->ips->ips_said.proto, ++ 0, ++ extr->ips->ips_addr_s), ++ extensions)) && ++ (!(extensions_bitmaps[EXT_BITS_OUT][EXT_BITS_REQ][msg_type] & ++ 1 << SADB_EXT_ADDRESS_DST) ++ || pfkey_safe_build(pfkey_address_build(&extensions[SADB_EXT_ADDRESS_DST], ++ SADB_EXT_ADDRESS_DST, ++ extr->ips->ips_said.proto, ++ 0, ++ extr->ips->ips_addr_d), ++ extensions)); ++ ++ if (error == 0) { ++ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_build_reply: " ++ "building extensions failed\n"); ++ return EINVAL; ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_build_reply: " ++ "built extensions, proceed to build the message\n"); ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_build_reply: " ++ "extensions[1]=0p%p\n", ++ extensions[1]); ++ error = pfkey_msg_build(pfkey_reply, extensions, EXT_BITS_OUT); ++ pfkey_extensions_free(extensions); ++ ++ return error; ++} ++ ++int ++pfkey_msg_interp(struct sock *sk, struct sadb_msg *pfkey_msg, ++ struct sadb_msg **pfkey_reply) ++{ ++ int error = 0; ++ int i; ++ struct sadb_ext *extensions[SADB_EXT_MAX+1]; ++ struct pfkey_extracted_data extr = {NULL, NULL, NULL}; ++ ++ pfkey_extensions_init(extensions); ++ KLIPS_PRINT(debug_pfkey, ++ 
"klips_debug:pfkey_msg_interp: " ++ "parsing message ver=%d, type=%d, errno=%d, satype=%d(%s), len=%d, res=%d, seq=%d, pid=%d.\n", ++ pfkey_msg->sadb_msg_version, ++ pfkey_msg->sadb_msg_type, ++ pfkey_msg->sadb_msg_errno, ++ pfkey_msg->sadb_msg_satype, ++ satype2name(pfkey_msg->sadb_msg_satype), ++ pfkey_msg->sadb_msg_len, ++ pfkey_msg->sadb_msg_reserved, ++ pfkey_msg->sadb_msg_seq, ++ pfkey_msg->sadb_msg_pid); ++ ++ extr.ips = ipsec_sa_alloc(&error); /* pass in error var by pointer */ ++ if(extr.ips == NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_msg_interp: " ++ "memory allocation error.\n"); ++ SENDERR(-error); ++ } ++ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_msg_interp: " ++ "allocated extr->ips=0p%p.\n", ++ extr.ips); ++ ++ if(pfkey_msg->sadb_msg_satype > SADB_SATYPE_MAX) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_msg_interp: " ++ "satype %d > max %d\n", ++ pfkey_msg->sadb_msg_satype, ++ SADB_SATYPE_MAX); ++ SENDERR(EINVAL); ++ } ++ ++ switch(pfkey_msg->sadb_msg_type) { ++ case SADB_GETSPI: ++ case SADB_UPDATE: ++ case SADB_ADD: ++ case SADB_DELETE: ++ case SADB_X_GRPSA: ++ case SADB_X_ADDFLOW: ++ if(!(extr.ips->ips_said.proto = satype2proto(pfkey_msg->sadb_msg_satype))) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_msg_interp: " ++ "satype %d lookup failed.\n", ++ pfkey_msg->sadb_msg_satype); ++ SENDERR(EINVAL); ++ } else { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_msg_interp: " ++ "satype %d lookups to proto=%d.\n", ++ pfkey_msg->sadb_msg_satype, ++ extr.ips->ips_said.proto); ++ } ++ break; ++ default: ++ break; ++ } ++ ++ /* The NULL below causes the default extension parsers to be used */ ++ /* Parse the extensions */ ++ if((error = pfkey_msg_parse(pfkey_msg, NULL, extensions, EXT_BITS_IN))) ++ { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_msg_interp: " ++ "message parsing failed with error %d.\n", ++ error); ++ SENDERR(-error); ++ } ++ ++ /* Process the extensions */ ++ for(i=1; i <= SADB_EXT_MAX;i++) { 
++ if(extensions[i] != NULL) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_msg_interp: " ++ "processing ext %d 0p%p with processor 0p%p.\n", ++ i, extensions[i], ext_processors[i]); ++ if((error = ext_processors[i](extensions[i], &extr))) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_msg_interp: " ++ "extension processing for type %d failed with error %d.\n", ++ i, ++ error); ++ SENDERR(-error); ++ } ++ ++ } ++ ++ } ++ ++ /* Parse the message types */ ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_msg_interp: " ++ "parsing message type %d(%s) with msg_parser 0p%p.\n", ++ pfkey_msg->sadb_msg_type, ++ pfkey_v2_sadb_type_string(pfkey_msg->sadb_msg_type), ++ msg_parsers[pfkey_msg->sadb_msg_type]); ++ if((error = msg_parsers[pfkey_msg->sadb_msg_type](sk, extensions, &extr))) { ++ KLIPS_PRINT(debug_pfkey, ++ "klips_debug:pfkey_msg_interp: " ++ "message parsing failed with error %d.\n", ++ error); ++ SENDERR(-error); ++ } ++ ++#if 0 ++ error = pfkey_build_reply(pfkey_msg, &extr, pfkey_reply); ++ if (error) { ++ *pfkey_reply = NULL; ++ } ++#endif ++ errlab: ++ if(extr.ips != NULL) { ++ ipsec_sa_wipe(extr.ips); ++ } ++ if(extr.ips2 != NULL) { ++ ipsec_sa_wipe(extr.ips2); ++ } ++ if (extr.eroute != NULL) { ++ kfree(extr.eroute); ++ } ++ return(error); ++} ++ ++/* ++ * $Log: pfkey_v2_parser.c,v $ ++ * Revision 1.134.2.4 2007-10-30 21:40:36 paul ++ * Fix for KLIPS_PFKEY_ACQUIRE_LOSSAGE [dhr] ++ * ++ * Revision 1.134.2.3 2007/09/05 02:56:10 paul ++ * Use the new ipsec_kversion macros by David to deal with 2.6.22 kernels. ++ * Fixes based on David McCullough patch. ++ * ++ * Revision 1.134.2.2 2006/10/06 21:39:26 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.134.2.1 2006/05/01 14:37:25 mcr ++ * ip_chk_addr -> inet_addr_type for more direct 2.4/2.6 support. 
++ * ++ * Revision 1.134 2005/05/11 01:48:20 mcr ++ * removed "poor-man"s OOP in favour of proper C structures. ++ * ++ * Revision 1.133 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. ++ * ++ * Revision 1.132 2005/04/14 20:56:24 mcr ++ * moved (pfkey_)ipsec_sa_init to ipsec_sa.c. ++ * ++ * Revision 1.131 2005/01/26 00:50:35 mcr ++ * adjustment of confusion of CONFIG_IPSEC_NAT vs CONFIG_KLIPS_NAT, ++ * and make sure that NAT_TRAVERSAL is set as well to match ++ * userspace compiles of code. ++ * ++ * Revision 1.130 2004/09/08 17:21:36 ken ++ * Rename MD5* -> osMD5 functions to prevent clashes with other symbols exported by kernel modules (CIFS in 2.6 initiated this) ++ * ++ * Revision 1.129 2004/09/06 18:36:30 mcr ++ * if a protocol can not be found, then log it. This is not ++ * debugging. ++ * ++ * Revision 1.128 2004/08/21 00:45:19 mcr ++ * CONFIG_KLIPS_NAT was wrong, also need to include udp.h. ++ * ++ * Revision 1.127 2004/08/20 21:45:45 mcr ++ * CONFIG_KLIPS_NAT_TRAVERSAL is not used in an attempt to ++ * be 26sec compatible. But, some defines where changed. ++ * ++ * Revision 1.126 2004/08/17 03:27:23 mcr ++ * klips 2.6 edits. ++ * ++ * Revision 1.125 2004/08/04 15:57:07 mcr ++ * moved des .h files to include/des/ * ++ * included 2.6 protocol specific things ++ * started at NAT-T support, but it will require a kernel patch. ++ * ++ * Revision 1.124 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.123 2004/04/06 02:49:26 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.122.2.2 2004/04/05 04:30:46 mcr ++ * patches for alg-branch to compile/work with 2.x openswan ++ * ++ * Revision 1.122.2.1 2003/12/22 15:25:52 jjo ++ * . Merged algo-0.8.1-rc11-test1 into alg-branch ++ * ++ * Revision 1.122 2003/12/10 01:14:27 mcr ++ * NAT-traversal patches to KLIPS. ++ * ++ * Revision 1.121 2003/10/31 02:27:55 mcr ++ * pulled up port-selector patches and sa_id elimination. 
++ * ++ * Revision 1.120.4.2 2003/10/29 01:30:41 mcr ++ * elimited "struct sa_id". ++ * ++ * Revision 1.120.4.1 2003/09/21 13:59:56 mcr ++ * pre-liminary X.509 patch - does not yet pass tests. ++ * ++ * Revision 1.120 2003/04/03 17:38:09 rgb ++ * Centralised ipsec_kfree_skb and ipsec_dev_{get,put}. ++ * ++ * Revision 1.119 2003/02/06 01:52:37 rgb ++ * Removed no longer relevant comment ++ * ++ * Revision 1.118 2003/01/30 02:32:44 rgb ++ * ++ * Transmit error code through to caller from callee for better diagnosis of problems. ++ * ++ * Revision 1.117 2003/01/16 18:48:13 rgb ++ * ++ * Fixed sign bug in error return from an sa allocation call in ++ * pfkey_msg_interp. ++ * ++ * Revision 1.116 2002/10/17 16:38:01 rgb ++ * Change pfkey_alloc_eroute() to never static since its consumers ++ * have been moved outside the file. ++ * ++ * Revision 1.115 2002/10/12 23:11:53 dhr ++ * ++ * [KenB + DHR] more 64-bit cleanup ++ * ++ * Revision 1.114 2002/10/05 05:02:58 dhr ++ * ++ * C labels go on statements ++ * ++ * Revision 1.113 2002/09/30 19:11:22 rgb ++ * Turn on debugging for upgoing acquire messages to test for reliability. ++ * ++ * Revision 1.112 2002/09/20 15:41:16 rgb ++ * Switch from pfkey_alloc_ipsec_sa() to ipsec_sa_alloc(). ++ * Added sadb_x_sa_ref to struct sadb_sa. ++ * Added ref parameter to pfkey_sa_build(). ++ * ++ * Revision 1.111 2002/09/20 05:02:08 rgb ++ * Added memory allocation debugging. ++ * Convert to switch to divulge hmac keys for debugging. ++ * Added text labels to elucidate numeric values presented. ++ * ++ * Revision 1.110 2002/08/03 18:03:05 mcr ++ * loop that checks for SPI's to have been already linked ++ * fails to actually step to next pointer, but continuously ++ * resets to head of list. Wrong pointer used. ++ * test east-icmp-02 revealed this. ++ * ++ * Revision 1.109 2002/07/26 08:48:31 rgb ++ * Added SA ref table code. 
++ * ++ * Revision 1.108 2002/05/27 18:55:03 rgb ++ * Remove final vistiges of tdb references via IPSEC_KLIPS1_COMPAT. ++ * ++ * Revision 1.107 2002/05/23 07:16:08 rgb ++ * Added ipsec_sa_put() for releasing an ipsec_sa refcount. ++ * Pointer clean-up. ++ * Added refcount code. ++ * ++ * Revision 1.106 2002/05/14 02:34:13 rgb ++ * Converted reference from ipsec_sa_put to ipsec_sa_add to avoid confusion ++ * with "put" usage in the kernel. ++ * Change all references to tdb, TDB or Tunnel Descriptor Block to ips, ++ * ipsec_sa or ipsec_sa. ++ * Moved all the extension parsing functions to pfkey_v2_ext_process.c. ++ * ++ * Revision 1.105 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.104 2002/04/24 07:36:34 mcr ++ * Moved from ./klips/net/ipsec/pfkey_v2_parser.c,v ++ * ++ * Revision 1.103 2002/04/20 00:12:25 rgb ++ * Added esp IV CBC attack fix, disabled. ++ * ++ * Revision 1.102 2002/03/08 01:15:17 mcr ++ * put some internal structure only debug messages behind ++ * && sysctl_ipsec_debug_verbose. ++ * ++ * Revision 1.101 2002/01/29 17:17:57 mcr ++ * moved include of ipsec_param.h to after include of linux/kernel.h ++ * otherwise, it seems that some option that is set in ipsec_param.h ++ * screws up something subtle in the include path to kernel.h, and ++ * it complains on the snprintf() prototype. ++ * ++ * Revision 1.100 2002/01/29 04:00:54 mcr ++ * more excise of kversions.h header. ++ * ++ * Revision 1.99 2002/01/29 02:13:19 mcr ++ * introduction of ipsec_kversion.h means that include of ++ * ipsec_param.h must preceed any decisions about what files to ++ * include to deal with differences in kernel source. ++ * ++ * Revision 1.98 2002/01/12 02:57:57 mcr ++ * first regression test causes acquire messages to be lost ++ * 100% of the time. This is to help testing of pluto. ++ * ++ * Revision 1.97 2001/11/26 09:23:52 rgb ++ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes. 
++ * ++ * Revision 1.93.2.4 2001/10/23 04:20:27 mcr ++ * parity was forced on wrong structure! prototypes help here. ++ * ++ * Revision 1.93.2.3 2001/10/22 21:14:59 mcr ++ * include des.h, removed phony prototypes and fixed calling ++ * conventions to match real prototypes. ++ * ++ * Revision 1.93.2.2 2001/10/15 05:39:03 mcr ++ * %08lx is not the right format for u32. Use %08x. 64-bit safe? ha. ++ * ++ * Revision 1.93.2.1 2001/09/25 02:30:14 mcr ++ * struct tdb -> struct ipsec_sa. ++ * use new lifetime structure. common format routines for debug. ++ * ++ * Revision 1.96 2001/11/06 20:47:54 rgb ++ * Fixed user context call to ipsec_dev_start_xmit() bug. Call ++ * dev_queue_xmit() instead. ++ * ++ * Revision 1.95 2001/11/06 19:47:46 rgb ++ * Added packet parameter to lifetime and comb structures. ++ * ++ * Revision 1.94 2001/10/18 04:45:23 rgb ++ * 2.4.9 kernel deprecates linux/malloc.h in favour of linux/slab.h, ++ * lib/freeswan.h version macros moved to lib/kversions.h. ++ * Other compiler directive cleanups. ++ * ++ * Revision 1.93 2001/09/20 15:32:59 rgb ++ * Min/max cleanup. ++ * ++ * Revision 1.92 2001/09/19 16:35:48 rgb ++ * PF_KEY ident fix for getspi from NetCelo (puttdb duplication). ++ * ++ * Revision 1.91 2001/09/15 16:24:06 rgb ++ * Re-inject first and last HOLD packet when an eroute REPLACE is done. ++ * ++ * Revision 1.90 2001/09/14 16:58:38 rgb ++ * Added support for storing the first and last packets through a HOLD. ++ * ++ * Revision 1.89 2001/09/08 21:14:07 rgb ++ * Added pfkey ident extension support for ISAKMPd. (NetCelo) ++ * Better state coherency (error management) between pf_key and IKE daemon. ++ * (NetCelo) ++ * ++ * Revision 1.88 2001/08/27 19:42:44 rgb ++ * Fix memory leak of encrypt and auth structs in pfkey register. ++ * ++ * Revision 1.87 2001/07/06 19:50:46 rgb ++ * Removed unused debugging code. ++ * Added inbound policy checking code for IPIP SAs. 
++ * ++ * Revision 1.86 2001/06/20 06:26:04 rgb ++ * Changed missing SA errors from EEXIST to ENOENT and added debug output ++ * for already linked SAs. ++ * ++ * Revision 1.85 2001/06/15 04:57:02 rgb ++ * Remove single error return condition check and check for all errors in ++ * the case of a replace eroute delete operation. This means that ++ * applications must expect to be deleting something before replacing it ++ * and if nothing is found, complain. ++ * ++ * Revision 1.84 2001/06/14 19:35:12 rgb ++ * Update copyright date. ++ * ++ * Revision 1.83 2001/06/12 00:03:19 rgb ++ * Silence debug set/unset under normal conditions. ++ * ++ * Revision 1.82 2001/05/30 08:14:04 rgb ++ * Removed vestiges of esp-null transforms. ++ * ++ * Revision 1.81 2001/05/27 06:12:12 rgb ++ * Added structures for pid, packet count and last access time to eroute. ++ * Added packet count to beginning of /proc/net/ipsec_eroute. ++ * ++ * Revision 1.80 2001/05/03 19:43:59 rgb ++ * Check error return codes for all build function calls. ++ * Standardise on SENDERR() macro. ++ * ++ * Revision 1.79 2001/04/20 21:09:16 rgb ++ * Cleaned up fixed tdbwipes. ++ * Free pfkey_reply and clean up extensions_reply for grpsa, addflow and ++ * delflow (Per Cederqvist) plugging memleaks. ++ * ++ * Revision 1.78 2001/04/19 19:02:39 rgb ++ * Fixed extr.tdb freeing, stealing it for getspi, update and add. ++ * Refined a couple of spinlocks, fixed the one in update. ++ * ++ * Revision 1.77 2001/04/18 20:26:16 rgb ++ * Wipe/free eroute and both tdbs from extr at end of pfkey_msg_interp() ++ * instead of inside each message type parser. This fixes two memleaks. ++ * ++ * Revision 1.76 2001/04/17 23:51:18 rgb ++ * Quiet down pfkey_x_debug_process(). ++ * ++ * Revision 1.75 2001/03/29 01:55:05 rgb ++ * Fixed pfkey key init memleak. ++ * Fixed pfkey encryption key debug output. ++ * ++ * Revision 1.74 2001/03/27 05:29:14 rgb ++ * Debug output cleanup/silencing. 
++ * ++ * Revision 1.73 2001/02/28 05:03:28 rgb ++ * Clean up and rationalise startup messages. ++ * ++ * Revision 1.72 2001/02/27 22:24:56 rgb ++ * Re-formatting debug output (line-splitting, joining, 1arg/line). ++ * Check for satoa() return codes. ++ * ++ * Revision 1.71 2001/02/27 06:59:30 rgb ++ * Added satype2name() conversions most places satype is debug printed. ++ * ++ * Revision 1.70 2001/02/26 22:37:08 rgb ++ * Fixed 'unknown proto' INT bug in new code. ++ * Added satype to protocol debugging instrumentation. ++ * ++ * Revision 1.69 2001/02/26 19:57:51 rgb ++ * Re-formatted debug output (split lines, consistent spacing). ++ * Fixed as yet undetected FLUSH bug which called ipsec_tdbcleanup() ++ * with an satype instead of proto. ++ * Checked for satype consistency and fixed minor bugs. ++ * Fixed undetected ungrpspi bug that tried to upmsg a second tdb. ++ * Check for satype sanity in pfkey_expire(). ++ * Added satype sanity check to addflow. ++ * ++ * Revision 1.68 2001/02/12 23:14:40 rgb ++ * Remove double spin lock in pfkey_expire(). ++ * ++ * Revision 1.67 2001/01/31 19:23:40 rgb ++ * Fixed double-unlock bug introduced by grpsa upmsg (found by Lars Heete). ++ * ++ * Revision 1.66 2001/01/29 22:20:04 rgb ++ * Fix minor add upmsg lifetime bug. ++ * ++ * Revision 1.65 2001/01/24 06:12:33 rgb ++ * Fixed address extension compile bugs just introduced. ++ * ++ * Revision 1.64 2001/01/24 00:31:15 rgb ++ * Added upmsg for addflow/delflow. ++ * ++ * Revision 1.63 2001/01/23 22:02:55 rgb ++ * Added upmsg to x_grpsa. ++ * Fixed lifetimes extentions to add/update/get upmsg. ++ * ++ * Revision 1.62 2000/11/30 21:47:51 rgb ++ * Fix error return bug after returning from pfkey_tdb_init(). ++ * ++ * Revision 1.61 2000/11/17 18:10:29 rgb ++ * Fixed bugs mostly relating to spirange, to treat all spi variables as ++ * network byte order since this is the way PF_KEYv2 stored spis. 
++ * ++ * Revision 1.60 2000/11/06 04:34:53 rgb ++ * Changed non-exported functions to DEBUG_NO_STATIC. ++ * Add Svenning's adaptive content compression. ++ * Ditched spin_lock_irqsave in favour of spin_lock/_bh. ++ * Fixed double unlock bug (Svenning). ++ * Fixed pfkey_msg uninitialized bug in pfkey_{expire,acquire}(). ++ * Fixed incorrect extension type (prop) in pfkey)acquire(). ++ * ++ * Revision 1.59 2000/10/11 15:25:12 rgb ++ * Fixed IPCOMP disabled compile bug. ++ * ++ * Revision 1.58 2000/10/11 14:54:03 rgb ++ * Fixed pfkey_acquire() satype to SADB_SATYPE_ESP and removed pfkey ++ * protocol violations of setting pfkey_address_build() protocol parameter ++ * to non-zero except in the case of pfkey_acquire(). ++ * ++ * Revision 1.57 2000/10/10 20:10:18 rgb ++ * Added support for debug_ipcomp and debug_verbose to klipsdebug. ++ * ++ * Revision 1.56 2000/10/06 20:24:36 rgb ++ * Fixes to pfkey_acquire to initialize extensions[] and use correct ++ * ipproto. ++ * ++ * Revision 1.55 2000/10/03 03:20:57 rgb ++ * Added brackets to get a?b:c scope right for pfkey_register reply. ++ * ++ * Revision 1.54 2000/09/29 19:49:30 rgb ++ * As-yet-unused-bits cleanup. ++ * ++ * Revision 1.53 2000/09/28 00:35:45 rgb ++ * Padded SATYPE printout in pfkey_register for vertical alignment. ++ * ++ * Revision 1.52 2000/09/20 16:21:58 rgb ++ * Cleaned up ident string alloc/free. ++ * ++ * Revision 1.51 2000/09/20 04:04:20 rgb ++ * Changed static functions to DEBUG_NO_STATIC to reveal function names in ++ * oopsen. ++ * ++ * Revision 1.50 2000/09/16 01:10:53 rgb ++ * Fixed unused var warning with debug off. ++ * ++ * Revision 1.49 2000/09/15 11:37:02 rgb ++ * Merge in heavily modified Svenning Soerensen's ++ * IPCOMP zlib deflate code. ++ * ++ * Revision 1.48 2000/09/15 04:57:57 rgb ++ * Cleaned up existing IPCOMP code before svenning addition. ++ * Initialize pfkey_reply and extensions_reply in case of early error in ++ * message parsing functions (thanks Kai!). 
++ * ++ * Revision 1.47 2000/09/13 08:02:56 rgb ++ * Added KMd registration notification. ++ * ++ * Revision 1.46 2000/09/12 22:35:36 rgb ++ * Restructured to remove unused extensions from CLEARFLOW messages. ++ * ++ * Revision 1.45 2000/09/12 03:24:23 rgb ++ * Converted #if0 debugs to sysctl. ++ * ++ * Revision 1.44 2000/09/09 06:38:39 rgb ++ * Correct SADB message type for update, add and delete. ++ * ++ * Revision 1.43 2000/09/08 19:19:56 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * Removed all references to CONFIG_IPSEC_PFKEYv2. ++ * Put in sanity checks in most msg type parsers to catch invalid satypes ++ * and empty socket lists. ++ * Moved spin-locks in pfkey_get_parse() to simplify. ++ * Added pfkey_acquire(). ++ * Added upwards messages to update, add, delete, acquire_parse, ++ * expire_parse and flush. ++ * Fix pfkey_prop_build() parameter to be only single indirection. ++ * Changed all replies to use pfkey_reply. ++ * Check return code on puttdb() and deltdbchain() in getspi, update, ++ * add, delete. ++ * Fixed up all pfkey replies to open and registered sockets. ++ * ++ * Revision 1.42 2000/09/01 18:50:26 rgb ++ * Added a supported algorithms array lists, one per satype and registered ++ * existing algorithms. ++ * Fixed pfkey_list_{insert,remove}_{socket,support}() to allow change to ++ * list. ++ * Only send pfkey_expire() messages to sockets registered for that satype. ++ * Added reply to pfkey_getspi_parse(). ++ * Added reply to pfkey_get_parse(). ++ * Fixed debug output label bug in pfkey_lifetime_process(). ++ * Cleaned up pfkey_sa_process a little. ++ * Moved pfkey_safe_build() above message type parsers to make it available ++ * for creating replies. ++ * Added comments for future work in pfkey_acquire_parse(). ++ * Fleshed out guts of pfkey_register_parse(). ++ * ++ * Revision 1.41 2000/08/24 16:58:11 rgb ++ * Fixed key debugging variables. ++ * Fixed error return code for a failed search. 
++ * Changed order of pfkey_get operations. ++ * ++ * Revision 1.40 2000/08/21 16:32:27 rgb ++ * Re-formatted for cosmetic consistency and readability. ++ * ++ * Revision 1.39 2000/08/20 21:38:57 rgb ++ * Bugfixes to as-yet-unused pfkey_update_parse() and ++ * pfkey_register_parse(). (Momchil) ++ * Added functions pfkey_safe_build(), pfkey_expire() and ++ * pfkey_build_reply(). (Momchil) ++ * Added a pfkey_reply parameter to pfkey_msg_interp(). (Momchil) ++ * ++ * Revision 1.38 2000/08/18 21:30:41 rgb ++ * Purged all tdb_spi, tdb_proto and tdb_dst macros. They are unclear. ++ * ++ * Revision 1.37 2000/08/18 18:18:02 rgb ++ * Cosmetic and descriptive changes made to debug test. ++ * getspi and update fixes from Momchil. ++ * ++ * Revision 1.36 2000/08/15 15:41:55 rgb ++ * Fixed the (as yet unused and untested) pfkey_getspi() routine. ++ * ++ * Revision 1.35 2000/08/01 14:51:52 rgb ++ * Removed _all_ remaining traces of DES. ++ * ++ * Revision 1.34 2000/07/28 14:58:32 rgb ++ * Changed kfree_s to kfree, eliminating extra arg to fix 2.4.0-test5. ++ * ++ * Revision 1.33 2000/06/28 05:50:11 rgb ++ * Actually set iv_bits. ++ * ++ * Revision 1.32 2000/05/30 18:36:56 rgb ++ * Fix AH auth hash setup bug. This breaks interop with previous PF_KEY ++ * FreeS/WAN, but fixes interop with other implementations. ++ * ++ * Revision 1.31 2000/03/16 14:05:48 rgb ++ * Fixed brace scope preventing non-debug compile. ++ * Added null parameter check for pfkey_x_debug(). ++ * ++ * Revision 1.30 2000/01/22 23:21:13 rgb ++ * Use new function satype2proto(). ++ * ++ * Revision 1.29 2000/01/22 08:40:21 rgb ++ * Invert condition to known value to avoid AF_INET6 in 2.0.36. ++ * ++ * Revision 1.28 2000/01/22 07:58:57 rgb ++ * Fixed REPLACEFLOW bug, missing braces around KLIPS_PRINT *and* SENDERR. ++ * ++ * Revision 1.27 2000/01/22 03:48:01 rgb ++ * Added extr pointer component debugging. 
++ * ++ * Revision 1.26 2000/01/21 09:41:25 rgb ++ * Changed a (void*) to (char*) cast to do proper pointer math. ++ * Don't call tdbwipe if tdb2 is NULL. ++ * ++ * Revision 1.25 2000/01/21 06:21:01 rgb ++ * Added address cases for eroute flows. ++ * Tidied up compiler directive indentation for readability. ++ * Added ictx,octx vars for simplification. ++ * Added macros for HMAC padding magic numbers. ++ * Converted from double tdb arguments to one structure (extr) ++ * containing pointers to all temporary information structures ++ * and checking for valid arguments to all ext processors and ++ * msg type parsers. ++ * Added spiungrp'ing. ++ * Added klipsdebug switching capability. ++ * Removed sa_process() check for zero protocol. ++ * Added address case for DST2 for grouping. ++ * Added/changed minor debugging instrumentation. ++ * Fixed spigrp for single said, ungrouping case. ++ * Added code to parse addflow and delflow messages. ++ * Removed redundant statements duplicating tdbwipe() functionality ++ * and causing double kfrees. ++ * Permit addflow to have a protocol of 0. ++ * ++ * Revision 1.24 1999/12/09 23:23:00 rgb ++ * Added check to pfkey_sa_process() to do eroutes. ++ * Converted to DIVUP() macro. ++ * Converted if() to switch() in pfkey_register_parse(). ++ * Use new pfkey_extensions_init() instead of memset(). ++ * ++ * Revision 1.23 1999/12/01 22:18:13 rgb ++ * Preset minspi and maxspi values in case and spirange extension is not ++ * included and check for the presence of an spirange extension before ++ * using it. Initialise tdb_sastate to LARVAL. ++ * Fixed debugging output typo. ++ * Fixed authentication context initialisation bugs (4 places). ++ * ++ * Revision 1.22 1999/11/27 11:53:08 rgb ++ * Moved pfkey_msg_parse prototype to pfkey.h ++ * Moved exts_permitted/required prototype to pfkey.h. ++ * Moved sadb_satype2proto protocol lookup table to lib/pfkey_v2_parse.c. 
++ * Deleted SADB_X_EXT_SA2 code from pfkey_sa_process() since it will never ++ * be called. ++ * Moved protocol/algorithm checks to lib/pfkey_v2_parse.c ++ * Debugging error messages added. ++ * Enable lifetime_current checking. ++ * Remove illegal requirement for SA extension to be present in an ++ * originating GETSPI call. ++ * Re-instate requirement for UPDATE or ADD message to be MATURE. ++ * Add argument to pfkey_msg_parse() for direction. ++ * Fixed IPIP dst address bug and purged redundant, leaky code. ++ * ++ * Revision 1.21 1999/11/24 05:24:20 rgb ++ * hanged 'void*extensions' to 'struct sadb_ext*extensions'. ++ * Fixed indention. ++ * Ditched redundant replay check. ++ * Fixed debug message text from 'parse' to 'process'. ++ * Added more debug output. ++ * Forgot to zero extensions array causing bug, fixed. ++ * ++ * Revision 1.20 1999/11/23 23:08:13 rgb ++ * Move all common parsing code to lib/pfkey_v2_parse.c and rename ++ * remaining bits to *_process. (PJO) ++ * Add macros for dealing with alignment and rounding up more opaquely. ++ * Use provided macro ADDRTOA_BUF instead of hardcoded value. ++ * Sort out pfkey and freeswan headers, putting them in a library path. ++ * Corrected a couple of bugs in as-yet-inactive code. ++ * ++ * Revision 1.19 1999/11/20 22:01:10 rgb ++ * Add more descriptive error messages for non-zero reserved fields. ++ * Add more descriptive error message for spirange parsing. ++ * Start on supported extension parsing. ++ * Start on register and get message parsing. ++ * ++ * Revision 1.18 1999/11/18 04:09:20 rgb ++ * Replaced all kernel version macros to shorter, readable form. ++ * ++ * Revision 1.17 1999/11/17 15:53:41 rgb ++ * Changed all occurrences of #include "../../../lib/freeswan.h" ++ * to #include which works due to -Ilibfreeswan in the ++ * klips/net/ipsec/Makefile. ++ * ++ * Revision 1.16 1999/10/26 16:57:43 rgb ++ * Add shorter macros for compiler directives to visually clean-up. 
++ * Give ipv6 code meaningful compiler directive. ++ * Add comments to other #if 0 debug code. ++ * Remove unused *_bh_atomic() calls. ++ * Fix mis-placed spinlock. ++ * ++ * Revision 1.15 1999/10/16 18:27:10 rgb ++ * Clean-up unused cruft. ++ * Fix-up lifetime_allocations_c and lifetime_addtime_c initialisations. ++ * ++ * Revision 1.14 1999/10/08 18:37:34 rgb ++ * Fix end-of-line spacing to sate whining PHMs. ++ * ++ * Revision 1.13 1999/10/03 18:49:12 rgb ++ * Spinlock fixes for 2.0.xx and 2.3.xx. ++ * ++ * Revision 1.12 1999/10/01 15:44:54 rgb ++ * Move spinlock header include to 2.1> scope. ++ * ++ * Revision 1.11 1999/10/01 00:05:45 rgb ++ * Added tdb structure locking. ++ * Use 'jiffies' instead of do_get_timeofday(). ++ * Fix lifetime assignments. ++ * ++ * Revision 1.10 1999/09/21 15:24:45 rgb ++ * Rework spirange code to save entropy and prevent endless loops. ++ * ++ * Revision 1.9 1999/09/16 12:10:21 rgb ++ * Minor fixes to random spi selection for correctness and entropy conservation. ++ * ++ * Revision 1.8 1999/05/25 22:54:46 rgb ++ * Fix comparison that should be an assignment in an if. ++ * ++ * Revision 1.7 1999/05/09 03:25:37 rgb ++ * Fix bug introduced by 2.2 quick-and-dirty patch. ++ * ++ * Revision 1.6 1999/05/08 21:32:30 rgb ++ * Fix error return reporting. ++ * ++ * Revision 1.5 1999/05/05 22:02:33 rgb ++ * Add a quick and dirty port to 2.2 kernels by Marc Boucher . ++ * ++ * Revision 1.4 1999/04/29 15:22:40 rgb ++ * Standardise an error return method. ++ * Add debugging instrumentation. ++ * Add check for existence of macros min/max. ++ * Add extensions permitted/required in/out filters. ++ * Add satype-to-protocol table. ++ * Add a second tdb pointer to each parser to accomodate GRPSA. ++ * Move AH & no_algo_set to GETSPI, UPDATE and ADD. ++ * Add OOO window check. ++ * Add support for IPPROTO_IPIP and hooks for IPPROTO_COMP. ++ * Add timestamp to lifetime parse. ++ * Fix address structure length checking bug. 
++ * Fix address structure allocation bug (forgot to kmalloc!). ++ * Add checks for extension lengths. ++ * Add checks for extension reserved illegal values. ++ * Add check for spirange legal values. ++ * Add an extension type for parsing a second satype, SA and ++ * DST_ADDRESS. ++ * Make changes to tdb_init() template to get pfkey_tdb_init(), ++ * eliminating any mention of xformsw. ++ * Implement getspi, update and grpsa (not tested). ++ * Add stubs for as yet unimplemented message types. ++ * Add table of message parsers to substitute for msg_parse switch. ++ * ++ * Revision 1.3 1999/04/15 17:58:07 rgb ++ * Add RCSID labels. ++ * ++ * Revision 1.2 1999/04/15 15:37:26 rgb ++ * Forward check changes from POST1_00 branch. ++ * ++ * Revision 1.1.2.1 1999/03/26 20:58:56 rgb ++ * Add pfkeyv2 support to KLIPS. ++ * ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/prng.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,201 @@ ++/* ++ * crypto-class pseudorandom number generator ++ * currently uses same algorithm as RC4(TM), from Schneier 2nd ed p397 ++ * Copyright (C) 2002 Henry Spencer. ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. 
++ * ++ * RCSID $Id: prng.c,v 1.7 2004-07-10 07:48:36 mcr Exp $ ++ */ ++#include "openswan.h" ++ ++/* ++ - prng_init - initialize PRNG from a key ++ */ ++void ++prng_init(prng, key, keylen) ++struct prng *prng; ++const unsigned char *key; ++size_t keylen; ++{ ++ unsigned char k[256]; ++ int i, j; ++ unsigned const char *p; ++ unsigned const char *keyend = key + keylen; ++ unsigned char t; ++ ++ for (i = 0; i <= 255; i++) ++ prng->sbox[i] = i; ++ p = key; ++ for (i = 0; i <= 255; i++) { ++ k[i] = *p++; ++ if (p >= keyend) ++ p = key; ++ } ++ j = 0; ++ for (i = 0; i <= 255; i++) { ++ j = (j + prng->sbox[i] + k[i]) & 0xff; ++ t = prng->sbox[i]; ++ prng->sbox[i] = prng->sbox[j]; ++ prng->sbox[j] = t; ++ k[i] = 0; /* clear out key memory */ ++ } ++ prng->i = 0; ++ prng->j = 0; ++ prng->count = 0; ++} ++ ++/* ++ - prng_bytes - get some pseudorandom bytes from PRNG ++ */ ++void ++prng_bytes(prng, dst, dstlen) ++struct prng *prng; ++unsigned char *dst; ++size_t dstlen; ++{ ++ int i, j, t; ++ unsigned char *p = dst; ++ size_t remain = dstlen; ++# define MAX 4000000000ul ++ ++ while (remain > 0) { ++ i = (prng->i + 1) & 0xff; ++ prng->i = i; ++ j = (prng->j + prng->sbox[i]) & 0xff; ++ prng->j = j; ++ t = prng->sbox[i]; ++ prng->sbox[i] = prng->sbox[j]; ++ prng->sbox[j] = t; ++ t = (t + prng->sbox[i]) & 0xff; ++ *p++ = prng->sbox[t]; ++ remain--; ++ } ++ if (prng->count < MAX - dstlen) ++ prng->count += dstlen; ++ else ++ prng->count = MAX; ++} ++ ++/* ++ - prnt_count - how many bytes have been extracted from PRNG so far? 
++ */ ++unsigned long ++prng_count(prng) ++struct prng *prng; ++{ ++ return prng->count; ++} ++ ++/* ++ - prng_final - clear out PRNG to ensure nothing left in memory ++ */ ++void ++prng_final(prng) ++struct prng *prng; ++{ ++ int i; ++ ++ for (i = 0; i <= 255; i++) ++ prng->sbox[i] = 0; ++ prng->i = 0; ++ prng->j = 0; ++ prng->count = 0; /* just for good measure */ ++} ++ ++ ++ ++#ifdef PRNG_MAIN ++ ++#include ++ ++void regress(); ++ ++int ++main(argc, argv) ++int argc; ++char *argv[]; ++{ ++ struct prng pr; ++ unsigned char buf[100]; ++ unsigned char *p; ++ size_t n; ++ ++ if (argc < 2) { ++ fprintf(stderr, "Usage: %s {key|-r}\n", argv[0]); ++ exit(2); ++ } ++ ++ if (strcmp(argv[1], "-r") == 0) { ++ regress(); ++ fprintf(stderr, "regress() returned?!?\n"); ++ exit(1); ++ } ++ ++ prng_init(&pr, argv[1], strlen(argv[1])); ++ prng_bytes(&pr, buf, 32); ++ printf("0x"); ++ for (p = buf, n = 32; n > 0; p++, n--) ++ printf("%02x", *p); ++ printf("\n%lu bytes\n", prng_count(&pr)); ++ prng_final(&pr); ++ exit(0); ++} ++ ++void ++regress() ++{ ++ struct prng pr; ++ unsigned char buf[100]; ++ unsigned char *p; ++ size_t n; ++ /* somewhat non-random sample key */ ++ unsigned char key[] = "here we go gathering nuts in May"; ++ /* first thirty bytes of output from that key */ ++ unsigned char good[] = "\x3f\x02\x8e\x4a\x2a\xea\x23\x18\x92\x7c" ++ "\x09\x52\x83\x61\xaa\x26\xce\xbb\x9d\x71" ++ "\x71\xe5\x10\x22\xaf\x60\x54\x8d\x5b\x28"; ++ int nzero, none; ++ int show = 0; ++ ++ prng_init(&pr, key, strlen(key)); ++ prng_bytes(&pr, buf, sizeof(buf)); ++ for (p = buf, n = sizeof(buf); n > 0; p++, n--) { ++ if (*p == 0) ++ nzero++; ++ if (*p == 255) ++ none++; ++ } ++ if (nzero > 3 || none > 3) { ++ fprintf(stderr, "suspiciously non-random output!\n"); ++ show = 1; ++ } ++ if (memcmp(buf, good, strlen(good)) != 0) { ++ fprintf(stderr, "incorrect output!\n"); ++ show = 1; ++ } ++ if (show) { ++ fprintf(stderr, "0x"); ++ for (p = buf, n = sizeof(buf); n > 0; p++, n--) ++ 
fprintf(stderr, "%02x", *p); ++ fprintf(stderr, "\n"); ++ exit(1); ++ } ++ if (prng_count(&pr) != sizeof(buf)) { ++ fprintf(stderr, "got %u bytes, but count is %lu\n", ++ sizeof(buf), prng_count(&pr)); ++ exit(1); ++ } ++ prng_final(&pr); ++ exit(0); ++} ++ ++#endif /* PRNG_MAIN */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/radij.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,1237 @@ ++char radij_c_version[] = "RCSID $Id: radij.c,v 1.48.2.1 2006-10-06 21:39:27 paul Exp $"; ++ ++/* ++ * This file is defived from ${SRC}/sys/net/radix.c of BSD 4.4lite ++ * ++ * Variable and procedure names have been modified so that they don't ++ * conflict with the original BSD code, as a small number of modifications ++ * have been introduced and we may want to reuse this code in BSD. ++ * ++ * The `j' in `radij' is pronounced as a voiceless guttural (like a Greek ++ * chi or a German ch sound (as `doch', not as in `milch'), or even a ++ * spanish j as in Juan. It is not as far back in the throat like ++ * the corresponding Hebrew sound, nor is it a soft breath like the English h. ++ * It has nothing to do with the Dutch ij sound. ++ * ++ * Here is the appropriate copyright notice: ++ */ ++ ++/* ++ * Copyright (c) 1988, 1989, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * This product includes software developed by the University of ++ * California, Berkeley and its contributors. ++ * 4. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)radix.c 8.2 (Berkeley) 1/4/94 ++ */ ++ ++/* ++ * Routines to build and maintain radix trees for routing lookups. 
++ */ ++ ++#ifndef AUTOCONF_INCLUDED ++#include ++#endif ++#include ++#include /* printk() */ ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef MALLOC_SLAB ++# include /* kmalloc() */ ++#else /* MALLOC_SLAB */ ++# include /* kmalloc() */ ++#endif /* MALLOC_SLAB */ ++#include /* error codes */ ++#include /* size_t */ ++#include /* mark_bh */ ++ ++#include /* struct device, and other headers */ ++#include /* eth_type_trans */ ++#include /* struct iphdr */ ++#include ++#ifdef NET_21 ++# include ++#endif /* NET_21 */ ++ ++#include ++ ++#include ++ ++#include "openswan/radij.h" ++#include "openswan/ipsec_encap.h" ++#include "openswan/ipsec_radij.h" ++ ++int maj_keylen; ++struct radij_mask *rj_mkfreelist; ++struct radij_node_head *mask_rjhead; ++static int gotOddMasks; ++static char *maskedKey; ++static char *rj_zeroes, *rj_ones; ++ ++#define rj_masktop (mask_rjhead->rnh_treetop) ++#ifdef Bcmp ++# undef Bcmp ++#endif /* Bcmp */ ++#define Bcmp(a, b, l) (l == 0 ? 0 : memcmp((caddr_t)(b), (caddr_t)(a), (size_t)l)) ++/* ++ * The data structure for the keys is a radix tree with one way ++ * branching removed. The index rj_b at an internal node n represents a bit ++ * position to be tested. The tree is arranged so that all descendants ++ * of a node n have keys whose bits all agree up to position rj_b - 1. ++ * (We say the index of n is rj_b.) ++ * ++ * There is at least one descendant which has a one bit at position rj_b, ++ * and at least one with a zero there. ++ * ++ * A route is determined by a pair of key and mask. We require that the ++ * bit-wise logical and of the key and mask to be the key. ++ * We define the index of a route to associated with the mask to be ++ * the first bit number in the mask where 0 occurs (with bit number 0 ++ * representing the highest order bit). ++ * ++ * We say a mask is normal if every bit is 0, past the index of the mask. 
++ * If a node n has a descendant (k, m) with index(m) == index(n) == rj_b, ++ * and m is a normal mask, then the route applies to every descendant of n. ++ * If the index(m) < rj_b, this implies the trailing last few bits of k ++ * before bit b are all 0, (and hence consequently true of every descendant ++ * of n), so the route applies to all descendants of the node as well. ++ * ++ * The present version of the code makes no use of normal routes, ++ * but similar logic shows that a non-normal mask m such that ++ * index(m) <= index(n) could potentially apply to many children of n. ++ * Thus, for each non-host route, we attach its mask to a list at an internal ++ * node as high in the tree as we can go. ++ */ ++ ++struct radij_node * ++rj_search(v_arg, head) ++ void *v_arg; ++ struct radij_node *head; ++{ ++ register struct radij_node *x; ++ register caddr_t v; ++ ++ for (x = head, v = v_arg; x->rj_b >= 0;) { ++ if (x->rj_bmask & v[x->rj_off]) ++ x = x->rj_r; ++ else ++ x = x->rj_l; ++ } ++ return (x); ++}; ++ ++struct radij_node * ++rj_search_m(v_arg, head, m_arg) ++ struct radij_node *head; ++ void *v_arg, *m_arg; ++{ ++ register struct radij_node *x; ++ register caddr_t v = v_arg, m = m_arg; ++ ++ for (x = head; x->rj_b >= 0;) { ++ if ((x->rj_bmask & m[x->rj_off]) && ++ (x->rj_bmask & v[x->rj_off])) ++ x = x->rj_r; ++ else ++ x = x->rj_l; ++ } ++ return x; ++}; ++ ++int ++rj_refines(m_arg, n_arg) ++ void *m_arg, *n_arg; ++{ ++ register caddr_t m = m_arg, n = n_arg; ++ register caddr_t lim, lim2 = lim = n + *(u_char *)n; ++ int longer = (*(u_char *)n++) - (int)(*(u_char *)m++); ++ int masks_are_equal = 1; ++ ++ if (longer > 0) ++ lim -= longer; ++ while (n < lim) { ++ if (*n & ~(*m)) ++ return 0; ++ if (*n++ != *m++) ++ masks_are_equal = 0; ++ ++ } ++ while (n < lim2) ++ if (*n++) ++ return 0; ++ if (masks_are_equal && (longer < 0)) ++ for (lim2 = m - longer; m < lim2; ) ++ if (*m++) ++ return 1; ++ return (!masks_are_equal); ++} ++ ++ ++struct radij_node * 
++rj_match(v_arg, head) ++ void *v_arg; ++ struct radij_node_head *head; ++{ ++ caddr_t v = v_arg; ++ register struct radij_node *t = head->rnh_treetop, *x; ++ register caddr_t cp = v, cp2, cp3; ++ caddr_t cplim, mstart; ++ struct radij_node *saved_t, *top = t; ++ int off = t->rj_off, vlen = *(u_char *)cp, matched_off; ++ ++ /* ++ * Open code rj_search(v, top) to avoid overhead of extra ++ * subroutine call. ++ */ ++ for (; t->rj_b >= 0; ) { ++ if (t->rj_bmask & cp[t->rj_off]) ++ t = t->rj_r; ++ else ++ t = t->rj_l; ++ } ++ /* ++ * See if we match exactly as a host destination ++ */ ++ KLIPS_PRINT(debug_radij, ++ "klips_debug:rj_match: " ++ "* See if we match exactly as a host destination\n"); ++ ++ cp += off; cp2 = t->rj_key + off; cplim = v + vlen; ++ for (; cp < cplim; cp++, cp2++) ++ if (*cp != *cp2) ++ goto on1; ++ /* ++ * This extra grot is in case we are explicitly asked ++ * to look up the default. Ugh! ++ */ ++ if ((t->rj_flags & RJF_ROOT) && t->rj_dupedkey) ++ t = t->rj_dupedkey; ++ return t; ++on1: ++ matched_off = cp - v; ++ saved_t = t; ++ KLIPS_PRINT(debug_radij, ++ "klips_debug:rj_match: " ++ "** try to match a leaf, t=0p%p\n", t); ++ do { ++ if (t->rj_mask) { ++ /* ++ * Even if we don't match exactly as a hosts; ++ * we may match if the leaf we wound up at is ++ * a route to a net. 
++ */ ++ cp3 = matched_off + t->rj_mask; ++ cp2 = matched_off + t->rj_key; ++ for (; cp < cplim; cp++) ++ if ((*cp2++ ^ *cp) & *cp3++) ++ break; ++ if (cp == cplim) ++ return t; ++ cp = matched_off + v; ++ } ++ } while ((t = t->rj_dupedkey)); ++ t = saved_t; ++ /* start searching up the tree */ ++ KLIPS_PRINT(debug_radij, ++ "klips_debug:rj_match: " ++ "*** start searching up the tree, t=0p%p\n", ++ t); ++ do { ++ register struct radij_mask *m; ++ ++ t = t->rj_p; ++ KLIPS_PRINT(debug_radij, ++ "klips_debug:rj_match: " ++ "**** t=0p%p\n", ++ t); ++ if ((m = t->rj_mklist)) { ++ /* ++ * After doing measurements here, it may ++ * turn out to be faster to open code ++ * rj_search_m here instead of always ++ * copying and masking. ++ */ ++ /* off = min(t->rj_off, matched_off); */ ++ off = t->rj_off; ++ if (matched_off < off) ++ off = matched_off; ++ mstart = maskedKey + off; ++ do { ++ cp2 = mstart; ++ cp3 = m->rm_mask + off; ++ KLIPS_PRINT(debug_radij, ++ "klips_debug:rj_match: " ++ "***** cp2=0p%p cp3=0p%p\n", ++ cp2, cp3); ++ for (cp = v + off; cp < cplim;) ++ *cp2++ = *cp++ & *cp3++; ++ x = rj_search(maskedKey, t); ++ while (x && x->rj_mask != m->rm_mask) ++ x = x->rj_dupedkey; ++ if (x && ++ (Bcmp(mstart, x->rj_key + off, ++ vlen - off) == 0)) ++ return x; ++ } while ((m = m->rm_mklist)); ++ } ++ } while (t != top); ++ KLIPS_PRINT(debug_radij, ++ "klips_debug:rj_match: " ++ "***** not found.\n"); ++ return 0; ++}; ++ ++#ifdef RJ_DEBUG ++int rj_nodenum; ++struct radij_node *rj_clist; ++int rj_saveinfo; ++DEBUG_NO_STATIC void traverse(struct radij_node *); ++#ifdef RJ_DEBUG2 ++int rj_debug = 1; ++#else ++int rj_debug = 0; ++#endif /* RJ_DEBUG2 */ ++#endif /* RJ_DEBUG */ ++ ++struct radij_node * ++rj_newpair(v, b, nodes) ++ void *v; ++ int b; ++ struct radij_node nodes[2]; ++{ ++ register struct radij_node *tt = nodes, *t = tt + 1; ++ t->rj_b = b; t->rj_bmask = 0x80 >> (b & 7); ++ t->rj_l = tt; t->rj_off = b >> 3; ++ tt->rj_b = -1; tt->rj_key = (caddr_t)v; tt->rj_p = 
t; ++ tt->rj_flags = t->rj_flags = RJF_ACTIVE; ++#ifdef RJ_DEBUG ++ tt->rj_info = rj_nodenum++; t->rj_info = rj_nodenum++; ++ tt->rj_twin = t; tt->rj_ybro = rj_clist; rj_clist = tt; ++#endif /* RJ_DEBUG */ ++ return t; ++} ++ ++struct radij_node * ++rj_insert(v_arg, head, dupentry, nodes) ++ void *v_arg; ++ struct radij_node_head *head; ++ int *dupentry; ++ struct radij_node nodes[2]; ++{ ++ caddr_t v = v_arg; ++ struct radij_node *top = head->rnh_treetop; ++ int head_off = top->rj_off, vlen = (int)*((u_char *)v); ++ register struct radij_node *t = rj_search(v_arg, top); ++ register caddr_t cp = v + head_off; ++ register int b; ++ struct radij_node *tt; ++ /* ++ *find first bit at which v and t->rj_key differ ++ */ ++ { ++ register caddr_t cp2 = t->rj_key + head_off; ++ register int cmp_res; ++ caddr_t cplim = v + vlen; ++ ++ while (cp < cplim) ++ if (*cp2++ != *cp++) ++ goto on1; ++ *dupentry = 1; ++ return t; ++on1: ++ *dupentry = 0; ++ cmp_res = (cp[-1] ^ cp2[-1]) & 0xff; ++ for (b = (cp - v) << 3; cmp_res; b--) ++ cmp_res >>= 1; ++ } ++ { ++ register struct radij_node *p, *x = top; ++ cp = v; ++ do { ++ p = x; ++ if (cp[x->rj_off] & x->rj_bmask) ++ x = x->rj_r; ++ else x = x->rj_l; ++ } while (b > (unsigned) x->rj_b); /* x->rj_b < b && x->rj_b >= 0 */ ++#ifdef RJ_DEBUG ++ if (rj_debug) ++ printk("klips_debug:rj_insert: Going In:\n"), traverse(p); ++#endif /* RJ_DEBUG */ ++ t = rj_newpair(v_arg, b, nodes); tt = t->rj_l; ++ if ((cp[p->rj_off] & p->rj_bmask) == 0) ++ p->rj_l = t; ++ else ++ p->rj_r = t; ++ x->rj_p = t; t->rj_p = p; /* frees x, p as temp vars below */ ++ if ((cp[t->rj_off] & t->rj_bmask) == 0) { ++ t->rj_r = x; ++ } else { ++ t->rj_r = tt; t->rj_l = x; ++ } ++#ifdef RJ_DEBUG ++ if (rj_debug) ++ printk("klips_debug:rj_insert: Coming out:\n"), traverse(p); ++#endif /* RJ_DEBUG */ ++ } ++ return (tt); ++} ++ ++struct radij_node * ++rj_addmask(n_arg, search, skip) ++ int search, skip; ++ void *n_arg; ++{ ++ caddr_t netmask = (caddr_t)n_arg; ++ register 
struct radij_node *x; ++ register caddr_t cp, cplim; ++ register int b, mlen, j; ++ int maskduplicated; ++ ++ mlen = *(u_char *)netmask; ++ if (search) { ++ x = rj_search(netmask, rj_masktop); ++ mlen = *(u_char *)netmask; ++ if (Bcmp(netmask, x->rj_key, mlen) == 0) ++ return (x); ++ } ++ R_Malloc(x, struct radij_node *, maj_keylen + 2 * sizeof (*x)); ++ if (x == 0) ++ return (0); ++ Bzero(x, maj_keylen + 2 * sizeof (*x)); ++ cp = (caddr_t)(x + 2); ++ Bcopy(netmask, cp, mlen); ++ netmask = cp; ++ x = rj_insert(netmask, mask_rjhead, &maskduplicated, x); ++ /* ++ * Calculate index of mask. ++ */ ++ cplim = netmask + mlen; ++ for (cp = netmask + skip; cp < cplim; cp++) ++ if (*(u_char *)cp != 0xff) ++ break; ++ b = (cp - netmask) << 3; ++ if (cp != cplim) { ++ if (*cp != 0) { ++ gotOddMasks = 1; ++ for (j = 0x80; j; b++, j >>= 1) ++ if ((j & *cp) == 0) ++ break; ++ } ++ } ++ x->rj_b = -1 - b; ++ return (x); ++} ++ ++#if 0 ++struct radij_node * ++#endif ++int ++rj_addroute(v_arg, n_arg, head, treenodes) ++ void *v_arg, *n_arg; ++ struct radij_node_head *head; ++ struct radij_node treenodes[2]; ++{ ++ caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg; ++ register struct radij_node *t, *x=NULL, *tt; ++ struct radij_node *saved_tt, *top = head->rnh_treetop; ++ short b = 0, b_leaf; ++ int mlen, keyduplicated; ++ caddr_t cplim; ++ struct radij_mask *m, **mp; ++ ++ /* ++ * In dealing with non-contiguous masks, there may be ++ * many different routes which have the same mask. ++ * We will find it useful to have a unique pointer to ++ * the mask to speed avoiding duplicate references at ++ * nodes and possibly save time in calculating indices. 
++ */ ++ if (netmask) { ++ x = rj_search(netmask, rj_masktop); ++ mlen = *(u_char *)netmask; ++ if (Bcmp(netmask, x->rj_key, mlen) != 0) { ++ x = rj_addmask(netmask, 0, top->rj_off); ++ if (x == 0) ++ return -ENOMEM; /* (0) rgb */ ++ } ++ netmask = x->rj_key; ++ b = -1 - x->rj_b; ++ } ++ /* ++ * Deal with duplicated keys: attach node to previous instance ++ */ ++ saved_tt = tt = rj_insert(v, head, &keyduplicated, treenodes); ++#ifdef RJ_DEBUG ++ printk("addkey: duplicated: %d\n", keyduplicated); ++#endif ++ if (keyduplicated) { ++ do { ++ if (tt->rj_mask == netmask) ++ return -EEXIST; /* -ENXIO; (0) rgb */ ++ t = tt; ++ if (netmask == 0 || ++ (tt->rj_mask && rj_refines(netmask, tt->rj_mask))) ++ break; ++ } while ((tt = tt->rj_dupedkey)); ++ /* ++ * If the mask is not duplicated, we wouldn't ++ * find it among possible duplicate key entries ++ * anyway, so the above test doesn't hurt. ++ * ++ * We sort the masks for a duplicated key the same way as ++ * in a masklist -- most specific to least specific. ++ * This may require the unfortunate nuisance of relocating ++ * the head of the list. ++ */ ++ if (tt && t == saved_tt) { ++ struct radij_node *xx = x; ++ /* link in at head of list */ ++ (tt = treenodes)->rj_dupedkey = t; ++ tt->rj_flags = t->rj_flags; ++ tt->rj_p = x = t->rj_p; ++ if (x->rj_l == t) x->rj_l = tt; else x->rj_r = tt; ++ saved_tt = tt; x = xx; ++ } else { ++ (tt = treenodes)->rj_dupedkey = t->rj_dupedkey; ++ t->rj_dupedkey = tt; ++ } ++#ifdef RJ_DEBUG ++ t=tt+1; tt->rj_info = rj_nodenum++; t->rj_info = rj_nodenum++; ++ tt->rj_twin = t; tt->rj_ybro = rj_clist; rj_clist = tt; ++#endif /* RJ_DEBUG */ ++ t = saved_tt; ++ tt->rj_key = (caddr_t) v; ++ tt->rj_b = -1; ++ tt->rj_flags = t->rj_flags & ~RJF_ROOT; ++ } ++ /* ++ * Put mask in tree. 
++ */ ++ if (netmask) { ++ tt->rj_mask = netmask; ++ tt->rj_b = x->rj_b; ++ } ++ t = saved_tt->rj_p; ++ b_leaf = -1 - t->rj_b; ++ if (t->rj_r == saved_tt) x = t->rj_l; else x = t->rj_r; ++ /* Promote general routes from below */ ++ if (x->rj_b < 0) { ++ if (x->rj_mask && (x->rj_b >= b_leaf) && x->rj_mklist == 0) { ++ MKGet(m); ++ if (m) { ++ Bzero(m, sizeof *m); ++ m->rm_b = x->rj_b; ++ m->rm_mask = x->rj_mask; ++ x->rj_mklist = t->rj_mklist = m; ++ } ++ } ++ } else if (x->rj_mklist) { ++ /* ++ * Skip over masks whose index is > that of new node ++ */ ++ for (mp = &x->rj_mklist; (m = *mp); mp = &m->rm_mklist) ++ if (m->rm_b >= b_leaf) ++ break; ++ t->rj_mklist = m; *mp = 0; ++ } ++ /* Add new route to highest possible ancestor's list */ ++ if ((netmask == 0) || (b > t->rj_b )) { ++#ifdef RJ_DEBUG ++ printk("klips:radij.c: netmask = %p or b(%d)>t->rjb(%d)\n", netmask, b, t->rj_b); ++#endif ++ return 0; /* tt rgb */ /* can't lift at all */ ++ } ++ b_leaf = tt->rj_b; ++ do { ++ x = t; ++ t = t->rj_p; ++ } while (b <= t->rj_b && x != top); ++ /* ++ * Search through routes associated with node to ++ * insert new route according to index. ++ * For nodes of equal index, place more specific ++ * masks first. 
++ */ ++ cplim = netmask + mlen; ++ for (mp = &x->rj_mklist; (m = *mp); mp = &m->rm_mklist) { ++ if (m->rm_b < b_leaf) ++ continue; ++ if (m->rm_b > b_leaf) ++ break; ++ if (m->rm_mask == netmask) { ++ m->rm_refs++; ++ tt->rj_mklist = m; ++#ifdef RJ_DEBUG ++ printk("klips:radij.c: m->rm_mask %p == netmask\n", netmask); ++#endif ++ return 0; /* tt rgb */ ++ } ++ if (rj_refines(netmask, m->rm_mask)) ++ break; ++ } ++ MKGet(m); ++ if (m == 0) { ++ printk("klips_debug:rj_addroute: " ++ "Mask for route not entered\n"); ++ return 0; /* (tt) rgb */ ++ } ++ Bzero(m, sizeof *m); ++ m->rm_b = b_leaf; ++ m->rm_mask = netmask; ++ m->rm_mklist = *mp; ++ *mp = m; ++ tt->rj_mklist = m; ++#ifdef RJ_DEBUG ++ printk("klips:radij.c: addroute done\n"); ++#endif ++ return 0; /* tt rgb */ ++} ++ ++int ++rj_delete(v_arg, netmask_arg, head, node) ++ void *v_arg, *netmask_arg; ++ struct radij_node_head *head; ++ struct radij_node **node; ++{ ++ register struct radij_node *t, *p, *x, *tt; ++ struct radij_mask *m, *saved_m, **mp; ++ struct radij_node *dupedkey, *saved_tt, *top; ++ caddr_t v, netmask; ++ int b, head_off, vlen; ++ ++ v = v_arg; ++ netmask = netmask_arg; ++ x = head->rnh_treetop; ++ tt = rj_search(v, x); ++ head_off = x->rj_off; ++ vlen = *(u_char *)v; ++ saved_tt = tt; ++ top = x; ++ if (tt == 0 || ++ Bcmp(v + head_off, tt->rj_key + head_off, vlen - head_off)) ++ return -EFAULT; /* (0) rgb */ ++ /* ++ * Delete our route from mask lists. 
++ */ ++ if ((dupedkey = tt->rj_dupedkey)) { ++ if (netmask) ++ netmask = rj_search(netmask, rj_masktop)->rj_key; ++ while (tt->rj_mask != netmask) ++ if ((tt = tt->rj_dupedkey) == 0) ++ return -ENOENT; /* -ENXIO; (0) rgb */ ++ } ++ if (tt->rj_mask == 0 || (saved_m = m = tt->rj_mklist) == 0) ++ goto on1; ++ if (m->rm_mask != tt->rj_mask) { ++ printk("klips_debug:rj_delete: " ++ "inconsistent annotation\n"); ++ goto on1; ++ } ++ if (--m->rm_refs >= 0) ++ goto on1; ++ b = -1 - tt->rj_b; ++ t = saved_tt->rj_p; ++ if (b > t->rj_b) ++ goto on1; /* Wasn't lifted at all */ ++ do { ++ x = t; ++ t = t->rj_p; ++ } while (b <= t->rj_b && x != top); ++ for (mp = &x->rj_mklist; (m = *mp); mp = &m->rm_mklist) ++ if (m == saved_m) { ++ *mp = m->rm_mklist; ++ MKFree(m); ++ break; ++ } ++ if (m == 0) ++ printk("klips_debug:rj_delete: " ++ "couldn't find our annotation\n"); ++on1: ++ /* ++ * Eliminate us from tree ++ */ ++ if (tt->rj_flags & RJF_ROOT) ++ return -EFAULT; /* (0) rgb */ ++#ifdef RJ_DEBUG ++ /* Get us out of the creation list */ ++ for (t = rj_clist; t && t->rj_ybro != tt; t = t->rj_ybro) {} ++ if (t) t->rj_ybro = tt->rj_ybro; ++#endif /* RJ_DEBUG */ ++ t = tt->rj_p; ++ if (dupedkey) { ++ if (tt == saved_tt) { ++ x = dupedkey; x->rj_p = t; ++ if (t->rj_l == tt) t->rj_l = x; else t->rj_r = x; ++ } else { ++ for (x = p = saved_tt; p && p->rj_dupedkey != tt;) ++ p = p->rj_dupedkey; ++ if (p) p->rj_dupedkey = tt->rj_dupedkey; ++ else printk("klips_debug:rj_delete: " ++ "couldn't find node that we started with\n"); ++ } ++ t = tt + 1; ++ if (t->rj_flags & RJF_ACTIVE) { ++#ifndef RJ_DEBUG ++ *++x = *t; p = t->rj_p; ++#else ++ b = t->rj_info; *++x = *t; t->rj_info = b; p = t->rj_p; ++#endif /* RJ_DEBUG */ ++ if (p->rj_l == t) p->rj_l = x; else p->rj_r = x; ++ x->rj_l->rj_p = x; x->rj_r->rj_p = x; ++ } ++ goto out; ++ } ++ if (t->rj_l == tt) x = t->rj_r; else x = t->rj_l; ++ p = t->rj_p; ++ if (p->rj_r == t) p->rj_r = x; else p->rj_l = x; ++ x->rj_p = p; ++ /* ++ * Demote 
routes attached to us. ++ */ ++ if (t->rj_mklist) { ++ if (x->rj_b >= 0) { ++ for (mp = &x->rj_mklist; (m = *mp);) ++ mp = &m->rm_mklist; ++ *mp = t->rj_mklist; ++ } else { ++ for (m = t->rj_mklist; m;) { ++ struct radij_mask *mm = m->rm_mklist; ++ if (m == x->rj_mklist && (--(m->rm_refs) < 0)) { ++ x->rj_mklist = 0; ++ MKFree(m); ++ } else ++ printk("klips_debug:rj_delete: " ++ "Orphaned Mask 0p%p at 0p%p\n", m, x); ++ m = mm; ++ } ++ } ++ } ++ /* ++ * We may be holding an active internal node in the tree. ++ */ ++ x = tt + 1; ++ if (t != x) { ++#ifndef RJ_DEBUG ++ *t = *x; ++#else ++ b = t->rj_info; *t = *x; t->rj_info = b; ++#endif /* RJ_DEBUG */ ++ t->rj_l->rj_p = t; t->rj_r->rj_p = t; ++ p = x->rj_p; ++ if (p->rj_l == x) p->rj_l = t; else p->rj_r = t; ++ } ++out: ++ tt->rj_flags &= ~RJF_ACTIVE; ++ tt[1].rj_flags &= ~RJF_ACTIVE; ++ *node = tt; ++ return 0; /* (tt) rgb */ ++} ++ ++int ++rj_walktree(h, f, w) ++ struct radij_node_head *h; ++ register int (*f)(struct radij_node *,void *); ++ void *w; ++{ ++ int error; ++ struct radij_node *base, *next; ++ register struct radij_node *rn; ++ ++ if(!h || !f /* || !w */) { ++ return -ENODATA; ++ } ++ ++ rn = h->rnh_treetop; ++ /* ++ * This gets complicated because we may delete the node ++ * while applying the function f to it, so we need to calculate ++ * the successor node in advance. ++ */ ++ /* First time through node, go left */ ++ while (rn->rj_b >= 0) ++ rn = rn->rj_l; ++ for (;;) { ++#ifdef CONFIG_KLIPS_DEBUG ++ if(debug_radij) { ++ printk("klips_debug:rj_walktree: " ++ "for: rn=0p%p rj_b=%d rj_flags=%x", ++ rn, ++ rn->rj_b, ++ rn->rj_flags); ++ rn->rj_b >= 0 ? 
++ printk(" node off=%x\n", ++ rn->rj_off) : ++ printk(" leaf key = %08x->%08x\n", ++ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_key)->sen_ip_src.s_addr), ++ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_key)->sen_ip_dst.s_addr)) ++ ; ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ base = rn; ++ /* If at right child go back up, otherwise, go right */ ++ while (rn->rj_p->rj_r == rn && (rn->rj_flags & RJF_ROOT) == 0) ++ rn = rn->rj_p; ++ /* Find the next *leaf* since next node might vanish, too */ ++ for (rn = rn->rj_p->rj_r; rn->rj_b >= 0;) ++ rn = rn->rj_l; ++ next = rn; ++#ifdef CONFIG_KLIPS_DEBUG ++ if(debug_radij) { ++ printk("klips_debug:rj_walktree: " ++ "processing leaves, rn=0p%p rj_b=%d rj_flags=%x", ++ rn, ++ rn->rj_b, ++ rn->rj_flags); ++ rn->rj_b >= 0 ? ++ printk(" node off=%x\n", ++ rn->rj_off) : ++ printk(" leaf key = %08x->%08x\n", ++ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_key)->sen_ip_src.s_addr), ++ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_key)->sen_ip_dst.s_addr)) ++ ; ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ /* Process leaves */ ++ while ((rn = base)) { ++ base = rn->rj_dupedkey; ++#ifdef CONFIG_KLIPS_DEBUG ++ if(debug_radij) { ++ printk("klips_debug:rj_walktree: " ++ "while: base=0p%p rn=0p%p rj_b=%d rj_flags=%x", ++ base, ++ rn, ++ rn->rj_b, ++ rn->rj_flags); ++ rn->rj_b >= 0 ? 
++ printk(" node off=%x\n", ++ rn->rj_off) : ++ printk(" leaf key = %08x->%08x\n", ++ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_key)->sen_ip_src.s_addr), ++ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_key)->sen_ip_dst.s_addr)) ++ ; ++ } ++#endif /* CONFIG_KLIPS_DEBUG */ ++ if (!(rn->rj_flags & RJF_ROOT) && (error = (*f)(rn, w))) ++ return (-error); ++ } ++ rn = next; ++ if (rn->rj_flags & RJF_ROOT) ++ return (0); ++ } ++ /* NOTREACHED */ ++} ++ ++int ++rj_inithead(head, off) ++ void **head; ++ int off; ++{ ++ register struct radij_node_head *rnh; ++ register struct radij_node *t, *tt, *ttt; ++ if (*head) ++ return (1); ++ R_Malloc(rnh, struct radij_node_head *, sizeof (*rnh)); ++ if (rnh == NULL) ++ return (0); ++ Bzero(rnh, sizeof (*rnh)); ++ *head = rnh; ++ t = rj_newpair(rj_zeroes, off, rnh->rnh_nodes); ++ ttt = rnh->rnh_nodes + 2; ++ t->rj_r = ttt; ++ t->rj_p = t; ++ tt = t->rj_l; ++ tt->rj_flags = t->rj_flags = RJF_ROOT | RJF_ACTIVE; ++ tt->rj_b = -1 - off; ++ *ttt = *tt; ++ ttt->rj_key = rj_ones; ++ rnh->rnh_addaddr = rj_addroute; ++ rnh->rnh_deladdr = rj_delete; ++ rnh->rnh_matchaddr = rj_match; ++ rnh->rnh_walktree = rj_walktree; ++ rnh->rnh_treetop = t; ++ return (1); ++} ++ ++void ++rj_init() ++{ ++ char *cp, *cplim; ++ ++ if (maj_keylen == 0) { ++ printk("klips_debug:rj_init: " ++ "radij functions require maj_keylen be set\n"); ++ return; ++ } ++ R_Malloc(rj_zeroes, char *, 3 * maj_keylen); ++ if (rj_zeroes == NULL) ++ panic("rj_init"); ++ Bzero(rj_zeroes, 3 * maj_keylen); ++ rj_ones = cp = rj_zeroes + maj_keylen; ++ maskedKey = cplim = rj_ones + maj_keylen; ++ while (cp < cplim) ++ *cp++ = -1; ++ if (rj_inithead((void **)&mask_rjhead, 0) == 0) ++ panic("rj_init 2"); ++} ++ ++void ++rj_preorder(struct radij_node *rn, int l) ++{ ++ int i; ++ ++ if (rn == NULL){ ++ printk("klips_debug:rj_preorder: " ++ "NULL pointer\n"); ++ return; ++ } ++ ++ if (rn->rj_b >= 0){ ++ rj_preorder(rn->rj_l, l+1); ++ rj_preorder(rn->rj_r, l+1); ++ printk("klips_debug:"); 
++ for (i=0; irj_off); ++ } else { ++ printk("klips_debug:"); ++ for (i=0; irj_flags); ++ if (rn->rj_flags & RJF_ACTIVE) { ++ printk(" @key=0p%p", ++ rn->rj_key); ++ printk(" key = %08x->%08x", ++ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_key)->sen_ip_src.s_addr), ++ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_key)->sen_ip_dst.s_addr)); ++ printk(" @mask=0p%p", ++ rn->rj_mask); ++ if (rn->rj_mask) ++ printk(" mask = %08x->%08x", ++ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_mask)->sen_ip_src.s_addr), ++ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_mask)->sen_ip_dst.s_addr)); ++ if (rn->rj_dupedkey) ++ printk(" dupedkey = 0p%p", ++ rn->rj_dupedkey); ++ } ++ printk("\n"); ++ } ++} ++ ++#ifdef RJ_DEBUG ++DEBUG_NO_STATIC void traverse(struct radij_node *p) ++{ ++ rj_preorder(p, 0); ++} ++#endif /* RJ_DEBUG */ ++ ++void ++rj_dumptrees(void) ++{ ++ rj_preorder(rnh->rnh_treetop, 0); ++} ++ ++void ++rj_free_mkfreelist(void) ++{ ++ struct radij_mask *mknp, *mknp2; ++ ++ mknp = rj_mkfreelist; ++ while(mknp) ++ { ++ mknp2 = mknp; ++ mknp = mknp->rm_mklist; ++ kfree(mknp2); ++ } ++} ++ ++int ++radijcleartree(void) ++{ ++ return rj_walktree(rnh, ipsec_rj_walker_delete, NULL); ++} ++ ++int ++radijcleanup(void) ++{ ++ int error = 0; ++ ++ error = radijcleartree(); ++ ++ rj_free_mkfreelist(); ++ ++/* rj_walktree(mask_rjhead, ipsec_rj_walker_delete, NULL); */ ++ if(mask_rjhead) { ++ kfree(mask_rjhead); ++ } ++ ++ if(rj_zeroes) { ++ kfree(rj_zeroes); ++ } ++ ++ if(rnh) { ++ kfree(rnh); ++ } ++ ++ return error; ++} ++ ++/* ++ * $Log: radij.c,v $ ++ * Revision 1.48.2.1 2006-10-06 21:39:27 paul ++ * Fix for 2.6.18+ only include linux/config.h if AUTOCONF_INCLUDED is not ++ * set. This is defined through autoconf.h which is included through the ++ * linux kernel build macros. ++ * ++ * Revision 1.48 2005/04/29 05:10:22 mcr ++ * removed from extraenous includes to make unit testing easier. ++ * ++ * Revision 1.47 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. 
++ * ++ * Revision 1.46 2004/04/06 02:49:26 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.45 2003/10/31 02:27:55 mcr ++ * pulled up port-selector patches and sa_id elimination. ++ * ++ * Revision 1.44.30.1 2003/10/29 01:30:41 mcr ++ * elimited "struct sa_id". ++ * ++ * Revision 1.44 2002/07/24 18:44:54 rgb ++ * Type fiddling to tame ia64 compiler. ++ * ++ * Revision 1.43 2002/05/23 07:14:11 rgb ++ * Cleaned up %p variants to 0p%p for test suite cleanup. ++ * ++ * Revision 1.42 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.41 2002/04/24 07:36:35 mcr ++ * Moved from ./klips/net/ipsec/radij.c,v ++ * ++ * Revision 1.40 2002/01/29 17:17:58 mcr ++ * moved include of ipsec_param.h to after include of linux/kernel.h ++ * otherwise, it seems that some option that is set in ipsec_param.h ++ * screws up something subtle in the include path to kernel.h, and ++ * it complains on the snprintf() prototype. ++ * ++ * Revision 1.39 2002/01/29 04:00:55 mcr ++ * more excise of kversions.h header. ++ * ++ * Revision 1.38 2002/01/29 02:13:19 mcr ++ * introduction of ipsec_kversion.h means that include of ++ * ipsec_param.h must preceed any decisions about what files to ++ * include to deal with differences in kernel source. ++ * ++ * Revision 1.37 2001/10/18 04:45:23 rgb ++ * 2.4.9 kernel deprecates linux/malloc.h in favour of linux/slab.h, ++ * lib/freeswan.h version macros moved to lib/kversions.h. ++ * Other compiler directive cleanups. ++ * ++ * Revision 1.36 2001/08/22 13:43:51 henry ++ * eliminate the single use of min() to avoid problems with Linus changing it ++ * ++ * Revision 1.35 2001/06/15 04:57:29 rgb ++ * Clarified error return codes. ++ * Changed mask add already exists to EEXIST. ++ * Changed mask delete did not exist to ENOENT. ++ * ++ * Revision 1.34 2001/05/03 19:44:26 rgb ++ * Fix sign of error return codes for rj_addroute(). 
++ * ++ * Revision 1.33 2001/02/27 22:24:56 rgb ++ * Re-formatting debug output (line-splitting, joining, 1arg/line). ++ * Check for satoa() return codes. ++ * ++ * Revision 1.32 2001/02/27 06:23:15 rgb ++ * Debug line splitting. ++ * ++ * Revision 1.31 2000/11/06 04:35:21 rgb ++ * Clear table *before* releasing other items in radijcleanup. ++ * ++ * Revision 1.30 2000/09/20 04:07:40 rgb ++ * Changed static functions to DEBUG_NO_STATIC to reveal function names in ++ * oopsen. ++ * ++ * Revision 1.29 2000/09/12 03:25:02 rgb ++ * Moved radij_c_version printing to ipsec_version_get_info(). ++ * ++ * Revision 1.28 2000/09/08 19:12:56 rgb ++ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG. ++ * ++ * Revision 1.27 2000/07/28 14:58:32 rgb ++ * Changed kfree_s to kfree, eliminating extra arg to fix 2.4.0-test5. ++ * ++ * Revision 1.26 2000/05/10 23:11:37 rgb ++ * Comment out most of the startup version information. ++ * ++ * Revision 1.25 2000/01/21 06:21:47 rgb ++ * Change return codes to negative on error. ++ * ++ * Revision 1.24 1999/11/18 04:09:20 rgb ++ * Replaced all kernel version macros to shorter, readable form. ++ * ++ * Revision 1.23 1999/11/17 15:53:41 rgb ++ * Changed all occurrences of #include "../../../lib/freeswan.h" ++ * to #include which works due to -Ilibfreeswan in the ++ * klips/net/ipsec/Makefile. ++ * ++ * Revision 1.22 1999/10/15 22:17:28 rgb ++ * Modify radijcleanup() to call radijcleartree(). ++ * ++ * Revision 1.21 1999/10/08 18:37:34 rgb ++ * Fix end-of-line spacing to sate whining PHMs. ++ * ++ * Revision 1.20 1999/10/01 15:44:54 rgb ++ * Move spinlock header include to 2.1> scope. ++ * ++ * Revision 1.19 1999/10/01 08:35:52 rgb ++ * Add spinlock include to shut up compiler for 2.0.38. ++ * ++ * Revision 1.18 1999/09/23 18:02:52 rgb ++ * De-alarm the search failure message so it doesn't sound so grave. ++ * ++ * Revision 1.17 1999/05/25 21:26:01 rgb ++ * Fix rj_walktree() sanity checking bug. 
++ * ++ * Revision 1.16 1999/05/09 03:25:38 rgb ++ * Fix bug introduced by 2.2 quick-and-dirty patch. ++ * ++ * Revision 1.15 1999/05/05 22:02:33 rgb ++ * Add a quick and dirty port to 2.2 kernels by Marc Boucher . ++ * ++ * Revision 1.14 1999/04/29 15:24:15 rgb ++ * Add sanity checking for null pointer arguments. ++ * Standardise an error return method. ++ * ++ * Revision 1.13 1999/04/11 00:29:02 henry ++ * GPL boilerplate ++ * ++ * Revision 1.12 1999/04/06 04:54:28 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ * Revision 1.11 1999/02/17 16:52:53 rgb ++ * Convert DEBUG_IPSEC to KLIPS_PRINT ++ * Clean out unused cruft. ++ * ++ * Revision 1.10 1999/01/22 06:30:05 rgb ++ * Cruft clean-out. ++ * 64-bit clean-up. ++ * ++ * Revision 1.9 1998/12/01 13:22:04 rgb ++ * Added support for debug printing of version info. ++ * ++ * Revision 1.8 1998/11/30 13:22:55 rgb ++ * Rationalised all the klips kernel file headers. They are much shorter ++ * now and won't conflict under RH5.2. ++ * ++ * Revision 1.7 1998/10/25 02:43:26 rgb ++ * Change return type on rj_addroute and rj_delete and add and argument ++ * to the latter to be able to transmit more infomation about errors. ++ * ++ * Revision 1.6 1998/10/19 14:30:06 rgb ++ * Added inclusion of freeswan.h. ++ * ++ * Revision 1.5 1998/10/09 04:33:27 rgb ++ * Added 'klips_debug' prefix to all klips printk debug statements. ++ * Fixed output formatting slightly. ++ * ++ * Revision 1.4 1998/07/28 00:06:59 rgb ++ * Add debug detail to tree traversing. ++ * ++ * Revision 1.3 1998/07/14 18:07:58 rgb ++ * Add a routine to clear the eroute tree. ++ * ++ * Revision 1.2 1998/06/25 20:03:22 rgb ++ * Cleanup #endif comments. Debug output for rj_init. 
++ * ++ * Revision 1.1 1998/06/18 21:30:22 henry ++ * move sources from klips/src to klips/net/ipsec to keep stupid kernel ++ * build scripts happier about symlinks ++ * ++ * Revision 1.8 1998/05/25 20:34:15 rgb ++ * Remove temporary ipsec_walk, rj_deltree and rj_delnodes functions. ++ * ++ * Rename ipsec_rj_walker (ipsec_walk) to ipsec_rj_walker_procprint and ++ * add ipsec_rj_walker_delete. ++ * ++ * Recover memory for eroute table on unload of module. ++ * ++ * Revision 1.7 1998/05/21 12:58:58 rgb ++ * Moved 'extern' definitions to ipsec_radij.h to support /proc 3k limit fix. ++ * ++ * Revision 1.6 1998/04/23 20:57:29 rgb ++ * Cleaned up compiler warnings for unused debugging functions. ++ * ++ * Revision 1.5 1998/04/22 16:51:38 rgb ++ * Tidy up radij debug code from recent rash of modifications to debug code. ++ * ++ * Revision 1.4 1998/04/21 21:28:56 rgb ++ * Rearrange debug switches to change on the fly debug output from user ++ * space. Only kernel changes checked in at this time. radij.c was also ++ * changed to temporarily remove buggy debugging code in rj_delete causing ++ * an OOPS and hence, netlink device open errors. ++ * ++ * Revision 1.3 1998/04/14 17:30:37 rgb ++ * Fix up compiling errors for radij tree memory reclamation. ++ * ++ * Revision 1.2 1998/04/12 22:03:25 rgb ++ * Updated ESP-3DES-HMAC-MD5-96, ++ * ESP-DES-HMAC-MD5-96, ++ * AH-HMAC-MD5-96, ++ * AH-HMAC-SHA1-96 since Henry started freeswan cvs repository ++ * from old standards (RFC182[5-9] to new (as of March 1998) drafts. ++ * ++ * Fixed eroute references in /proc/net/ipsec*. ++ * ++ * Started to patch module unloading memory leaks in ipsec_netlink and ++ * radij tree unloading. ++ * ++ * Revision 1.1 1998/04/09 03:06:15 henry ++ * sources moved up from linux/net/ipsec ++ * ++ * Revision 1.1.1.1 1998/04/08 05:35:03 henry ++ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8 ++ * ++ * Revision 0.4 1997/01/15 01:28:15 ji ++ * No changes. 
++ * ++ * Revision 0.3 1996/11/20 14:39:04 ji ++ * Minor cleanups. ++ * Rationalized debugging code. ++ * ++ * Revision 0.2 1996/11/02 00:18:33 ji ++ * First limited release. ++ * ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/rangetoa.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,60 @@ ++/* ++ * convert binary form of address range to ASCII ++ * Copyright (C) 1998, 1999 Henry Spencer. ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: rangetoa.c,v 1.9 2004-07-10 07:48:37 mcr Exp $ ++ */ ++#include "openswan.h" ++ ++/* ++ - rangetoa - convert address range to ASCII ++ */ ++size_t /* space needed for full conversion */ ++rangetoa(addrs, format, dst, dstlen) ++struct in_addr addrs[2]; ++int format; /* character */ ++char *dst; /* need not be valid if dstlen is 0 */ ++size_t dstlen; ++{ ++ size_t len; ++ size_t rest; ++ int n; ++ char *p; ++ ++ switch (format) { ++ case 0: ++ break; ++ default: ++ return 0; ++ break; ++ } ++ ++ len = addrtoa(addrs[0], 0, dst, dstlen); ++ if (len < dstlen) ++ for (p = dst + len - 1, n = 3; len < dstlen && n > 0; ++ p++, len++, n--) ++ *p = '.'; ++ else ++ p = NULL; ++ if (len < dstlen) ++ rest = dstlen - len; ++ else { ++ if (dstlen > 0) ++ *(dst + dstlen - 1) = '\0'; ++ rest = 0; ++ } ++ ++ len += addrtoa(addrs[1], 0, p, rest); ++ ++ return len; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/satot.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,133 @@ ++/* ++ * convert from binary form of SA ID to text ++ 
* Copyright (C) 2000, 2001 Henry Spencer. ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: satot.c,v 1.13 2004-07-10 07:48:37 mcr Exp $ ++ */ ++#include "openswan.h" ++ ++static struct typename { ++ char type; ++ char *name; ++} typenames[] = { ++ { SA_AH, "ah" }, ++ { SA_ESP, "esp" }, ++ { SA_IPIP, "tun" }, ++ { SA_COMP, "comp" }, ++ { SA_INT, "int" }, ++ { 0, NULL } ++}; ++ ++/* ++ - satot - convert SA to text "ah507@1.2.3.4" ++ */ ++size_t /* space needed for full conversion */ ++satot(sa, format, dst, dstlen) ++const ip_said *sa; ++int format; /* character */ ++char *dst; /* need not be valid if dstlen is 0 */ ++size_t dstlen; ++{ ++ size_t len = 0; /* 0 means "not recognized yet" */ ++ int base; ++ int showversion; /* use delimiter to show IP version? 
*/ ++ struct typename *tn; ++ char *p; ++ char *pre; ++ char buf[10+1+ULTOT_BUF+ADDRTOT_BUF]; ++ char unk[10]; ++ ++ switch (format) { ++ case 0: ++ base = 16; ++ showversion = 1; ++ break; ++ case 'f': ++ base = 17; ++ showversion = 1; ++ break; ++ case 'x': ++ base = 'x'; ++ showversion = 0; ++ break; ++ case 'd': ++ base = 10; ++ showversion = 0; ++ break; ++ default: ++ return 0; ++ break; ++ } ++ ++ memset(buf, 0, sizeof(buf)); ++ ++ pre = NULL; ++ for (tn = typenames; tn->name != NULL; tn++) ++ if (sa->proto == tn->type) { ++ pre = tn->name; ++ break; /* NOTE BREAK OUT */ ++ } ++ if (pre == NULL) { /* unknown protocol */ ++ strcpy(unk, "unk"); ++ (void) ultot((unsigned char)sa->proto, 10, unk+strlen(unk), ++ sizeof(unk)-strlen(unk)); ++ pre = unk; ++ } ++ ++ if (strcmp(pre, PASSTHROUGHTYPE) == 0 && ++ sa->spi == PASSTHROUGHSPI && ++ isunspecaddr(&sa->dst)) { ++ strcpy(buf, (addrtypeof(&sa->dst) == AF_INET) ? ++ PASSTHROUGH4NAME : ++ PASSTHROUGH6NAME); ++ len = strlen(buf); ++ } ++ ++ if (sa->proto == SA_INT) { ++ switch (ntohl(sa->spi)) { ++ case SPI_PASS: p = "%pass"; break; ++ case SPI_DROP: p = "%drop"; break; ++ case SPI_REJECT: p = "%reject"; break; ++ case SPI_HOLD: p = "%hold"; break; ++ case SPI_TRAP: p = "%trap"; break; ++ case SPI_TRAPSUBNET: p = "%trapsubnet"; break; ++ default: p = NULL; break; ++ } ++ if (p != NULL) { ++ strcpy(buf, p); ++ len = strlen(buf); ++ } ++ } ++ ++ if (len == 0) { /* general case needed */ ++ strcpy(buf, pre); ++ len = strlen(buf); ++ if (showversion) { ++ *(buf+len) = (addrtypeof(&sa->dst) == AF_INET) ? '.' 
: ++ ':'; ++ len++; ++ *(buf+len) = '\0'; ++ } ++ len += ultot(ntohl(sa->spi), base, buf+len, sizeof(buf)-len); ++ *(buf+len-1) = '@'; ++ len += addrtot(&sa->dst, 0, buf+len, sizeof(buf)-len); ++ *(buf+len) = '\0'; ++ } ++ ++ if (dst != NULL) { ++ if (len > dstlen) ++ *(buf+dstlen-1) = '\0'; ++ strcpy(dst, buf); ++ } ++ return len; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/subnetof.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,59 @@ ++/* ++ * minor network-address manipulation utilities ++ * Copyright (C) 1998, 1999 Henry Spencer. ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. 
++ * ++ * RCSID $Id: subnetof.c,v 1.8 2004-07-10 07:48:37 mcr Exp $ ++ */ ++#include "openswan.h" ++ ++/* ++ - subnetof - given address and mask, return subnet part ++ */ ++struct in_addr ++subnetof(addr, mask) ++struct in_addr addr; ++struct in_addr mask; ++{ ++ struct in_addr result; ++ ++ result.s_addr = addr.s_addr & mask.s_addr; ++ return result; ++} ++ ++/* ++ - hostof - given address and mask, return host part ++ */ ++struct in_addr ++hostof(addr, mask) ++struct in_addr addr; ++struct in_addr mask; ++{ ++ struct in_addr result; ++ ++ result.s_addr = addr.s_addr & ~mask.s_addr; ++ return result; ++} ++ ++/* ++ - broadcastof - given (network) address and mask, return broadcast address ++ */ ++struct in_addr ++broadcastof(addr, mask) ++struct in_addr addr; ++struct in_addr mask; ++{ ++ struct in_addr result; ++ ++ result.s_addr = addr.s_addr | ~mask.s_addr; ++ return result; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/subnettoa.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,61 @@ ++/* ++ * convert binary form of subnet description to ASCII ++ * Copyright (C) 1998, 1999 Henry Spencer. ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: subnettoa.c,v 1.11 2004-07-10 07:48:37 mcr Exp $ ++ */ ++#include "openswan.h" ++ ++/* ++ - subnettoa - convert address and mask to ASCII "addr/mask" ++ * Output expresses the mask as a bit count if possible, else dotted decimal. 
++ */ ++size_t /* space needed for full conversion */ ++subnettoa(addr, mask, format, dst, dstlen) ++struct in_addr addr; ++struct in_addr mask; ++int format; /* character */ ++char *dst; /* need not be valid if dstlen is 0 */ ++size_t dstlen; ++{ ++ size_t len; ++ size_t rest; ++ int n; ++ char *p; ++ ++ switch (format) { ++ case 0: ++ break; ++ default: ++ return 0; ++ break; ++ } ++ ++ len = addrtoa(addr, 0, dst, dstlen); ++ if (len < dstlen) { ++ dst[len - 1] = '/'; ++ p = dst + len; ++ rest = dstlen - len; ++ } else { ++ p = NULL; ++ rest = 0; ++ } ++ ++ n = masktobits(mask); ++ if (n >= 0) ++ len += ultoa((unsigned long)n, 10, p, rest); ++ else ++ len += addrtoa(mask, 0, p, rest); ++ ++ return len; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/sysctl_net_ipsec.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,217 @@ ++/* ++ * sysctl interface to net IPSEC subsystem. ++ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * RCSID $Id: sysctl_net_ipsec.c,v 1.17.10.2 2007-10-30 21:42:25 paul Exp $ ++ */ ++ ++/* -*- linux-c -*- ++ * ++ * Initiated April 3, 1998, Richard Guy Briggs ++ */ ++ ++#include ++#include ++#include ++ ++#include "openswan/ipsec_param.h" ++ ++#ifdef CONFIG_SYSCTL ++ ++#define NET_IPSEC 2112 /* Random number */ ++#ifdef CONFIG_KLIPS_DEBUG ++extern int debug_ah; ++extern int debug_esp; ++extern int debug_tunnel; ++extern int debug_eroute; ++extern int debug_spi; ++extern int debug_radij; ++extern int debug_netlink; ++extern int debug_xform; ++extern int debug_rcv; ++extern int debug_pfkey; ++extern int sysctl_ipsec_debug_verbose; ++#ifdef CONFIG_KLIPS_IPCOMP ++extern int sysctl_ipsec_debug_ipcomp; ++#endif /* CONFIG_KLIPS_IPCOMP */ ++#endif /* CONFIG_KLIPS_DEBUG */ ++ ++extern int sysctl_ipsec_icmp; ++extern int sysctl_ipsec_inbound_policy_check; ++extern int sysctl_ipsec_tos; ++int sysctl_ipsec_regress_pfkey_lossage; ++ ++enum { ++#ifdef CONFIG_KLIPS_DEBUG ++ NET_IPSEC_DEBUG_AH=1, ++ NET_IPSEC_DEBUG_ESP=2, ++ NET_IPSEC_DEBUG_TUNNEL=3, ++ NET_IPSEC_DEBUG_EROUTE=4, ++ NET_IPSEC_DEBUG_SPI=5, ++ NET_IPSEC_DEBUG_RADIJ=6, ++ NET_IPSEC_DEBUG_NETLINK=7, ++ NET_IPSEC_DEBUG_XFORM=8, ++ NET_IPSEC_DEBUG_RCV=9, ++ NET_IPSEC_DEBUG_PFKEY=10, ++ NET_IPSEC_DEBUG_VERBOSE=11, ++ NET_IPSEC_DEBUG_IPCOMP=12, ++#endif /* CONFIG_KLIPS_DEBUG */ ++ NET_IPSEC_ICMP=13, ++ NET_IPSEC_INBOUND_POLICY_CHECK=14, ++ NET_IPSEC_TOS=15, ++ NET_IPSEC_REGRESS_PFKEY_LOSSAGE=16, ++}; ++ ++static ctl_table ipsec_table[] = { ++#ifdef CONFIG_KLIPS_DEBUG ++ { NET_IPSEC_DEBUG_AH, "debug_ah", &debug_ah, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++ { NET_IPSEC_DEBUG_ESP, "debug_esp", &debug_esp, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++ { NET_IPSEC_DEBUG_TUNNEL, "debug_tunnel", &debug_tunnel, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++ { NET_IPSEC_DEBUG_EROUTE, "debug_eroute", &debug_eroute, ++ sizeof(int), 0644, NULL, .proc_handler = 
&proc_dointvec}, ++ { NET_IPSEC_DEBUG_SPI, "debug_spi", &debug_spi, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++ { NET_IPSEC_DEBUG_RADIJ, "debug_radij", &debug_radij, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++ { NET_IPSEC_DEBUG_NETLINK, "debug_netlink", &debug_netlink, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++ { NET_IPSEC_DEBUG_XFORM, "debug_xform", &debug_xform, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++ { NET_IPSEC_DEBUG_RCV, "debug_rcv", &debug_rcv, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++ { NET_IPSEC_DEBUG_PFKEY, "debug_pfkey", &debug_pfkey, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++ { NET_IPSEC_DEBUG_VERBOSE, "debug_verbose",&sysctl_ipsec_debug_verbose, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++#ifdef CONFIG_KLIPS_IPCOMP ++ { NET_IPSEC_DEBUG_IPCOMP, "debug_ipcomp", &sysctl_ipsec_debug_ipcomp, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++#endif /* CONFIG_KLIPS_IPCOMP */ ++ ++#ifdef CONFIG_KLIPS_REGRESS ++ { NET_IPSEC_REGRESS_PFKEY_LOSSAGE, "pfkey_lossage", ++ &sysctl_ipsec_regress_pfkey_lossage, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++#endif /* CONFIG_KLIPS_REGRESS */ ++ ++#endif /* CONFIG_KLIPS_DEBUG */ ++ { NET_IPSEC_ICMP, "icmp", &sysctl_ipsec_icmp, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++ { NET_IPSEC_INBOUND_POLICY_CHECK, "inbound_policy_check", &sysctl_ipsec_inbound_policy_check, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++ { NET_IPSEC_TOS, "tos", &sysctl_ipsec_tos, ++ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec}, ++ {0} ++}; ++ ++static ctl_table ipsec_net_table[] = { ++ { NET_IPSEC, "ipsec", NULL, 0, 0555, ipsec_table }, ++ { 0 } ++}; ++ ++static ctl_table ipsec_root_table[] = { ++ { CTL_NET, "net", NULL, 0, 0555, ipsec_net_table }, ++ { 0 } ++}; ++ ++static struct ctl_table_header *ipsec_table_header; ++ ++int 
ipsec_sysctl_register(void) ++{ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21) ++ ipsec_table_header = register_sysctl_table(ipsec_root_table); ++#else ++ ipsec_table_header = register_sysctl_table(ipsec_root_table, 0); ++#endif ++ if (!ipsec_table_header) { ++ return -ENOMEM; ++ } ++ return 0; ++} ++ ++void ipsec_sysctl_unregister(void) ++{ ++ unregister_sysctl_table(ipsec_table_header); ++} ++ ++#endif /* CONFIG_SYSCTL */ ++ ++/* ++ * $Log: sysctl_net_ipsec.c,v $ ++ * Revision 1.17.10.2 2007-10-30 21:42:25 paul ++ * The kernel has changed the layout of ctl_table (defined in ++ * linux/sysctl.h). Unfortunately, a new field has been inserted before ++ * the last one we wish to initialize in ipsec_table. ++ * ++ * The easiest fix that works with old and new kernels is to use an ++ * initializer that explicitly says which field is being initialized. ++ * ++ * Patch by dhr ++ * ++ * Revision 1.17.10.1 2007/09/05 02:54:13 paul ++ * register_sysctl_table() takes one argument for 2.6.21+ [david] ++ * ++ * Revision 1.17 2004/07/10 19:11:18 mcr ++ * CONFIG_IPSEC -> CONFIG_KLIPS. ++ * ++ * Revision 1.16 2004/04/06 02:49:26 mcr ++ * pullup of algo code from alg-branch. ++ * ++ * Revision 1.15 2002/04/24 07:55:32 mcr ++ * #include patches and Makefiles for post-reorg compilation. ++ * ++ * Revision 1.14 2002/04/24 07:36:35 mcr ++ * Moved from ./klips/net/ipsec/sysctl_net_ipsec.c,v ++ * ++ * Revision 1.13 2002/01/12 02:58:32 mcr ++ * first regression test causes acquire messages to be lost ++ * 100% of the time. This is to help testing of pluto. ++ * ++ * Revision 1.12 2001/06/14 19:35:13 rgb ++ * Update copyright date. ++ * ++ * Revision 1.11 2001/02/26 19:58:13 rgb ++ * Drop sysctl_ipsec_{no_eroute_pass,opportunistic}, replaced by magic SAs. ++ * ++ * Revision 1.10 2000/09/16 01:50:15 rgb ++ * Protect sysctl_ipsec_debug_ipcomp with compiler defines too so that the ++ * linker won't blame rj_delete() for missing symbols. ;-> Damn statics... 
++ * ++ * Revision 1.9 2000/09/15 23:17:51 rgb ++ * Moved stuff around to compile with debug off. ++ * ++ * Revision 1.8 2000/09/15 11:37:02 rgb ++ * Merge in heavily modified Svenning Soerensen's ++ * IPCOMP zlib deflate code. ++ * ++ * Revision 1.7 2000/09/15 07:37:15 rgb ++ * Munged silly log comment that was causing a warning. ++ * ++ * Revision 1.6 2000/09/15 04:58:23 rgb ++ * Added tos runtime switch. ++ * Removed 'sysctl_ipsec_' prefix from /proc/sys/net/ipsec/ filenames. ++ * ++ * Revision 1.5 2000/09/12 03:25:28 rgb ++ * Filled in and implemented sysctl. ++ * ++ * Revision 1.4 1999/04/11 00:29:03 henry ++ * GPL boilerplate ++ * ++ * Revision 1.3 1999/04/06 04:54:29 rgb ++ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes ++ * patch shell fixes. ++ * ++ */ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/trees.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,1214 @@ ++/* trees.c -- output deflated data using Huffman coding ++ * Copyright (C) 1995-2002 Jean-loup Gailly ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++/* ++ * ALGORITHM ++ * ++ * The "deflation" process uses several Huffman trees. The more ++ * common source values are represented by shorter bit sequences. ++ * ++ * Each code tree is stored in a compressed form which is itself ++ * a Huffman encoding of the lengths of all the code strings (in ++ * ascending order by source values). The actual code strings are ++ * reconstructed from the lengths in the inflate process, as described ++ * in the deflate specification. ++ * ++ * REFERENCES ++ * ++ * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". ++ * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc ++ * ++ * Storer, James A. ++ * Data Compression: Methods and Theory, pp. 49-50. ++ * Computer Science Press, 1988. ISBN 0-7167-8156-5. ++ * ++ * Sedgewick, R. ++ * Algorithms, p290. ++ * Addison-Wesley, 1983. ISBN 0-201-06672-6. 
++ */ ++ ++/* @(#) $Id: trees.c,v 1.4 2004-07-10 07:48:39 mcr Exp $ */ ++ ++/* #define GEN_TREES_H */ ++ ++#include "deflate.h" ++ ++#ifdef DEBUG ++# include ++#endif ++ ++/* =========================================================================== ++ * Constants ++ */ ++ ++#define MAX_BL_BITS 7 ++/* Bit length codes must not exceed MAX_BL_BITS bits */ ++ ++#define END_BLOCK 256 ++/* end of block literal code */ ++ ++#define REP_3_6 16 ++/* repeat previous bit length 3-6 times (2 bits of repeat count) */ ++ ++#define REPZ_3_10 17 ++/* repeat a zero length 3-10 times (3 bits of repeat count) */ ++ ++#define REPZ_11_138 18 ++/* repeat a zero length 11-138 times (7 bits of repeat count) */ ++ ++local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ ++ = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; ++ ++local const int extra_dbits[D_CODES] /* extra bits for each distance code */ ++ = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; ++ ++local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ ++ = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; ++ ++local const uch bl_order[BL_CODES] ++ = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; ++/* The lengths of the bit length codes are sent in order of decreasing ++ * probability, to avoid transmitting the lengths for unused bit length codes. ++ */ ++ ++#define Buf_size (8 * 2*sizeof(char)) ++/* Number of bits used within bi_buf. (bi_buf might be implemented on ++ * more than 16 bits on some systems.) ++ */ ++ ++/* =========================================================================== ++ * Local data. These are initialized only once. ++ */ ++ ++#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ ++ ++#if defined(GEN_TREES_H) || !defined(STDC) ++/* non ANSI compilers may not accept trees.h */ ++ ++local ct_data static_ltree[L_CODES+2]; ++/* The static literal tree. 
Since the bit lengths are imposed, there is no ++ * need for the L_CODES extra codes used during heap construction. However ++ * The codes 286 and 287 are needed to build a canonical tree (see _tr_init ++ * below). ++ */ ++ ++local ct_data static_dtree[D_CODES]; ++/* The static distance tree. (Actually a trivial tree since all codes use ++ * 5 bits.) ++ */ ++ ++uch _dist_code[DIST_CODE_LEN]; ++/* Distance codes. The first 256 values correspond to the distances ++ * 3 .. 258, the last 256 values correspond to the top 8 bits of ++ * the 15 bit distances. ++ */ ++ ++uch _length_code[MAX_MATCH-MIN_MATCH+1]; ++/* length code for each normalized match length (0 == MIN_MATCH) */ ++ ++local int base_length[LENGTH_CODES]; ++/* First normalized length for each code (0 = MIN_MATCH) */ ++ ++local int base_dist[D_CODES]; ++/* First normalized distance for each code (0 = distance of 1) */ ++ ++#else ++# include "trees.h" ++#endif /* GEN_TREES_H */ ++ ++struct static_tree_desc_s { ++ const ct_data *static_tree; /* static tree or NULL */ ++ const intf *extra_bits; /* extra bits for each code or NULL */ ++ int extra_base; /* base index for extra_bits */ ++ int elems; /* max number of elements in the tree */ ++ int max_length; /* max bit length for the codes */ ++}; ++ ++local static_tree_desc static_l_desc = ++{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; ++ ++local static_tree_desc static_d_desc = ++{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; ++ ++local static_tree_desc static_bl_desc = ++{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; ++ ++/* =========================================================================== ++ * Local (static) routines in this file. 
++ */ ++ ++local void tr_static_init OF((void)); ++local void init_block OF((deflate_state *s)); ++local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); ++local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); ++local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); ++local void build_tree OF((deflate_state *s, tree_desc *desc)); ++local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); ++local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); ++local int build_bl_tree OF((deflate_state *s)); ++local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, ++ int blcodes)); ++local void compress_block OF((deflate_state *s, const ct_data *ltree, ++ const ct_data *dtree)); ++local void set_data_type OF((deflate_state *s)); ++local unsigned bi_reverse OF((unsigned value, int length)); ++local void bi_windup OF((deflate_state *s)); ++local void bi_flush OF((deflate_state *s)); ++local void copy_block OF((deflate_state *s, charf *buf, unsigned len, ++ int header)); ++ ++#ifdef GEN_TREES_H ++local void gen_trees_header OF((void)); ++#endif ++ ++#ifndef DEBUG ++# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) ++ /* Send a code of the given tree. c and tree must not have side effects */ ++ ++#else /* DEBUG */ ++# define send_code(s, c, tree) \ ++ { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ ++ send_bits(s, tree[c].Code, tree[c].Len); } ++#endif ++ ++/* =========================================================================== ++ * Output a short LSB first on the stream. ++ * IN assertion: there is enough room in pendingBuf. ++ */ ++#define put_short(s, w) { \ ++ put_byte(s, (uch)((w) & 0xff)); \ ++ put_byte(s, (uch)((ush)(w) >> 8)); \ ++} ++ ++/* =========================================================================== ++ * Send a value on a given number of bits. ++ * IN assertion: length <= 16 and value fits in length bits. 
++ */ ++#ifdef DEBUG ++local void send_bits OF((deflate_state *s, int value, int length)); ++ ++local void send_bits(s, value, length) ++ deflate_state *s; ++ int value; /* value to send */ ++ int length; /* number of bits */ ++{ ++ Tracevv((stderr," l %2d v %4x ", length, value)); ++ Assert(length > 0 && length <= 15, "invalid length"); ++ s->bits_sent += (ulg)length; ++ ++ /* If not enough room in bi_buf, use (valid) bits from bi_buf and ++ * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) ++ * unused bits in value. ++ */ ++ if (s->bi_valid > (int)Buf_size - length) { ++ s->bi_buf |= (value << s->bi_valid); ++ put_short(s, s->bi_buf); ++ s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); ++ s->bi_valid += length - Buf_size; ++ } else { ++ s->bi_buf |= value << s->bi_valid; ++ s->bi_valid += length; ++ } ++} ++#else /* !DEBUG */ ++ ++#define send_bits(s, value, length) \ ++{ int len = length;\ ++ if (s->bi_valid > (int)Buf_size - len) {\ ++ int val = value;\ ++ s->bi_buf |= (val << s->bi_valid);\ ++ put_short(s, s->bi_buf);\ ++ s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ ++ s->bi_valid += len - Buf_size;\ ++ } else {\ ++ s->bi_buf |= (value) << s->bi_valid;\ ++ s->bi_valid += len;\ ++ }\ ++} ++#endif /* DEBUG */ ++ ++ ++#define MAX(a,b) (a >= b ? a : b) ++/* the arguments must not have side effects */ ++ ++/* =========================================================================== ++ * Initialize the various 'constant' tables. 
++ */ ++local void tr_static_init() ++{ ++#if defined(GEN_TREES_H) || !defined(STDC) ++ static int static_init_done = 0; ++ int n; /* iterates over tree elements */ ++ int bits; /* bit counter */ ++ int length; /* length value */ ++ int code; /* code value */ ++ int dist; /* distance index */ ++ ush bl_count[MAX_BITS+1]; ++ /* number of codes at each bit length for an optimal tree */ ++ ++ if (static_init_done) return; ++ ++ /* For some embedded targets, global variables are not initialized: */ ++ static_l_desc.static_tree = static_ltree; ++ static_l_desc.extra_bits = extra_lbits; ++ static_d_desc.static_tree = static_dtree; ++ static_d_desc.extra_bits = extra_dbits; ++ static_bl_desc.extra_bits = extra_blbits; ++ ++ /* Initialize the mapping length (0..255) -> length code (0..28) */ ++ length = 0; ++ for (code = 0; code < LENGTH_CODES-1; code++) { ++ base_length[code] = length; ++ for (n = 0; n < (1< dist code (0..29) */ ++ dist = 0; ++ for (code = 0 ; code < 16; code++) { ++ base_dist[code] = dist; ++ for (n = 0; n < (1<>= 7; /* from now on, all distances are divided by 128 */ ++ for ( ; code < D_CODES; code++) { ++ base_dist[code] = dist << 7; ++ for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { ++ _dist_code[256 + dist++] = (uch)code; ++ } ++ } ++ Assert (dist == 256, "tr_static_init: 256+dist != 512"); ++ ++ /* Construct the codes of the static literal tree */ ++ for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; ++ n = 0; ++ while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; ++ while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; ++ while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; ++ while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; ++ /* Codes 286 and 287 do not exist, but we must include them in the ++ * tree construction to get a canonical Huffman tree (longest code ++ * all ones) ++ */ ++ gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); ++ ++ /* The static distance tree is trivial: */ ++ for (n = 0; n < 
D_CODES; n++) { ++ static_dtree[n].Len = 5; ++ static_dtree[n].Code = bi_reverse((unsigned)n, 5); ++ } ++ static_init_done = 1; ++ ++# ifdef GEN_TREES_H ++ gen_trees_header(); ++# endif ++#endif /* defined(GEN_TREES_H) || !defined(STDC) */ ++} ++ ++/* =========================================================================== ++ * Genererate the file trees.h describing the static trees. ++ */ ++#ifdef GEN_TREES_H ++# ifndef DEBUG ++# include ++# endif ++ ++# define SEPARATOR(i, last, width) \ ++ ((i) == (last)? "\n};\n\n" : \ ++ ((i) % (width) == (width)-1 ? ",\n" : ", ")) ++ ++void gen_trees_header() ++{ ++ FILE *header = fopen("trees.h", "w"); ++ int i; ++ ++ Assert (header != NULL, "Can't open trees.h"); ++ fprintf(header, ++ "/* header created automatically with -DGEN_TREES_H */\n\n"); ++ ++ fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); ++ for (i = 0; i < L_CODES+2; i++) { ++ fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, ++ static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); ++ } ++ ++ fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); ++ for (i = 0; i < D_CODES; i++) { ++ fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, ++ static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); ++ } ++ ++ fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n"); ++ for (i = 0; i < DIST_CODE_LEN; i++) { ++ fprintf(header, "%2u%s", _dist_code[i], ++ SEPARATOR(i, DIST_CODE_LEN-1, 20)); ++ } ++ ++ fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); ++ for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { ++ fprintf(header, "%2u%s", _length_code[i], ++ SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); ++ } ++ ++ fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); ++ for (i = 0; i < LENGTH_CODES; i++) { ++ fprintf(header, "%1u%s", base_length[i], ++ SEPARATOR(i, LENGTH_CODES-1, 20)); ++ } ++ ++ fprintf(header, "local const int base_dist[D_CODES] = {\n"); ++ for (i = 0; i < D_CODES; i++) { ++ fprintf(header, 
"%5u%s", base_dist[i], ++ SEPARATOR(i, D_CODES-1, 10)); ++ } ++ ++ fclose(header); ++} ++#endif /* GEN_TREES_H */ ++ ++/* =========================================================================== ++ * Initialize the tree data structures for a new zlib stream. ++ */ ++void _tr_init(s) ++ deflate_state *s; ++{ ++ tr_static_init(); ++ ++ s->l_desc.dyn_tree = s->dyn_ltree; ++ s->l_desc.stat_desc = &static_l_desc; ++ ++ s->d_desc.dyn_tree = s->dyn_dtree; ++ s->d_desc.stat_desc = &static_d_desc; ++ ++ s->bl_desc.dyn_tree = s->bl_tree; ++ s->bl_desc.stat_desc = &static_bl_desc; ++ ++ s->bi_buf = 0; ++ s->bi_valid = 0; ++ s->last_eob_len = 8; /* enough lookahead for inflate */ ++#ifdef DEBUG ++ s->compressed_len = 0L; ++ s->bits_sent = 0L; ++#endif ++ ++ /* Initialize the first block of the first file: */ ++ init_block(s); ++} ++ ++/* =========================================================================== ++ * Initialize a new block. ++ */ ++local void init_block(s) ++ deflate_state *s; ++{ ++ int n; /* iterates over tree elements */ ++ ++ /* Initialize the trees. */ ++ for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; ++ for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; ++ for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; ++ ++ s->dyn_ltree[END_BLOCK].Freq = 1; ++ s->opt_len = s->static_len = 0L; ++ s->last_lit = s->matches = 0; ++} ++ ++#define SMALLEST 1 ++/* Index within the heap array of least frequent node in the Huffman tree */ ++ ++ ++/* =========================================================================== ++ * Remove the smallest element from the heap and recreate the heap with ++ * one less element. Updates heap and heap_len. 
++ */ ++#define pqremove(s, tree, top) \ ++{\ ++ top = s->heap[SMALLEST]; \ ++ s->heap[SMALLEST] = s->heap[s->heap_len--]; \ ++ pqdownheap(s, tree, SMALLEST); \ ++} ++ ++/* =========================================================================== ++ * Compares to subtrees, using the tree depth as tie breaker when ++ * the subtrees have equal frequency. This minimizes the worst case length. ++ */ ++#define smaller(tree, n, m, depth) \ ++ (tree[n].Freq < tree[m].Freq || \ ++ (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) ++ ++/* =========================================================================== ++ * Restore the heap property by moving down the tree starting at node k, ++ * exchanging a node with the smallest of its two sons if necessary, stopping ++ * when the heap property is re-established (each father smaller than its ++ * two sons). ++ */ ++local void pqdownheap(s, tree, k) ++ deflate_state *s; ++ ct_data *tree; /* the tree to restore */ ++ int k; /* node to move down */ ++{ ++ int v = s->heap[k]; ++ int j = k << 1; /* left son of k */ ++ while (j <= s->heap_len) { ++ /* Set j to the smallest of the two sons: */ ++ if (j < s->heap_len && ++ smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { ++ j++; ++ } ++ /* Exit if v is smaller than both sons */ ++ if (smaller(tree, v, s->heap[j], s->depth)) break; ++ ++ /* Exchange v with the smallest son */ ++ s->heap[k] = s->heap[j]; k = j; ++ ++ /* And continue down the tree, setting j to the left son of k */ ++ j <<= 1; ++ } ++ s->heap[k] = v; ++} ++ ++/* =========================================================================== ++ * Compute the optimal bit lengths for a tree and update the total bit length ++ * for the current block. ++ * IN assertion: the fields freq and dad are set, heap[heap_max] and ++ * above are the tree nodes sorted by increasing frequency. 
++ * OUT assertions: the field len is set to the optimal bit length, the ++ * array bl_count contains the frequencies for each bit length. ++ * The length opt_len is updated; static_len is also updated if stree is ++ * not null. ++ */ ++local void gen_bitlen(s, desc) ++ deflate_state *s; ++ tree_desc *desc; /* the tree descriptor */ ++{ ++ ct_data *tree = desc->dyn_tree; ++ int max_code = desc->max_code; ++ const ct_data *stree = desc->stat_desc->static_tree; ++ const intf *extra = desc->stat_desc->extra_bits; ++ int base = desc->stat_desc->extra_base; ++ int max_length = desc->stat_desc->max_length; ++ int h; /* heap index */ ++ int n, m; /* iterate over the tree elements */ ++ int bits; /* bit length */ ++ int xbits; /* extra bits */ ++ ush f; /* frequency */ ++ int overflow = 0; /* number of elements with bit length too large */ ++ ++ for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; ++ ++ /* In a first pass, compute the optimal bit lengths (which may ++ * overflow in the case of the bit length tree). 
++ */ ++ tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ ++ ++ for (h = s->heap_max+1; h < HEAP_SIZE; h++) { ++ n = s->heap[h]; ++ bits = tree[tree[n].Dad].Len + 1; ++ if (bits > max_length) bits = max_length, overflow++; ++ tree[n].Len = (ush)bits; ++ /* We overwrite tree[n].Dad which is no longer needed */ ++ ++ if (n > max_code) continue; /* not a leaf node */ ++ ++ s->bl_count[bits]++; ++ xbits = 0; ++ if (n >= base) xbits = extra[n-base]; ++ f = tree[n].Freq; ++ s->opt_len += (ulg)f * (bits + xbits); ++ if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits); ++ } ++ if (overflow == 0) return; ++ ++ Trace((stderr,"\nbit length overflow\n")); ++ /* This happens for example on obj2 and pic of the Calgary corpus */ ++ ++ /* Find the first bit length which could increase: */ ++ do { ++ bits = max_length-1; ++ while (s->bl_count[bits] == 0) bits--; ++ s->bl_count[bits]--; /* move one leaf down the tree */ ++ s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ ++ s->bl_count[max_length]--; ++ /* The brother of the overflow item also moves one step up, ++ * but this does not affect bl_count[max_length] ++ */ ++ overflow -= 2; ++ } while (overflow > 0); ++ ++ /* Now recompute all bit lengths, scanning in increasing frequency. ++ * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all ++ * lengths instead of fixing only the wrong ones. This idea is taken ++ * from 'ar' written by Haruhiko Okumura.) 
++ */ ++ for (bits = max_length; bits != 0; bits--) { ++ n = s->bl_count[bits]; ++ while (n != 0) { ++ m = s->heap[--h]; ++ if (m > max_code) continue; ++ if (tree[m].Len != (unsigned) bits) { ++ Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); ++ s->opt_len += ((long)bits - (long)tree[m].Len) ++ *(long)tree[m].Freq; ++ tree[m].Len = (ush)bits; ++ } ++ n--; ++ } ++ } ++} ++ ++/* =========================================================================== ++ * Generate the codes for a given tree and bit counts (which need not be ++ * optimal). ++ * IN assertion: the array bl_count contains the bit length statistics for ++ * the given tree and the field len is set for all tree elements. ++ * OUT assertion: the field code is set for all tree elements of non ++ * zero code length. ++ */ ++local void gen_codes (tree, max_code, bl_count) ++ ct_data *tree; /* the tree to decorate */ ++ int max_code; /* largest code with non zero frequency */ ++ ushf *bl_count; /* number of codes at each bit length */ ++{ ++ ush next_code[MAX_BITS+1]; /* next code value for each bit length */ ++ ush code = 0; /* running code value */ ++ int bits; /* bit index */ ++ int n; /* code index */ ++ ++ /* The distribution counts are first used to generate the code values ++ * without bit reversal. ++ */ ++ for (bits = 1; bits <= MAX_BITS; bits++) { ++ next_code[bits] = code = (code + bl_count[bits-1]) << 1; ++ } ++ /* Check that the bit counts in bl_count are consistent. The last code ++ * must be all ones. ++ */ ++ Assert (code + bl_count[MAX_BITS]-1 == (1<dyn_tree; ++ const ct_data *stree = desc->stat_desc->static_tree; ++ int elems = desc->stat_desc->elems; ++ int n, m; /* iterate over heap elements */ ++ int max_code = -1; /* largest code with non zero frequency */ ++ int node; /* new node being created */ ++ ++ /* Construct the initial heap, with least frequent element in ++ * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. ++ * heap[0] is not used. 
++ */ ++ s->heap_len = 0, s->heap_max = HEAP_SIZE; ++ ++ for (n = 0; n < elems; n++) { ++ if (tree[n].Freq != 0) { ++ s->heap[++(s->heap_len)] = max_code = n; ++ s->depth[n] = 0; ++ } else { ++ tree[n].Len = 0; ++ } ++ } ++ ++ /* The pkzip format requires that at least one distance code exists, ++ * and that at least one bit should be sent even if there is only one ++ * possible code. So to avoid special checks later on we force at least ++ * two codes of non zero frequency. ++ */ ++ while (s->heap_len < 2) { ++ node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); ++ tree[node].Freq = 1; ++ s->depth[node] = 0; ++ s->opt_len--; if (stree) s->static_len -= stree[node].Len; ++ /* node is 0 or 1 so it does not have extra bits */ ++ } ++ desc->max_code = max_code; ++ ++ /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, ++ * establish sub-heaps of increasing lengths: ++ */ ++ for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); ++ ++ /* Construct the Huffman tree by repeatedly combining the least two ++ * frequent nodes. 
++ */ ++ node = elems; /* next internal node of the tree */ ++ do { ++ pqremove(s, tree, n); /* n = node of least frequency */ ++ m = s->heap[SMALLEST]; /* m = node of next least frequency */ ++ ++ s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ ++ s->heap[--(s->heap_max)] = m; ++ ++ /* Create a new node father of n and m */ ++ tree[node].Freq = tree[n].Freq + tree[m].Freq; ++ s->depth[node] = (uch) (MAX(s->depth[n], s->depth[m]) + 1); ++ tree[n].Dad = tree[m].Dad = (ush)node; ++#ifdef DUMP_BL_TREE ++ if (tree == s->bl_tree) { ++ fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", ++ node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); ++ } ++#endif ++ /* and insert the new node in the heap */ ++ s->heap[SMALLEST] = node++; ++ pqdownheap(s, tree, SMALLEST); ++ ++ } while (s->heap_len >= 2); ++ ++ s->heap[--(s->heap_max)] = s->heap[SMALLEST]; ++ ++ /* At this point, the fields freq and dad are set. We can now ++ * generate the bit lengths. ++ */ ++ gen_bitlen(s, (tree_desc *)desc); ++ ++ /* The field len is now set, we can generate the bit codes */ ++ gen_codes ((ct_data *)tree, max_code, s->bl_count); ++} ++ ++/* =========================================================================== ++ * Scan a literal or distance tree to determine the frequencies of the codes ++ * in the bit length tree. 
++ */ ++local void scan_tree (s, tree, max_code) ++ deflate_state *s; ++ ct_data *tree; /* the tree to be scanned */ ++ int max_code; /* and its largest code of non zero frequency */ ++{ ++ int n; /* iterates over all tree elements */ ++ int prevlen = -1; /* last emitted length */ ++ int curlen; /* length of current code */ ++ int nextlen = tree[0].Len; /* length of next code */ ++ int count = 0; /* repeat count of the current code */ ++ int max_count = 7; /* max repeat count */ ++ int min_count = 4; /* min repeat count */ ++ ++ if (nextlen == 0) max_count = 138, min_count = 3; ++ tree[max_code+1].Len = (ush)0xffff; /* guard */ ++ ++ for (n = 0; n <= max_code; n++) { ++ curlen = nextlen; nextlen = tree[n+1].Len; ++ if (++count < max_count && curlen == nextlen) { ++ continue; ++ } else if (count < min_count) { ++ s->bl_tree[curlen].Freq += count; ++ } else if (curlen != 0) { ++ if (curlen != prevlen) s->bl_tree[curlen].Freq++; ++ s->bl_tree[REP_3_6].Freq++; ++ } else if (count <= 10) { ++ s->bl_tree[REPZ_3_10].Freq++; ++ } else { ++ s->bl_tree[REPZ_11_138].Freq++; ++ } ++ count = 0; prevlen = curlen; ++ if (nextlen == 0) { ++ max_count = 138, min_count = 3; ++ } else if (curlen == nextlen) { ++ max_count = 6, min_count = 3; ++ } else { ++ max_count = 7, min_count = 4; ++ } ++ } ++} ++ ++/* =========================================================================== ++ * Send a literal or distance tree in compressed form, using the codes in ++ * bl_tree. 
++ */ ++local void send_tree (s, tree, max_code) ++ deflate_state *s; ++ ct_data *tree; /* the tree to be scanned */ ++ int max_code; /* and its largest code of non zero frequency */ ++{ ++ int n; /* iterates over all tree elements */ ++ int prevlen = -1; /* last emitted length */ ++ int curlen; /* length of current code */ ++ int nextlen = tree[0].Len; /* length of next code */ ++ int count = 0; /* repeat count of the current code */ ++ int max_count = 7; /* max repeat count */ ++ int min_count = 4; /* min repeat count */ ++ ++ /* tree[max_code+1].Len = -1; */ /* guard already set */ ++ if (nextlen == 0) max_count = 138, min_count = 3; ++ ++ for (n = 0; n <= max_code; n++) { ++ curlen = nextlen; nextlen = tree[n+1].Len; ++ if (++count < max_count && curlen == nextlen) { ++ continue; ++ } else if (count < min_count) { ++ do { send_code(s, curlen, s->bl_tree); } while (--count != 0); ++ ++ } else if (curlen != 0) { ++ if (curlen != prevlen) { ++ send_code(s, curlen, s->bl_tree); count--; ++ } ++ Assert(count >= 3 && count <= 6, " 3_6?"); ++ send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); ++ ++ } else if (count <= 10) { ++ send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); ++ ++ } else { ++ send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); ++ } ++ count = 0; prevlen = curlen; ++ if (nextlen == 0) { ++ max_count = 138, min_count = 3; ++ } else if (curlen == nextlen) { ++ max_count = 6, min_count = 3; ++ } else { ++ max_count = 7, min_count = 4; ++ } ++ } ++} ++ ++/* =========================================================================== ++ * Construct the Huffman tree for the bit lengths and return the index in ++ * bl_order of the last bit length code to send. 
++ */ ++local int build_bl_tree(s) ++ deflate_state *s; ++{ ++ int max_blindex; /* index of last bit length code of non zero freq */ ++ ++ /* Determine the bit length frequencies for literal and distance trees */ ++ scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); ++ scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); ++ ++ /* Build the bit length tree: */ ++ build_tree(s, (tree_desc *)(&(s->bl_desc))); ++ /* opt_len now includes the length of the tree representations, except ++ * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. ++ */ ++ ++ /* Determine the number of bit length codes to send. The pkzip format ++ * requires that at least 4 bit length codes be sent. (appnote.txt says ++ * 3 but the actual value used is 4.) ++ */ ++ for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { ++ if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; ++ } ++ /* Update opt_len to include the bit length tree and counts */ ++ s->opt_len += 3*(max_blindex+1) + 5+5+4; ++ Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", ++ s->opt_len, s->static_len)); ++ ++ return max_blindex; ++} ++ ++/* =========================================================================== ++ * Send the header for a block using dynamic Huffman trees: the counts, the ++ * lengths of the bit length codes, the literal tree and the distance tree. ++ * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. 
++ */ ++local void send_all_trees(s, lcodes, dcodes, blcodes) ++ deflate_state *s; ++ int lcodes, dcodes, blcodes; /* number of codes for each tree */ ++{ ++ int rank; /* index in bl_order */ ++ ++ Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); ++ Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, ++ "too many codes"); ++ Tracev((stderr, "\nbl counts: ")); ++ send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ ++ send_bits(s, dcodes-1, 5); ++ send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ ++ for (rank = 0; rank < blcodes; rank++) { ++ Tracev((stderr, "\nbl code %2d ", bl_order[rank])); ++ send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); ++ } ++ Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); ++ ++ send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ ++ Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); ++ ++ send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ ++ Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); ++} ++ ++/* =========================================================================== ++ * Send a stored block ++ */ ++void _tr_stored_block(s, buf, stored_len, eof) ++ deflate_state *s; ++ charf *buf; /* input block */ ++ ulg stored_len; /* length of input block */ ++ int eof; /* true if this is the last block for a file */ ++{ ++ send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */ ++#ifdef DEBUG ++ s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; ++ s->compressed_len += (stored_len + 4) << 3; ++#endif ++ copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ ++} ++ ++/* =========================================================================== ++ * Send one empty static block to give enough lookahead for inflate. ++ * This takes 10 bits, of which 7 may remain in the bit buffer. ++ * The current inflate code requires 9 bits of lookahead. 
If the ++ * last two codes for the previous block (real code plus EOB) were coded ++ * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode ++ * the last real code. In this case we send two empty static blocks instead ++ * of one. (There are no problems if the previous block is stored or fixed.) ++ * To simplify the code, we assume the worst case of last real code encoded ++ * on one bit only. ++ */ ++void _tr_align(s) ++ deflate_state *s; ++{ ++ send_bits(s, STATIC_TREES<<1, 3); ++ send_code(s, END_BLOCK, static_ltree); ++#ifdef DEBUG ++ s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ ++#endif ++ bi_flush(s); ++ /* Of the 10 bits for the empty block, we have already sent ++ * (10 - bi_valid) bits. The lookahead for the last real code (before ++ * the EOB of the previous block) was thus at least one plus the length ++ * of the EOB plus what we have just sent of the empty static block. ++ */ ++ if (1 + s->last_eob_len + 10 - s->bi_valid < 9) { ++ send_bits(s, STATIC_TREES<<1, 3); ++ send_code(s, END_BLOCK, static_ltree); ++#ifdef DEBUG ++ s->compressed_len += 10L; ++#endif ++ bi_flush(s); ++ } ++ s->last_eob_len = 7; ++} ++ ++/* =========================================================================== ++ * Determine the best encoding for the current block: dynamic trees, static ++ * trees or store, and output the encoded block to the zip file. 
++ */ ++void _tr_flush_block(s, buf, stored_len, eof) ++ deflate_state *s; ++ charf *buf; /* input block, or NULL if too old */ ++ ulg stored_len; /* length of input block */ ++ int eof; /* true if this is the last block for a file */ ++{ ++ ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ ++ int max_blindex = 0; /* index of last bit length code of non zero freq */ ++ ++ /* Build the Huffman trees unless a stored block is forced */ ++ if (s->level > 0) { ++ ++ /* Check if the file is ascii or binary */ ++ if (s->data_type == Z_UNKNOWN) set_data_type(s); ++ ++ /* Construct the literal and distance trees */ ++ build_tree(s, (tree_desc *)(&(s->l_desc))); ++ Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, ++ s->static_len)); ++ ++ build_tree(s, (tree_desc *)(&(s->d_desc))); ++ Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, ++ s->static_len)); ++ /* At this point, opt_len and static_len are the total bit lengths of ++ * the compressed block data, excluding the tree representations. ++ */ ++ ++ /* Build the bit length tree for the above two trees, and get the index ++ * in bl_order of the last bit length code to send. ++ */ ++ max_blindex = build_bl_tree(s); ++ ++ /* Determine the best encoding. Compute first the block length in bytes*/ ++ opt_lenb = (s->opt_len+3+7)>>3; ++ static_lenb = (s->static_len+3+7)>>3; ++ ++ Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", ++ opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, ++ s->last_lit)); ++ ++ if (static_lenb <= opt_lenb) opt_lenb = static_lenb; ++ ++ } else { ++ Assert(buf != (char*)0, "lost buf"); ++ opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ ++ } ++ ++#ifdef FORCE_STORED ++ if (buf != (char*)0) { /* force stored block */ ++#else ++ if (stored_len+4 <= opt_lenb && buf != (char*)0) { ++ /* 4: two words for the lengths */ ++#endif ++ /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. 
++ * Otherwise we can't have processed more than WSIZE input bytes since ++ * the last block flush, because compression would have been ++ * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to ++ * transform a block into a stored block. ++ */ ++ _tr_stored_block(s, buf, stored_len, eof); ++ ++#ifdef FORCE_STATIC ++ } else if (static_lenb >= 0) { /* force static trees */ ++#else ++ } else if (static_lenb == opt_lenb) { ++#endif ++ send_bits(s, (STATIC_TREES<<1)+eof, 3); ++ compress_block(s, static_ltree, static_dtree); ++#ifdef DEBUG ++ s->compressed_len += 3 + s->static_len; ++#endif ++ } else { ++ send_bits(s, (DYN_TREES<<1)+eof, 3); ++ send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, ++ max_blindex+1); ++ compress_block(s, s->dyn_ltree, s->dyn_dtree); ++#ifdef DEBUG ++ s->compressed_len += 3 + s->opt_len; ++#endif ++ } ++ Assert (s->compressed_len == s->bits_sent, "bad compressed size"); ++ /* The above check is made mod 2^32, for files larger than 512 MB ++ * and uLong implemented on 32 bits. ++ */ ++ init_block(s); ++ ++ if (eof) { ++ bi_windup(s); ++#ifdef DEBUG ++ s->compressed_len += 7; /* align on byte boundary */ ++#endif ++ } ++ Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, ++ s->compressed_len-7*eof)); ++} ++ ++/* =========================================================================== ++ * Save the match info and tally the frequency counts. Return true if ++ * the current block must be flushed. 
++ */ ++int _tr_tally (s, dist, lc) ++ deflate_state *s; ++ unsigned dist; /* distance of matched string */ ++ unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ ++{ ++ s->d_buf[s->last_lit] = (ush)dist; ++ s->l_buf[s->last_lit++] = (uch)lc; ++ if (dist == 0) { ++ /* lc is the unmatched char */ ++ s->dyn_ltree[lc].Freq++; ++ } else { ++ s->matches++; ++ /* Here, lc is the match length - MIN_MATCH */ ++ dist--; /* dist = match distance - 1 */ ++ Assert((ush)dist < (ush)MAX_DIST(s) && ++ (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && ++ (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); ++ ++ s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; ++ s->dyn_dtree[d_code(dist)].Freq++; ++ } ++ ++#ifdef TRUNCATE_BLOCK ++ /* Try to guess if it is profitable to stop the current block here */ ++ if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { ++ /* Compute an upper bound for the compressed length */ ++ ulg out_length = (ulg)s->last_lit*8L; ++ ulg in_length = (ulg)((long)s->strstart - s->block_start); ++ int dcode; ++ for (dcode = 0; dcode < D_CODES; dcode++) { ++ out_length += (ulg)s->dyn_dtree[dcode].Freq * ++ (5L+extra_dbits[dcode]); ++ } ++ out_length >>= 3; ++ Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", ++ s->last_lit, in_length, out_length, ++ 100L - out_length*100L/in_length)); ++ if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; ++ } ++#endif ++ return (s->last_lit == s->lit_bufsize-1); ++ /* We avoid equality with lit_bufsize because of wraparound at 64K ++ * on 16 bit machines and because stored blocks are restricted to ++ * 64K-1 bytes. 
++ */ ++} ++ ++/* =========================================================================== ++ * Send the block data compressed using the given Huffman trees ++ */ ++local void compress_block(s, ltree, dtree) ++ deflate_state *s; ++ const ct_data *ltree; /* literal tree */ ++ const ct_data *dtree; /* distance tree */ ++{ ++ unsigned dist; /* distance of matched string */ ++ int lc; /* match length or unmatched char (if dist == 0) */ ++ unsigned lx = 0; /* running index in l_buf */ ++ unsigned code; /* the code to send */ ++ int extra; /* number of extra bits to send */ ++ ++ if (s->last_lit != 0) do { ++ dist = s->d_buf[lx]; ++ lc = s->l_buf[lx++]; ++ if (dist == 0) { ++ send_code(s, lc, ltree); /* send a literal byte */ ++ Tracecv(isgraph(lc), (stderr," '%c' ", lc)); ++ } else { ++ /* Here, lc is the match length - MIN_MATCH */ ++ code = _length_code[lc]; ++ send_code(s, code+LITERALS+1, ltree); /* send the length code */ ++ extra = extra_lbits[code]; ++ if (extra != 0) { ++ lc -= base_length[code]; ++ send_bits(s, lc, extra); /* send the extra length bits */ ++ } ++ dist--; /* dist is now the match distance - 1 */ ++ code = d_code(dist); ++ Assert (code < D_CODES, "bad d_code"); ++ ++ send_code(s, code, dtree); /* send the distance code */ ++ extra = extra_dbits[code]; ++ if (extra != 0) { ++ dist -= base_dist[code]; ++ send_bits(s, dist, extra); /* send the extra distance bits */ ++ } ++ } /* literal or match pair ? */ ++ ++ /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ ++ Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow"); ++ ++ } while (lx < s->last_lit); ++ ++ send_code(s, END_BLOCK, ltree); ++ s->last_eob_len = ltree[END_BLOCK].Len; ++} ++ ++/* =========================================================================== ++ * Set the data type to ASCII or BINARY, using a crude approximation: ++ * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. 
++ * IN assertion: the fields freq of dyn_ltree are set and the total of all ++ * frequencies does not exceed 64K (to fit in an int on 16 bit machines). ++ */ ++local void set_data_type(s) ++ deflate_state *s; ++{ ++ int n = 0; ++ unsigned ascii_freq = 0; ++ unsigned bin_freq = 0; ++ while (n < 7) bin_freq += s->dyn_ltree[n++].Freq; ++ while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq; ++ while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq; ++ s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII); ++} ++ ++/* =========================================================================== ++ * Reverse the first len bits of a code, using straightforward code (a faster ++ * method would use a table) ++ * IN assertion: 1 <= len <= 15 ++ */ ++local unsigned bi_reverse(code, len) ++ unsigned code; /* the value to invert */ ++ int len; /* its bit length */ ++{ ++ register unsigned res = 0; ++ do { ++ res |= code & 1; ++ code >>= 1, res <<= 1; ++ } while (--len > 0); ++ return res >> 1; ++} ++ ++/* =========================================================================== ++ * Flush the bit buffer, keeping at most 7 bits in it. 
++ */ ++local void bi_flush(s) ++ deflate_state *s; ++{ ++ if (s->bi_valid == 16) { ++ put_short(s, s->bi_buf); ++ s->bi_buf = 0; ++ s->bi_valid = 0; ++ } else if (s->bi_valid >= 8) { ++ put_byte(s, (Byte)s->bi_buf); ++ s->bi_buf >>= 8; ++ s->bi_valid -= 8; ++ } ++} ++ ++/* =========================================================================== ++ * Flush the bit buffer and align the output on a byte boundary ++ */ ++local void bi_windup(s) ++ deflate_state *s; ++{ ++ if (s->bi_valid > 8) { ++ put_short(s, s->bi_buf); ++ } else if (s->bi_valid > 0) { ++ put_byte(s, (Byte)s->bi_buf); ++ } ++ s->bi_buf = 0; ++ s->bi_valid = 0; ++#ifdef DEBUG ++ s->bits_sent = (s->bits_sent+7) & ~7; ++#endif ++} ++ ++/* =========================================================================== ++ * Copy a stored block, storing first the length and its ++ * one's complement if requested. ++ */ ++local void copy_block(s, buf, len, header) ++ deflate_state *s; ++ charf *buf; /* the input data */ ++ unsigned len; /* its length */ ++ int header; /* true if block header must be written */ ++{ ++ bi_windup(s); /* align on byte boundary */ ++ s->last_eob_len = 8; /* enough lookahead for inflate */ ++ ++ if (header) { ++ put_short(s, (ush)len); ++ put_short(s, (ush)~len); ++#ifdef DEBUG ++ s->bits_sent += 2*16; ++#endif ++ } ++#ifdef DEBUG ++ s->bits_sent += (ulg)len<<3; ++#endif ++ while (len--) { ++ put_byte(s, *buf++); ++ } ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/trees.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,128 @@ ++/* header created automatically with -DGEN_TREES_H */ ++ ++local const ct_data static_ltree[L_CODES+2] = { ++{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, ++{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, ++{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, ++{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, ++{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 
18},{ 8}}, ++{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, ++{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, ++{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, ++{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, ++{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, ++{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, ++{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, ++{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, ++{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}}, ++{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, ++{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, ++{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}}, ++{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, ++{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, ++{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, ++{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, ++{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, ++{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, ++{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, ++{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, ++{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, ++{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, ++{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, ++{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, ++{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, ++{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, ++{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, ++{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, 
{{395},{ 9}}, {{ 75},{ 9}}, ++{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, ++{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, ++{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, ++{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, ++{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, ++{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, ++{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, ++{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, ++{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, ++{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, ++{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, ++{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, ++{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, ++{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, ++{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, ++{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, ++{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, ++{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, ++{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, ++{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, ++{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, ++{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, ++{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, ++{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, ++{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} ++}; ++ ++local const ct_data static_dtree[D_CODES] = { ++{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, ++{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, ++{{10},{ 5}}, 
{{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, ++{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, ++{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, ++{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} ++}; ++ ++const uch _dist_code[DIST_CODE_LEN] = { ++ 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, ++ 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, ++10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, ++11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, ++12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, ++13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, ++13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, ++14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, ++14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, ++14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, ++15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, ++15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, ++15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, ++18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, ++23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, ++24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, ++26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, ++26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, ++27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, ++27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, ++28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, ++28, 
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, ++28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, ++29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, ++29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, ++29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 ++}; ++ ++const uch _length_code[MAX_MATCH-MIN_MATCH+1]= { ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, ++13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, ++17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, ++19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, ++21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, ++22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, ++23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, ++24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, ++25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, ++25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, ++26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, ++26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, ++27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 ++}; ++ ++local const int base_length[LENGTH_CODES] = { ++0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, ++64, 80, 96, 112, 128, 160, 192, 224, 0 ++}; ++ ++local const int base_dist[D_CODES] = { ++ 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, ++ 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, ++ 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 ++}; ++ +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ultoa.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,66 @@ ++/* ++ * convert unsigned long to ASCII ++ 
* Copyright (C) 1998, 1999 Henry Spencer. ++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: ultoa.c,v 1.10 2004-07-10 07:48:37 mcr Exp $ ++ */ ++#include "openswan.h" ++ ++/* ++ - ultoa - convert unsigned long to decimal ASCII ++ */ ++size_t /* length required for full conversion */ ++ultoa(n, base, dst, dstlen) ++unsigned long n; ++int base; ++char *dst; /* need not be valid if dstlen is 0 */ ++size_t dstlen; ++{ ++ char buf[3*sizeof(unsigned long) + 1]; ++ char *bufend = buf + sizeof(buf); ++ size_t len; ++ char *p; ++ static char hex[] = "0123456789abcdef"; ++ ++ p = bufend; ++ *--p = '\0'; ++ if (base == 10) { ++ do { ++ *--p = n%10 + '0'; ++ n /= 10; ++ } while (n != 0); ++ } else if (base == 16) { ++ do { ++ *--p = hex[n&0xf]; ++ n >>= 4; ++ } while (n != 0); ++ *--p = 'x'; ++ *--p = '0'; ++ } else if (base == 8) { ++ do { ++ *--p = (n&07) + '0'; ++ n >>= 3; ++ } while (n != 0); ++ *--p = '0'; ++ } else ++ *--p = '?'; ++ ++ len = bufend - p; ++ ++ if (dstlen > 0) { ++ if (len > dstlen) ++ *(p + dstlen - 1) = '\0'; ++ strcpy(dst, p); ++ } ++ return len; ++} +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ linux/net/ipsec/ultot.c Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,82 @@ ++/* ++ * convert unsigned long to text ++ * Copyright (C) 2000 Henry Spencer. 
++ * ++ * This library is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU Library General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. See . ++ * ++ * This library is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ * License for more details. ++ * ++ * RCSID $Id: ultot.c,v 1.5 2004-07-10 07:48:37 mcr Exp $ ++ */ ++#include "openswan.h" ++ ++/* ++ - ultot - convert unsigned long to text ++ */ ++size_t /* length required for full conversion */ ++ultot(n, base, dst, dstlen) ++unsigned long n; ++int base; ++char *dst; /* need not be valid if dstlen is 0 */ ++size_t dstlen; ++{ ++ char buf[3*sizeof(unsigned long) + 1]; ++ char *bufend = buf + sizeof(buf); ++ size_t len; ++ char *p; ++ static char hex[] = "0123456789abcdef"; ++# define HEX32 (32/4) ++ ++ p = bufend; ++ *--p = '\0'; ++ switch (base) { ++ case 10: ++ case 'd': ++ do { ++ *--p = n%10 + '0'; ++ n /= 10; ++ } while (n != 0); ++ break; ++ case 16: ++ case 17: ++ case 'x': ++ do { ++ *--p = hex[n&0xf]; ++ n >>= 4; ++ } while (n != 0); ++ if (base == 17) ++ while (bufend - p < HEX32 + 1) ++ *--p = '0'; ++ if (base == 'x') { ++ *--p = 'x'; ++ *--p = '0'; ++ } ++ break; ++ case 8: ++ case 'o': ++ do { ++ *--p = (n&07) + '0'; ++ n >>= 3; ++ } while (n != 0); ++ if (base == 'o') ++ *--p = '0'; ++ break; ++ default: ++ return 0; ++ break; ++ } ++ ++ len = bufend - p; ++ if (dstlen > 0) { ++ if (len > dstlen) ++ *(p + dstlen - 1) = '\0'; ++ strcpy(dst, p); ++ } ++ return len; ++} +--- /dev/null Fri May 10 13:59:54 2002 ++++ linux/net/ipsec/Makefile.ver Sun Jul 28 22:10:40 2002 +@@ -0,0 +1 @@ ++IPSECVERSION=2.4.12 diff --git a/src/patches/openswan-2.4.12.kernel-2.6.20-cryptoalg.patch 
b/src/patches/openswan-2.4.12.kernel-2.6.20-cryptoalg.patch new file mode 100755 index 000000000..52ebd1bb5 --- /dev/null +++ b/src/patches/openswan-2.4.12.kernel-2.6.20-cryptoalg.patch @@ -0,0 +1,11 @@ +--- linux-2.6.20.oorig/net/ipsec/ipsec_alg_cryptoapi.c 2007-02-15 12:30:41.000000000 +0100 ++++ linux-2.6.20/net/ipsec/ipsec_alg_cryptoapi.c 2007-02-15 13:47:07.000000000 +0100 +@@ -197,7 +197,7 @@ static struct ipsec_alg_capi_cipher alg_ + */ + int setup_cipher(const char *ciphername) + { +- return crypto_alg_available(ciphername, 0); ++ return crypto_has_alg(ciphername, 0, CRYPTO_ALG_ASYNC); + } + + /* diff --git a/src/patches/openswan-2.4.12.kernel-2.6.20.21-natt.patch b/src/patches/openswan-2.4.12.kernel-2.6.20.21-natt.patch new file mode 100644 index 000000000..471eb3296 --- /dev/null +++ b/src/patches/openswan-2.4.12.kernel-2.6.20.21-natt.patch @@ -0,0 +1,122 @@ +packaging/utils/nattpatch 2.6 +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ nat-t/include/net/xfrmudp.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,10 @@ ++/* ++ * pointer to function for type that xfrm4_input wants, to permit ++ * decoupling of XFRM from udp.c ++ */ ++#define HAVE_XFRM4_UDP_REGISTER ++ ++typedef int (*xfrm4_rcv_encap_t)(struct sk_buff *skb, __u16 encap_type); ++extern int udp4_register_esp_rcvencap(xfrm4_rcv_encap_t func ++ , xfrm4_rcv_encap_t *oldfunc); ++extern int udp4_unregister_esp_rcvencap(xfrm4_rcv_encap_t func); +--- /distros/kernel/linux-2.6.11.2/net/ipv4/Kconfig 2005-03-09 03:12:33.000000000 -0500 ++++ swan26/net/ipv4/Kconfig 2005-04-04 18:46:13.000000000 -0400 +@@ -351,2 +351,8 @@ + ++config IPSEC_NAT_TRAVERSAL ++ bool "IPSEC NAT-Traversal (KLIPS compatible)" ++ depends on INET ++ ---help--- ++ Includes support for RFC3947/RFC3948 NAT-Traversal of ESP over UDP. 
++ + config IP_TCPDIAG +--- plain26/net/ipv4/udp.c.orig 2006-01-02 22:21:10.000000000 -0500 ++++ plain26/net/ipv4/udp.c 2006-01-12 20:18:57.000000000 -0500 +@@ -108,6 +108,7 @@ + */ + + DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; ++#include + + struct hlist_head udp_hash[UDP_HTABLE_SIZE]; + DEFINE_RWLOCK(udp_hash_lock); +@@ -914,6 +915,44 @@ + return 0; + } + ++#if defined(CONFIG_XFRM) || defined(CONFIG_IPSEC_NAT_TRAVERSAL) ++ ++/* if XFRM isn't a module, then register it directly. */ ++#if !defined(CONFIG_XFRM_MODULE) ++static xfrm4_rcv_encap_t xfrm4_rcv_encap_func = xfrm4_rcv_encap; ++#else ++static xfrm4_rcv_encap_t xfrm4_rcv_encap_func = NULL; ++#endif ++ ++static xfrm4_rcv_encap_t xfrm4_rcv_encap_func; ++ ++int udp4_register_esp_rcvencap(xfrm4_rcv_encap_t func ++ , xfrm4_rcv_encap_t *oldfunc) ++{ ++ if(oldfunc != NULL) { ++ *oldfunc = xfrm4_rcv_encap_func; ++ } ++ ++#if 0 ++ if(xfrm4_rcv_encap_func != NULL) ++ return -1; ++#endif ++ ++ xfrm4_rcv_encap_func = func; ++ return 0; ++} ++ ++int udp4_unregister_esp_rcvencap(xfrm4_rcv_encap_t func) ++{ ++ if(xfrm4_rcv_encap_func != func) ++ return -1; ++ ++ xfrm4_rcv_encap_func = NULL; ++ return 0; ++} ++#endif /* CONFIG_XFRM || defined(CONFIG_IPSEC_NAT_TRAVERSAL)*/ ++ ++ + /* return: + * 1 if the the UDP system should process it + * 0 if we should drop this packet +@@ -921,9 +960,9 @@ + */ + static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) + { +-#ifndef CONFIG_XFRM ++#if !defined(CONFIG_XFRM) && !defined(CONFIG_IPSEC_NAT_TRAVERSAL) + return 1; +-#else ++#else /* either CONFIG_XFRM or CONFIG_IPSEC_NAT_TRAVERSAL */ + struct udp_sock *up = udp_sk(sk); + struct udphdr *uh; + struct iphdr *iph; +@@ -1049,11 +1088,15 @@ + kfree_skb(skb); + return 0; + } +- if (ret < 0) { +- /* process the ESP packet */ +- ret = xfrm4_rcv_encap(skb, up->encap_type); +- UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); +- return -ret; ++ if (ret < 0) { ++ if(xfrm4_rcv_encap_func != NULL) { ++ ret = 
(*xfrm4_rcv_encap_func)(skb, up->encap_type); ++ UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); ++ } else { ++ UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); ++ ret = 1; ++ } ++ return ret; + } + /* FALLTHROUGH -- it's a UDP Packet */ + } +@@ -1732,3 +1775,8 @@ + EXPORT_SYMBOL(udp_proc_register); + EXPORT_SYMBOL(udp_proc_unregister); + #endif ++ ++#if defined(CONFIG_IPSEC_NAT_TRAVERSAL) ++EXPORT_SYMBOL(udp4_register_esp_rcvencap); ++EXPORT_SYMBOL(udp4_unregister_esp_rcvencap); ++#endif diff --git a/src/patches/reiser4-for-2.6.20.patch b/src/patches/reiser4-for-2.6.20.patch new file mode 100755 index 000000000..187ea1f69 --- /dev/null +++ b/src/patches/reiser4-for-2.6.20.patch @@ -0,0 +1,81203 @@ + Documentation/Changes | 12 + + Documentation/filesystems/reiser4.txt | 75 + + arch/i386/lib/usercopy.c | 2 + + fs/Kconfig | 2 + + fs/Makefile | 1 + + fs/fs-writeback.c | 26 +- + fs/reiser4/Kconfig | 32 + + fs/reiser4/Makefile | 99 + + fs/reiser4/README | 125 + + fs/reiser4/as_ops.c | 339 +++ + fs/reiser4/block_alloc.c | 1137 ++++++++ + fs/reiser4/block_alloc.h | 175 ++ + fs/reiser4/blocknrset.c | 368 +++ + fs/reiser4/carry.c | 1391 +++++++++ + fs/reiser4/carry.h | 442 +++ + fs/reiser4/carry_ops.c | 2131 ++++++++++++++ + fs/reiser4/carry_ops.h | 42 + + fs/reiser4/context.c | 288 ++ + fs/reiser4/context.h | 228 ++ + fs/reiser4/coord.c | 935 ++++++ + fs/reiser4/coord.h | 389 +++ + fs/reiser4/debug.c | 308 ++ + fs/reiser4/debug.h | 350 +++ + fs/reiser4/dformat.h | 70 + + fs/reiser4/dscale.c | 174 ++ + fs/reiser4/dscale.h | 27 + + fs/reiser4/entd.c | 335 +++ + fs/reiser4/entd.h | 90 + + fs/reiser4/eottl.c | 509 ++++ + fs/reiser4/estimate.c | 120 + + fs/reiser4/export_ops.c | 295 ++ + fs/reiser4/flush.c | 3622 ++++++++++++++++++++++++ + fs/reiser4/flush.h | 274 ++ + fs/reiser4/flush_queue.c | 680 +++++ + fs/reiser4/forward.h | 256 ++ + fs/reiser4/fsdata.c | 804 ++++++ + fs/reiser4/fsdata.h | 207 ++ + fs/reiser4/init_super.c | 750 +++++ + fs/reiser4/inode.c | 709 +++++ + 
fs/reiser4/inode.h | 438 +++ + fs/reiser4/ioctl.h | 41 + + fs/reiser4/jnode.c | 1925 +++++++++++++ + fs/reiser4/jnode.h | 705 +++++ + fs/reiser4/kassign.c | 661 +++++ + fs/reiser4/kassign.h | 110 + + fs/reiser4/key.c | 137 + + fs/reiser4/key.h | 384 +++ + fs/reiser4/ktxnmgrd.c | 215 ++ + fs/reiser4/ktxnmgrd.h | 52 + + fs/reiser4/lock.c | 1232 ++++++++ + fs/reiser4/lock.h | 249 ++ + fs/reiser4/oid.c | 141 + + fs/reiser4/page_cache.c | 736 +++++ + fs/reiser4/page_cache.h | 68 + + fs/reiser4/plugin/Makefile | 26 + + fs/reiser4/plugin/cluster.c | 71 + + fs/reiser4/plugin/cluster.h | 343 +++ + fs/reiser4/plugin/compress/Makefile | 6 + + fs/reiser4/plugin/compress/compress.c | 381 +++ + fs/reiser4/plugin/compress/compress.h | 38 + + fs/reiser4/plugin/compress/compress_mode.c | 162 ++ + fs/reiser4/plugin/compress/lzoconf.h | 216 ++ + fs/reiser4/plugin/compress/minilzo.c | 1967 +++++++++++++ + fs/reiser4/plugin/compress/minilzo.h | 70 + + fs/reiser4/plugin/crypto/cipher.c | 37 + + fs/reiser4/plugin/crypto/cipher.h | 55 + + fs/reiser4/plugin/crypto/digest.c | 58 + + fs/reiser4/plugin/dir/Makefile | 5 + + fs/reiser4/plugin/dir/dir.h | 36 + + fs/reiser4/plugin/dir/hashed_dir.c | 81 + + fs/reiser4/plugin/dir/seekable_dir.c | 46 + + fs/reiser4/plugin/dir_plugin_common.c | 872 ++++++ + fs/reiser4/plugin/disk_format/Makefile | 5 + + fs/reiser4/plugin/disk_format/disk_format.c | 38 + + fs/reiser4/plugin/disk_format/disk_format.h | 27 + + fs/reiser4/plugin/disk_format/disk_format40.c | 655 +++++ + fs/reiser4/plugin/disk_format/disk_format40.h | 109 + + fs/reiser4/plugin/fibration.c | 175 ++ + fs/reiser4/plugin/fibration.h | 37 + + fs/reiser4/plugin/file/Makefile | 7 + + fs/reiser4/plugin/file/cryptcompress.c | 3760 +++++++++++++++++++++++++ + fs/reiser4/plugin/file/cryptcompress.h | 554 ++++ + fs/reiser4/plugin/file/file.c | 2820 ++++++++++++++++++ + fs/reiser4/plugin/file/file.h | 272 ++ + fs/reiser4/plugin/file/file_conversion.c | 594 ++++ + fs/reiser4/plugin/file/invert.c | 493 
++++ + fs/reiser4/plugin/file/symfile.c | 87 + + fs/reiser4/plugin/file/symlink.c | 95 + + fs/reiser4/plugin/file/tail_conversion.c | 726 +++++ + fs/reiser4/plugin/file_ops.c | 168 ++ + fs/reiser4/plugin/file_ops_readdir.c | 657 +++++ + fs/reiser4/plugin/file_plugin_common.c | 1007 +++++++ + fs/reiser4/plugin/hash.c | 353 +++ + fs/reiser4/plugin/inode_ops.c | 897 ++++++ + fs/reiser4/plugin/inode_ops_rename.c | 914 ++++++ + fs/reiser4/plugin/item/Makefile | 18 + + fs/reiser4/plugin/item/acl.h | 66 + + fs/reiser4/plugin/item/blackbox.c | 142 + + fs/reiser4/plugin/item/blackbox.h | 33 + + fs/reiser4/plugin/item/cde.c | 1008 +++++++ + fs/reiser4/plugin/item/cde.h | 87 + + fs/reiser4/plugin/item/ctail.c | 1570 +++++++++++ + fs/reiser4/plugin/item/ctail.h | 97 + + fs/reiser4/plugin/item/extent.c | 197 ++ + fs/reiser4/plugin/item/extent.h | 231 ++ + fs/reiser4/plugin/item/extent_file_ops.c | 1435 ++++++++++ + fs/reiser4/plugin/item/extent_flush_ops.c | 1028 +++++++ + fs/reiser4/plugin/item/extent_item_ops.c | 889 ++++++ + fs/reiser4/plugin/item/internal.c | 396 +++ + fs/reiser4/plugin/item/internal.h | 57 + + fs/reiser4/plugin/item/item.c | 719 +++++ + fs/reiser4/plugin/item/item.h | 400 +++ + fs/reiser4/plugin/item/sde.c | 190 ++ + fs/reiser4/plugin/item/sde.h | 66 + + fs/reiser4/plugin/item/static_stat.c | 1106 ++++++++ + fs/reiser4/plugin/item/static_stat.h | 224 ++ + fs/reiser4/plugin/item/tail.c | 812 ++++++ + fs/reiser4/plugin/item/tail.h | 58 + + fs/reiser4/plugin/node/Makefile | 5 + + fs/reiser4/plugin/node/node.c | 131 + + fs/reiser4/plugin/node/node.h | 272 ++ + fs/reiser4/plugin/node/node40.c | 2924 +++++++++++++++++++ + fs/reiser4/plugin/node/node40.h | 125 + + fs/reiser4/plugin/object.c | 516 ++++ + fs/reiser4/plugin/object.h | 121 + + fs/reiser4/plugin/plugin.c | 578 ++++ + fs/reiser4/plugin/plugin.h | 920 ++++++ + fs/reiser4/plugin/plugin_header.h | 144 + + fs/reiser4/plugin/plugin_set.c | 379 +++ + fs/reiser4/plugin/plugin_set.h | 77 + + 
fs/reiser4/plugin/security/Makefile | 4 + + fs/reiser4/plugin/security/perm.c | 44 + + fs/reiser4/plugin/security/perm.h | 82 + + fs/reiser4/plugin/space/Makefile | 4 + + fs/reiser4/plugin/space/bitmap.c | 1585 +++++++++++ + fs/reiser4/plugin/space/bitmap.h | 47 + + fs/reiser4/plugin/space/space_allocator.h | 80 + + fs/reiser4/plugin/tail_policy.c | 113 + + fs/reiser4/pool.c | 234 ++ + fs/reiser4/pool.h | 55 + + fs/reiser4/readahead.c | 138 + + fs/reiser4/readahead.h | 48 + + fs/reiser4/reiser4.h | 269 ++ + fs/reiser4/safe_link.c | 351 +++ + fs/reiser4/safe_link.h | 29 + + fs/reiser4/seal.c | 218 ++ + fs/reiser4/seal.h | 49 + + fs/reiser4/search.c | 1611 +++++++++++ + fs/reiser4/status_flags.c | 175 ++ + fs/reiser4/status_flags.h | 43 + + fs/reiser4/super.c | 316 +++ + fs/reiser4/super.h | 464 +++ + fs/reiser4/super_ops.c | 730 +++++ + fs/reiser4/tap.c | 377 +++ + fs/reiser4/tap.h | 70 + + fs/reiser4/tree.c | 1876 ++++++++++++ + fs/reiser4/tree.h | 577 ++++ + fs/reiser4/tree_mod.c | 386 +++ + fs/reiser4/tree_mod.h | 29 + + fs/reiser4/tree_walk.c | 927 ++++++ + fs/reiser4/tree_walk.h | 125 + + fs/reiser4/txnmgr.c | 3164 +++++++++++++++++++++ + fs/reiser4/txnmgr.h | 708 +++++ + fs/reiser4/type_safe_hash.h | 320 +++ + fs/reiser4/vfs_ops.c | 259 ++ + fs/reiser4/vfs_ops.h | 53 + + fs/reiser4/wander.c | 1797 ++++++++++++ + fs/reiser4/wander.h | 135 + + fs/reiser4/writeout.h | 21 + + fs/reiser4/znode.c | 1029 +++++++ + fs/reiser4/znode.h | 434 +++ + include/linux/fs.h | 3 + + lib/radix-tree.c | 1 + + mm/filemap.c | 5 + + mm/readahead.c | 1 + + 175 files changed, 79830 insertions(+), 10 deletions(-) + +diff --git a/Documentation/Changes b/Documentation/Changes +index 73a8617..49ee889 100644 +--- a/Documentation/Changes ++++ b/Documentation/Changes +@@ -36,6 +36,7 @@ o module-init-tools 0.9.10 # depmod -V + o e2fsprogs 1.29 # tune2fs + o jfsutils 1.1.3 # fsck.jfs -V + o reiserfsprogs 3.6.3 # reiserfsck -V 2>&1|grep reiserfsprogs ++o reiser4progs 1.0.0 # fsck.reiser4 -V + o 
xfsprogs 2.6.0 # xfs_db -V + o pcmciautils 004 # pccardctl -V + o quota-tools 3.09 # quota -V +@@ -144,6 +145,13 @@ The reiserfsprogs package should be used for reiserfs-3.6.x + versions of mkreiserfs, resize_reiserfs, debugreiserfs and + reiserfsck. These utils work on both i386 and alpha platforms. + ++Reiser4progs ++------------ ++ ++The reiser4progs package contains utilities for the reiser4 file system. ++Detailed instructions are provided in the README file located at: ++. ++ + Xfsprogs + -------- + +@@ -322,6 +330,10 @@ Reiserfsprogs + ------------- + o + ++Reiser4progs ++------------ ++o ++ + Xfsprogs + -------- + o +diff --git a/Documentation/filesystems/reiser4.txt b/Documentation/filesystems/reiser4.txt +new file mode 100644 +index 0000000..8e07c9e +--- /dev/null ++++ b/Documentation/filesystems/reiser4.txt +@@ -0,0 +1,75 @@ ++Reiser4 filesystem ++================== ++Reiser4 is a file system based on dancing tree algorithms, and is ++described at http://www.namesys.com ++ ++ ++References ++========== ++web page http://namesys.com/v4/v4.html ++source code ftp://ftp.namesys.com/pub/reiser4-for-2.6/ ++userland tools ftp://ftp.namesys.com/pub/reiser4progs/ ++install page http://www.namesys.com/install_v4.html ++ ++Compile options ++=============== ++Enable reiser4 debug mode ++ This checks everything imaginable while reiser4 ++ runs ++ ++Mount options ++============= ++tmgr.atom_max_size=N ++ Atoms containing more than N blocks will be forced to commit. ++ N is decimal. ++ Default is nr_free_pagecache_pages() / 2 at mount time. ++ ++tmgr.atom_max_age=N ++ Atoms older than N seconds will be forced to commit. N is decimal. ++ Default is 600. ++ ++tmgr.atom_max_flushers=N ++ Limit of concurrent flushers for one atom. 0 means no limit. ++ Default is 0. ++ ++tree.cbk_cache.nr_slots=N ++ Number of slots in the cbk cache. ++ ++flush.relocate_threshold=N ++ If flush finds more than N adjacent dirty leaf-level blocks it ++ will force them to be relocated. 
++ Default is 64. ++ ++flush.relocate_distance=N ++ If flush finds can find a block allocation closer than at most ++ N from the preceder it will relocate to that position. ++ Default is 64. ++ ++flush.scan_maxnodes=N ++ The maximum number of nodes to scan left on a level during ++ flush. ++ Default is 10000. ++ ++optimal_io_size=N ++ Preferred IO size. This value is used to set st_blksize of ++ struct stat. ++ Default is 65536. ++ ++bsdgroups ++ Turn on BSD-style gid assignment. ++ ++32bittimes ++ By default file in reiser4 have 64 bit timestamps. Files ++ created when filesystem is mounted with 32bittimes mount ++ option will get 32 bit timestamps. ++ ++mtflush ++ Turn off concurrent flushing. ++ ++nopseudo ++ Disable pseudo files support. See ++ http://namesys.com/v4/pseudo.html for more about pseudo files. ++ ++dont_load_bitmap ++ Don't load all bitmap blocks at mount time, it is useful for ++ machines with tiny RAM and large disks. +diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c +index d22cfc9..bb4a75a 100644 +--- a/arch/i386/lib/usercopy.c ++++ b/arch/i386/lib/usercopy.c +@@ -812,6 +812,7 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, + #endif + return n; + } ++EXPORT_SYMBOL(__copy_from_user_ll_nocache); + + unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, + unsigned long n) +@@ -827,6 +828,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr + #endif + return n; + } ++EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); + + /** + * copy_to_user: - Copy a block of data into user space. 
+diff --git a/fs/Kconfig b/fs/Kconfig +index 8cd2417..5a97039 100644 +--- a/fs/Kconfig ++++ b/fs/Kconfig +@@ -272,6 +272,8 @@ config FS_MBCACHE + default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y + default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m + ++source "fs/reiser4/Kconfig" ++ + config REISERFS_FS + tristate "Reiserfs support" + help +diff --git a/fs/Makefile b/fs/Makefile +index b9ffa63..b4c08ce 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -62,6 +62,7 @@ obj-$(CONFIG_DLM) += dlm/ + + # Do not add any filesystems before this line + obj-$(CONFIG_REISERFS_FS) += reiserfs/ ++obj-$(CONFIG_REISER4_FS) += reiser4/ + obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 + obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev + obj-$(CONFIG_JBD) += jbd/ +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index a4b142a..cdcff8c 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -296,8 +296,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) + * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so + * that it can be located for waiting on in __writeback_single_inode(). + * +- * Called under inode_lock. +- * + * If `bdi' is non-zero then we're being asked to writeback a specific queue. + * This function assumes that the blockdev superblock's inodes are backed by + * a variety of queues, so all inodes are searched. For other superblocks, +@@ -313,11 +311,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) + * on the writer throttling path, and we get decent balancing between many + * throttled threads: we don't want them all piling up on __wait_on_inode. 
+ */ +-static void +-sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) ++void ++generic_sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) + { + const unsigned long start = jiffies; /* livelock avoidance */ + ++ spin_lock(&inode_lock); ++ + if (!wbc->for_kupdate || list_empty(&sb->s_io)) + list_splice_init(&sb->s_dirty, &sb->s_io); + +@@ -397,8 +397,19 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) + if (wbc->nr_to_write <= 0) + break; + } ++ spin_unlock(&inode_lock); + return; /* Leave any unwritten inodes on s_io */ + } ++EXPORT_SYMBOL(generic_sync_sb_inodes); ++ ++static void ++sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) ++{ ++ if (sb->s_op->sync_inodes) ++ sb->s_op->sync_inodes(sb, wbc); ++ else ++ generic_sync_sb_inodes(sb, wbc); ++} + + /* + * Start writeback of dirty pagecache data against all unlocked inodes. +@@ -439,11 +450,8 @@ restart: + * be unmounted by the time it is released. + */ + if (down_read_trylock(&sb->s_umount)) { +- if (sb->s_root) { +- spin_lock(&inode_lock); ++ if (sb->s_root) + sync_sb_inodes(sb, wbc); +- spin_unlock(&inode_lock); +- } + up_read(&sb->s_umount); + } + spin_lock(&sb_lock); +@@ -481,9 +489,7 @@ void sync_inodes_sb(struct super_block *sb, int wait) + (inodes_stat.nr_inodes - inodes_stat.nr_unused) + + nr_dirty + nr_unstable; + wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */ +- spin_lock(&inode_lock); + sync_sb_inodes(sb, &wbc); +- spin_unlock(&inode_lock); + } + + /* +diff --git a/fs/reiser4/Kconfig b/fs/reiser4/Kconfig +new file mode 100644 +index 0000000..f6e5195 +--- /dev/null ++++ b/fs/reiser4/Kconfig +@@ -0,0 +1,32 @@ ++config REISER4_FS ++ tristate "Reiser4 (EXPERIMENTAL)" ++ depends on EXPERIMENTAL ++ select ZLIB_INFLATE ++ select ZLIB_DEFLATE ++ select CRYPTO ++ help ++ Reiser4 is a filesystem that performs all filesystem operations ++ as atomic transactions, which means that it either performs a ++ write, or it 
does not, and in the event of a crash it does not ++ partially perform it or corrupt it. ++ ++ It stores files in dancing trees, which are like balanced trees but ++ faster. It packs small files together so that they share blocks ++ without wasting space. This means you can use it to store really ++ small files. It also means that it saves you disk space. It avoids ++ hassling you with anachronisms like having a maximum number of ++ inodes, and wasting space if you use less than that number. ++ ++ Reiser4 is a distinct filesystem type from reiserfs (V3). ++ It's therefore not possible to use reiserfs file systems ++ with reiser4. ++ ++ To learn more about reiser4, go to http://www.namesys.com ++ ++config REISER4_DEBUG ++ bool "Enable reiser4 debug mode" ++ depends on REISER4_FS ++ help ++ Don't use this unless you are debugging reiser4. ++ ++ If unsure, say N. +diff --git a/fs/reiser4/Makefile b/fs/reiser4/Makefile +new file mode 100644 +index 0000000..e78441e +--- /dev/null ++++ b/fs/reiser4/Makefile +@@ -0,0 +1,99 @@ ++# ++# reiser4/Makefile ++# ++ ++obj-$(CONFIG_REISER4_FS) += reiser4.o ++ ++reiser4-y := \ ++ debug.o \ ++ jnode.o \ ++ znode.o \ ++ key.o \ ++ pool.o \ ++ tree_mod.o \ ++ estimate.o \ ++ carry.o \ ++ carry_ops.o \ ++ lock.o \ ++ tree.o \ ++ context.o \ ++ tap.o \ ++ coord.o \ ++ block_alloc.o \ ++ txnmgr.o \ ++ kassign.o \ ++ flush.o \ ++ wander.o \ ++ eottl.o \ ++ search.o \ ++ page_cache.o \ ++ seal.o \ ++ dscale.o \ ++ flush_queue.o \ ++ ktxnmgrd.o \ ++ blocknrset.o \ ++ super.o \ ++ super_ops.o \ ++ fsdata.o \ ++ export_ops.o \ ++ oid.o \ ++ tree_walk.o \ ++ inode.o \ ++ vfs_ops.o \ ++ as_ops.o \ ++ entd.o\ ++ readahead.o \ ++ status_flags.o \ ++ init_super.o \ ++ safe_link.o \ ++ \ ++ plugin/plugin.o \ ++ plugin/plugin_set.o \ ++ plugin/node/node.o \ ++ plugin/object.o \ ++ plugin/cluster.o \ ++ plugin/inode_ops.o \ ++ plugin/inode_ops_rename.o \ ++ plugin/file_ops.o \ ++ plugin/file_ops_readdir.o \ ++ plugin/file_plugin_common.o \ ++ 
plugin/file/file.o \ ++ plugin/file/tail_conversion.o \ ++ plugin/file/file_conversion.o \ ++ plugin/file/symlink.o \ ++ plugin/file/cryptcompress.o \ ++ plugin/dir_plugin_common.o \ ++ plugin/dir/hashed_dir.o \ ++ plugin/dir/seekable_dir.o \ ++ plugin/node/node40.o \ ++ \ ++ plugin/crypto/cipher.o \ ++ plugin/crypto/digest.o \ ++ \ ++ plugin/compress/minilzo.o \ ++ plugin/compress/compress.o \ ++ plugin/compress/compress_mode.o \ ++ \ ++ plugin/item/static_stat.o \ ++ plugin/item/sde.o \ ++ plugin/item/cde.o \ ++ plugin/item/blackbox.o \ ++ plugin/item/internal.o \ ++ plugin/item/tail.o \ ++ plugin/item/ctail.o \ ++ plugin/item/extent.o \ ++ plugin/item/extent_item_ops.o \ ++ plugin/item/extent_file_ops.o \ ++ plugin/item/extent_flush_ops.o \ ++ \ ++ plugin/hash.o \ ++ plugin/fibration.o \ ++ plugin/tail_policy.o \ ++ plugin/item/item.o \ ++ \ ++ plugin/security/perm.o \ ++ plugin/space/bitmap.o \ ++ \ ++ plugin/disk_format/disk_format40.o \ ++ plugin/disk_format/disk_format.o ++ +diff --git a/fs/reiser4/README b/fs/reiser4/README +new file mode 100644 +index 0000000..4637f59 +--- /dev/null ++++ b/fs/reiser4/README +@@ -0,0 +1,125 @@ ++[LICENSING] ++ ++Reiser4 is hereby licensed under the GNU General ++Public License version 2. ++ ++Source code files that contain the phrase "licensing governed by ++reiser4/README" are "governed files" throughout this file. Governed ++files are licensed under the GPL. The portions of them owned by Hans ++Reiser, or authorized to be licensed by him, have been in the past, ++and likely will be in the future, licensed to other parties under ++other licenses. If you add your code to governed files, and don't ++want it to be owned by Hans Reiser, put your copyright label on that ++code so the poor blight and his customers can keep things straight. 
++All portions of governed files not labeled otherwise are owned by Hans ++Reiser, and by adding your code to it, widely distributing it to ++others or sending us a patch, and leaving the sentence in stating that ++licensing is governed by the statement in this file, you accept this. ++It will be a kindness if you identify whether Hans Reiser is allowed ++to license code labeled as owned by you on your behalf other than ++under the GPL, because he wants to know if it is okay to do so and put ++a check in the mail to you (for non-trivial improvements) when he ++makes his next sale. He makes no guarantees as to the amount if any, ++though he feels motivated to motivate contributors, and you can surely ++discuss this with him before or after contributing. You have the ++right to decline to allow him to license your code contribution other ++than under the GPL. ++ ++Further licensing options are available for commercial and/or other ++interests directly from Hans Reiser: reiser@namesys.com. If you interpret ++the GPL as not allowing those additional licensing options, you read ++it wrongly, and Richard Stallman agrees with me, when carefully read ++you can see that those restrictions on additional terms do not apply ++to the owner of the copyright, and my interpretation of this shall ++govern for this license. ++ ++[END LICENSING] ++ ++Reiser4 is a file system based on dancing tree algorithms, and is ++described at http://www.namesys.com ++ ++mkfs.reiser4 and other utilities are on our webpage or wherever your ++Linux provider put them. You really want to be running the latest ++version off the website if you use fsck. ++ ++Yes, if you update your reiser4 kernel module you do have to ++recompile your kernel, most of the time. The errors you get will be ++quite cryptic if your forget to do so. ++ ++Hideous Commercial Pitch: Spread your development costs across other OS ++vendors. 
Select from the best in the world, not the best in your ++building, by buying from third party OS component suppliers. Leverage ++the software component development power of the internet. Be the most ++aggressive in taking advantage of the commercial possibilities of ++decentralized internet development, and add value through your branded ++integration that you sell as an operating system. Let your competitors ++be the ones to compete against the entire internet by themselves. Be ++hip, get with the new economic trend, before your competitors do. Send ++email to reiser@namesys.com ++ ++Hans Reiser was the primary architect of Reiser4, but a whole team ++chipped their ideas in. He invested everything he had into Namesys ++for 5.5 dark years of no money before Reiser3 finally started to work well ++enough to bring in money. He owns the copyright. ++ ++DARPA was the primary sponsor of Reiser4. DARPA does not endorse ++Reiser4, it merely sponsors it. DARPA is, in solely Hans's personal ++opinion, unique in its willingness to invest into things more ++theoretical than the VC community can readily understand, and more ++longterm than allows them to be sure that they will be the ones to ++extract the economic benefits from. DARPA also integrated us into a ++security community that transformed our security worldview. ++ ++Vladimir Saveliev is our lead programmer, with us from the beginning, ++and he worked long hours writing the cleanest code. This is why he is ++now the lead programmer after years of commitment to our work. He ++always made the effort to be the best he could be, and to make his ++code the best that it could be. What resulted was quite remarkable. I ++don't think that money can ever motivate someone to work the way he ++did, he is one of the most selfless men I know. ++ ++Alexander Lyamin was our sysadmin, and helped to educate us in ++security issues. 
Moscow State University and IMT were very generous ++in the internet access they provided us, and in lots of other little ++ways that a generous institution can be. ++ ++Alexander Zarochentcev (sometimes known as zam, or sasha), wrote the ++locking code, the block allocator, and finished the flushing code. ++His code is always crystal clean and well structured. ++ ++Nikita Danilov wrote the core of the balancing code, the core of the ++plugins code, and the directory code. He worked a steady pace of long ++hours that produced a whole lot of well abstracted code. He is our ++senior computer scientist. ++ ++Vladimir Demidov wrote the parser. Writing an in kernel parser is ++something very few persons have the skills for, and it is thanks to ++him that we can say that the parser is really not so big compared to ++various bits of our other code, and making a parser work in the kernel ++was not so complicated as everyone would imagine mainly because it was ++him doing it... ++ ++Joshua McDonald wrote the transaction manager, and the flush code. ++The flush code unexpectedly turned out be extremely hairy for reasons ++you can read about on our web page, and he did a great job on an ++extremely difficult task. ++ ++Nina Reiser handled our accounting, government relations, and much ++more. ++ ++Ramon Reiser developed our website. ++ ++Beverly Palmer drew our graphics. ++ ++Vitaly Fertman developed librepair, userspace plugins repair code, fsck ++and worked with Umka on developing libreiser4 and userspace plugins. ++ ++Yury Umanets (aka Umka) developed libreiser4, userspace plugins and ++userspace tools (reiser4progs). ++ ++Oleg Drokin (aka Green) is the release manager who fixes everything. ++It is so nice to have someone like that on the team. He (plus Chris ++and Jeff) make it possible for the entire rest of the Namesys team to ++focus on Reiser4, and he fixed a whole lot of Reiser4 bugs also. It ++is just amazing to watch his talent for spotting bugs in action. 
++ +diff --git a/fs/reiser4/as_ops.c b/fs/reiser4/as_ops.c +new file mode 100644 +index 0000000..b4f3375 +--- /dev/null ++++ b/fs/reiser4/as_ops.c +@@ -0,0 +1,339 @@ ++/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Interface to VFS. Reiser4 address_space_operations are defined here. */ ++ ++#include "forward.h" ++#include "debug.h" ++#include "dformat.h" ++#include "coord.h" ++#include "plugin/item/item.h" ++#include "plugin/file/file.h" ++#include "plugin/security/perm.h" ++#include "plugin/disk_format/disk_format.h" ++#include "plugin/plugin.h" ++#include "plugin/plugin_set.h" ++#include "plugin/object.h" ++#include "txnmgr.h" ++#include "jnode.h" ++#include "znode.h" ++#include "block_alloc.h" ++#include "tree.h" ++#include "vfs_ops.h" ++#include "inode.h" ++#include "page_cache.h" ++#include "ktxnmgrd.h" ++#include "super.h" ++#include "reiser4.h" ++#include "entd.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* address space operations */ ++ ++/** ++ * reiser4_set_page_dirty - set dirty bit, tag in page tree, dirty accounting ++ * @page: page to be dirtied ++ * ++ * Operation of struct address_space_operations. This implementation is used by ++ * unix and cryptcompress file plugins. ++ * ++ * This is called when reiser4 page gets dirtied outside of reiser4, for ++ * example, when dirty bit is moved from pte to physical page. ++ * ++ * Tags page in the mapping's page tree with special tag so that it is possible ++ * to do all the reiser4 specific work wrt dirty pages (jnode creation, ++ * capturing by an atom) later because it can not be done in the contexts where ++ * set_page_dirty is called. 
++ */ ++int reiser4_set_page_dirty(struct page *page) ++{ ++ /* this page can be unformatted only */ ++ assert("vs-1734", (page->mapping && ++ page->mapping->host && ++ reiser4_get_super_fake(page->mapping->host->i_sb) != ++ page->mapping->host ++ && reiser4_get_cc_fake(page->mapping->host->i_sb) != ++ page->mapping->host ++ && reiser4_get_bitmap_fake(page->mapping->host->i_sb) != ++ page->mapping->host)); ++ ++ if (!TestSetPageDirty(page)) { ++ struct address_space *mapping = page->mapping; ++ ++ if (mapping) { ++ write_lock_irq(&mapping->tree_lock); ++ ++ /* check for race with truncate */ ++ if (page->mapping) { ++ assert("vs-1652", page->mapping == mapping); ++ if (mapping_cap_account_dirty(mapping)) ++ inc_zone_page_state(page, ++ NR_FILE_DIRTY); ++ radix_tree_tag_set(&mapping->page_tree, ++ page->index, ++ PAGECACHE_TAG_REISER4_MOVED); ++ } ++ write_unlock_irq(&mapping->tree_lock); ++ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); ++ } ++ } ++ return 0; ++} ++ ++/* ->invalidatepage method for reiser4 */ ++ ++/* ++ * this is called for each truncated page from ++ * truncate_inode_pages()->truncate_{complete,partial}_page(). ++ * ++ * At the moment of call, page is under lock, and outstanding io (if any) has ++ * completed. ++ */ ++ ++/** ++ * reiser4_invalidatepage ++ * @page: page to invalidate ++ * @offset: starting offset for partial invalidation ++ * ++ */ ++void reiser4_invalidatepage(struct page *page, unsigned long offset) ++{ ++ int ret = 0; ++ reiser4_context *ctx; ++ struct inode *inode; ++ jnode *node; ++ ++ /* ++ * This is called to truncate file's page. ++ * ++ * Originally, reiser4 implemented truncate in a standard way ++ * (vmtruncate() calls ->invalidatepage() on all truncated pages ++ * first, then file system ->truncate() call-back is invoked). ++ * ++ * This lead to the problem when ->invalidatepage() was called on a ++ * page with jnode that was captured into atom in ASTAGE_PRE_COMMIT ++ * process. 
That is, truncate was bypassing transactions. To avoid ++ * this, try_capture_page_to_invalidate() call was added here. ++ * ++ * After many troubles with vmtruncate() based truncate (including ++ * races with flush, tail conversion, etc.) it was re-written in the ++ * top-to-bottom style: items are killed in reiser4_cut_tree_object() ++ * and pages belonging to extent are invalidated in kill_hook_extent(). ++ * So probably now additional call to capture is not needed here. ++ */ ++ ++ assert("nikita-3137", PageLocked(page)); ++ assert("nikita-3138", !PageWriteback(page)); ++ inode = page->mapping->host; ++ ++ /* ++ * ->invalidatepage() should only be called for the unformatted ++ * jnodes. Destruction of all other types of jnodes is performed ++ * separately. But, during some corner cases (like handling errors ++ * during mount) it is simpler to let ->invalidatepage to be called on ++ * them. Check for this, and do nothing. ++ */ ++ if (reiser4_get_super_fake(inode->i_sb) == inode) ++ return; ++ if (reiser4_get_cc_fake(inode->i_sb) == inode) ++ return; ++ if (reiser4_get_bitmap_fake(inode->i_sb) == inode) ++ return; ++ assert("vs-1426", PagePrivate(page)); ++ assert("vs-1427", ++ page->mapping == jnode_get_mapping(jnode_by_page(page))); ++ assert("", jprivate(page) != NULL); ++ assert("", ergo(inode_file_plugin(inode) != ++ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID), ++ offset == 0)); ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return; ++ ++ node = jprivate(page); ++ spin_lock_jnode(node); ++ if (!(node->state & ((1 << JNODE_DIRTY) | (1<< JNODE_FLUSH_QUEUED) | ++ (1 << JNODE_WRITEBACK) | (1 << JNODE_OVRWR)))) { ++ /* there is not need to capture */ ++ jref(node); ++ JF_SET(node, JNODE_HEARD_BANSHEE); ++ page_clear_jnode(page, node); ++ reiser4_uncapture_jnode(node); ++ unhash_unformatted_jnode(node); ++ jput(node); ++ reiser4_exit_context(ctx); ++ return; ++ } ++ spin_unlock_jnode(node); ++ ++ /* capture page being truncated. 
*/ ++ ret = try_capture_page_to_invalidate(page); ++ if (ret != 0) ++ warning("nikita-3141", "Cannot capture: %i", ret); ++ ++ if (offset == 0) { ++ /* remove jnode from transaction and detach it from page. */ ++ jref(node); ++ JF_SET(node, JNODE_HEARD_BANSHEE); ++ /* page cannot be detached from jnode concurrently, because it ++ * is locked */ ++ reiser4_uncapture_page(page); ++ ++ /* this detaches page from jnode, so that jdelete will not try ++ * to lock page which is already locked */ ++ spin_lock_jnode(node); ++ page_clear_jnode(page, node); ++ spin_unlock_jnode(node); ++ unhash_unformatted_jnode(node); ++ ++ jput(node); ++ } ++ ++ reiser4_exit_context(ctx); ++} ++ ++/* help function called from reiser4_releasepage(). It returns true if jnode ++ * can be detached from its page and page released. */ ++int jnode_is_releasable(jnode * node /* node to check */ ) ++{ ++ assert("nikita-2781", node != NULL); ++ assert_spin_locked(&(node->guard)); ++ assert_spin_locked(&(node->load)); ++ ++ /* is some thread is currently using jnode page, later cannot be ++ * detached */ ++ if (atomic_read(&node->d_count) != 0) { ++ return 0; ++ } ++ ++ assert("vs-1214", !jnode_is_loaded(node)); ++ ++ /* ++ * can only release page if real block number is assigned to it. Simple ++ * check for ->atom wouldn't do, because it is possible for node to be ++ * clean, not it atom yet, and still having fake block number. For ++ * example, node just created in jinit_new(). ++ */ ++ if (reiser4_blocknr_is_fake(jnode_get_block(node))) ++ return 0; ++ ++ /* ++ * pages prepared for write can not be released anyway, so avoid ++ * detaching jnode from the page ++ */ ++ if (JF_ISSET(node, JNODE_WRITE_PREPARED)) ++ return 0; ++ ++ /* ++ * dirty jnode cannot be released. It can however be submitted to disk ++ * as part of early flushing, but only after getting flush-prepped. ++ */ ++ if (JF_ISSET(node, JNODE_DIRTY)) ++ return 0; ++ ++ /* overwrite set is only written by log writer. 
*/ ++ if (JF_ISSET(node, JNODE_OVRWR)) ++ return 0; ++ ++ /* jnode is already under writeback */ ++ if (JF_ISSET(node, JNODE_WRITEBACK)) ++ return 0; ++ ++ /* don't flush bitmaps or journal records */ ++ if (!jnode_is_znode(node) && !jnode_is_unformatted(node)) ++ return 0; ++ ++ return 1; ++} ++ ++/* ++ * ->releasepage method for reiser4 ++ * ++ * This is called by VM scanner when it comes across clean page. What we have ++ * to do here is to check whether page can really be released (freed that is) ++ * and if so, detach jnode from it and remove page from the page cache. ++ * ++ * Check for releasability is done by releasable() function. ++ */ ++int reiser4_releasepage(struct page *page, gfp_t gfp UNUSED_ARG) ++{ ++ jnode *node; ++ ++ assert("nikita-2257", PagePrivate(page)); ++ assert("nikita-2259", PageLocked(page)); ++ assert("nikita-2892", !PageWriteback(page)); ++ assert("nikita-3019", reiser4_schedulable()); ++ ++ /* NOTE-NIKITA: this can be called in the context of reiser4 call. It ++ is not clear what to do in this case. A lot of deadlocks seems be ++ possible. */ ++ if (page_count(page) > 3) ++ return 0; ++ ++ node = jnode_by_page(page); ++ assert("nikita-2258", node != NULL); ++ assert("reiser4-4", page->mapping != NULL); ++ assert("reiser4-5", page->mapping->host != NULL); ++ ++ if (PageDirty(page)) ++ return 0; ++ ++ /* extra page reference is used by reiser4 to protect ++ * jnode<->page link from this ->releasepage(). */ ++ if (page_count(page) > 3) ++ return 0; ++ ++ /* releasable() needs jnode lock, because it looks at the jnode fields ++ * and we need jload_lock here to avoid races with jload(). */ ++ spin_lock_jnode(node); ++ spin_lock(&(node->load)); ++ if (jnode_is_releasable(node)) { ++ struct address_space *mapping; ++ ++ mapping = page->mapping; ++ jref(node); ++ /* there is no need to synchronize against ++ * jnode_extent_write() here, because pages seen by ++ * jnode_extent_write() are !releasable(). 
*/ ++ page_clear_jnode(page, node); ++ spin_unlock(&(node->load)); ++ spin_unlock_jnode(node); ++ ++ /* we are under memory pressure so release jnode also. */ ++ jput(node); ++ ++ return 1; ++ } else { ++ spin_unlock(&(node->load)); ++ spin_unlock_jnode(node); ++ assert("nikita-3020", reiser4_schedulable()); ++ return 0; ++ } ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/block_alloc.c b/fs/reiser4/block_alloc.c +new file mode 100644 +index 0000000..c405c5f +--- /dev/null ++++ b/fs/reiser4/block_alloc.c +@@ -0,0 +1,1137 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "debug.h" ++#include "dformat.h" ++#include "plugin/plugin.h" ++#include "txnmgr.h" ++#include "znode.h" ++#include "block_alloc.h" ++#include "tree.h" ++#include "super.h" ++ ++#include /* for __u?? */ ++#include /* for struct super_block */ ++#include ++ ++/* THE REISER4 DISK SPACE RESERVATION SCHEME. */ ++ ++/* We need to be able to reserve enough disk space to ensure that an atomic ++ operation will have enough disk space to flush (see flush.c and ++ http://namesys.com/v4/v4.html) and commit it once it is started. ++ ++ In our design a call for reserving disk space may fail but not an actual ++ block allocation. ++ ++ All free blocks, already allocated blocks, and all kinds of reserved blocks ++ are counted in different per-fs block counters. 
++ ++ A reiser4 super block's set of block counters currently is: ++ ++ free -- free blocks, ++ used -- already allocated blocks, ++ ++ grabbed -- initially reserved for performing an fs operation, those blocks ++ are taken from free blocks, then grabbed disk space leaks from grabbed ++ blocks counter to other counters like "fake allocated", "flush ++ reserved", "used", the rest of not used grabbed space is returned to ++ free space at the end of fs operation; ++ ++ fake allocated -- counts all nodes without real disk block numbers assigned, ++ we have separate accounting for formatted and unformatted ++ nodes (for easier debugging); ++ ++ flush reserved -- disk space needed for flushing and committing an atom. ++ Each dirty already allocated block could be written as a ++ part of atom's overwrite set or as a part of atom's ++ relocate set. In both case one additional block is needed, ++ it is used as a wandered block if we do overwrite or as a ++ new location for a relocated block. ++ ++ In addition, blocks in some states are counted on per-thread and per-atom ++ basis. A reiser4 context has a counter of blocks grabbed by this transaction ++ and the sb's grabbed blocks counter is a sum of grabbed blocks counter values ++ of each reiser4 context. Each reiser4 atom has a counter of "flush reserved" ++ blocks, which are reserved for flush processing and atom commit. */ ++ ++/* AN EXAMPLE: suppose we insert new item to the reiser4 tree. We estimate ++ number of blocks to grab for most expensive case of balancing when the leaf ++ node we insert new item to gets split and new leaf node is allocated. ++ ++ So, we need to grab blocks for ++ ++ 1) one block for possible dirtying the node we insert an item to. That block ++ would be used for node relocation at flush time or for allocating of a ++ wandered one, it depends what will be a result (what set, relocate or ++ overwrite the node gets assigned to) of the node processing by the flush ++ algorithm. 
++ ++ 2) one block for either allocating a new node, or dirtying of right or left ++ clean neighbor, only one case may happen. ++ ++ VS-FIXME-HANS: why can only one case happen? I would expect to see dirtying of left neighbor, right neighbor, current ++ node, and creation of new node. have I forgotten something? email me. ++ ++ These grabbed blocks are counted in both reiser4 context "grabbed blocks" ++ counter and in the fs-wide one (both ctx->grabbed_blocks and ++ sbinfo->blocks_grabbed get incremented by 2), sb's free blocks counter is ++ decremented by 2. ++ ++ Suppose both two blocks were spent for dirtying of an already allocated clean ++ node (one block went from "grabbed" to "flush reserved") and for new block ++ allocating (one block went from "grabbed" to "fake allocated formatted"). ++ ++ Inserting of a child pointer to the parent node caused parent node to be ++ split, the balancing code takes care about this grabbing necessary space ++ immediately by calling reiser4_grab with BA_RESERVED flag set which means ++ "can use the 5% reserved disk space". ++ ++ At this moment insertion completes and grabbed blocks (if they were not used) ++ should be returned to the free space counter. ++ ++ However the atom life-cycle is not completed. The atom had one "flush ++ reserved" block added by our insertion and the new fake allocated node is ++ counted as a "fake allocated formatted" one. The atom has to be fully ++ processed by flush before commit. Suppose that the flush moved the first, ++ already allocated node to the atom's overwrite list, the new fake allocated ++ node, obviously, went into the atom relocate set. The reiser4 flush ++ allocates the new node using one unit from "fake allocated formatted" ++ counter, the log writer uses one from "flush reserved" for wandered block ++ allocation. ++ ++ And, it is not the end. 
When the wandered block is deallocated after the ++ atom gets fully played (see wander.c for term description), the disk space ++ occupied for it is returned to free blocks. */ ++ ++/* BLOCK NUMBERS */ ++ ++/* Any reiser4 node has a block number assigned to it. We use these numbers for ++ indexing in hash tables, so if a block has not yet been assigned a location ++ on disk we need to give it a temporary fake block number. ++ ++ Current implementation of reiser4 uses 64-bit integers for block numbers. We ++ use highest bit in 64-bit block number to distinguish fake and real block ++ numbers. So, only 63 bits may be used to addressing of real device ++ blocks. That "fake" block numbers space is divided into subspaces of fake ++ block numbers for data blocks and for shadow (working) bitmap blocks. ++ ++ Fake block numbers for data blocks are generated by a cyclic counter, which ++ gets incremented after each real block allocation. We assume that it is ++ impossible to overload this counter during one transaction life. */ ++ ++/* Initialize a blocknr hint. */ ++void reiser4_blocknr_hint_init(reiser4_blocknr_hint * hint) ++{ ++ memset(hint, 0, sizeof(reiser4_blocknr_hint)); ++} ++ ++/* Release any resources of a blocknr hint. */ ++void reiser4_blocknr_hint_done(reiser4_blocknr_hint * hint UNUSED_ARG) ++{ ++ /* No resources should be freed in current blocknr_hint implementation. */ ++} ++ ++/* see above for explanation of fake block number. */ ++/* Audited by: green(2002.06.11) */ ++int reiser4_blocknr_is_fake(const reiser4_block_nr * da) ++{ ++ /* The reason for not simply returning result of '&' operation is that ++ while return value is (possibly 32bit) int, the reiser4_block_nr is ++ at least 64 bits long, and high bit (which is the only possible ++ non zero bit after the masking) would be stripped off */ ++ return (*da & REISER4_FAKE_BLOCKNR_BIT_MASK) ? 1 : 0; ++} ++ ++/* Static functions for / block counters ++ arithmetic. 
Mostly, they are isolated to not to code same assertions in ++ several places. */ ++static void sub_from_ctx_grabbed(reiser4_context * ctx, __u64 count) ++{ ++ BUG_ON(ctx->grabbed_blocks < count); ++ assert("zam-527", ctx->grabbed_blocks >= count); ++ ctx->grabbed_blocks -= count; ++} ++ ++static void add_to_ctx_grabbed(reiser4_context * ctx, __u64 count) ++{ ++ ctx->grabbed_blocks += count; ++} ++ ++static void sub_from_sb_grabbed(reiser4_super_info_data * sbinfo, __u64 count) ++{ ++ assert("zam-525", sbinfo->blocks_grabbed >= count); ++ sbinfo->blocks_grabbed -= count; ++} ++ ++/* Decrease the counter of block reserved for flush in super block. */ ++static void ++sub_from_sb_flush_reserved(reiser4_super_info_data * sbinfo, __u64 count) ++{ ++ assert("vpf-291", sbinfo->blocks_flush_reserved >= count); ++ sbinfo->blocks_flush_reserved -= count; ++} ++ ++static void ++sub_from_sb_fake_allocated(reiser4_super_info_data * sbinfo, __u64 count, ++ reiser4_ba_flags_t flags) ++{ ++ if (flags & BA_FORMATTED) { ++ assert("zam-806", sbinfo->blocks_fake_allocated >= count); ++ sbinfo->blocks_fake_allocated -= count; ++ } else { ++ assert("zam-528", ++ sbinfo->blocks_fake_allocated_unformatted >= count); ++ sbinfo->blocks_fake_allocated_unformatted -= count; ++ } ++} ++ ++static void sub_from_sb_used(reiser4_super_info_data * sbinfo, __u64 count) ++{ ++ assert("zam-530", ++ sbinfo->blocks_used >= count + sbinfo->min_blocks_used); ++ sbinfo->blocks_used -= count; ++} ++ ++static void ++sub_from_cluster_reserved(reiser4_super_info_data * sbinfo, __u64 count) ++{ ++ assert("edward-501", sbinfo->blocks_clustered >= count); ++ sbinfo->blocks_clustered -= count; ++} ++ ++/* Increase the counter of block reserved for flush in atom. 
*/ ++static void add_to_atom_flush_reserved_nolock(txn_atom * atom, __u32 count) ++{ ++ assert("zam-772", atom != NULL); ++ assert_spin_locked(&(atom->alock)); ++ atom->flush_reserved += count; ++} ++ ++/* Decrease the counter of block reserved for flush in atom. */ ++static void sub_from_atom_flush_reserved_nolock(txn_atom * atom, __u32 count) ++{ ++ assert("zam-774", atom != NULL); ++ assert_spin_locked(&(atom->alock)); ++ assert("nikita-2790", atom->flush_reserved >= count); ++ atom->flush_reserved -= count; ++} ++ ++/* super block has 6 counters: free, used, grabbed, fake allocated ++ (formatted and unformatted) and flush reserved. Their sum must be ++ number of blocks on a device. This function checks this */ ++int reiser4_check_block_counters(const struct super_block *super) ++{ ++ __u64 sum; ++ ++ sum = reiser4_grabbed_blocks(super) + reiser4_free_blocks(super) + ++ reiser4_data_blocks(super) + reiser4_fake_allocated(super) + ++ reiser4_fake_allocated_unformatted(super) + reiser4_flush_reserved(super) + ++ reiser4_clustered_blocks(super); ++ if (reiser4_block_count(super) != sum) { ++ printk("super block counters: " ++ "used %llu, free %llu, " ++ "grabbed %llu, fake allocated (formatetd %llu, unformatted %llu), " ++ "reserved %llu, clustered %llu, sum %llu, must be (block count) %llu\n", ++ (unsigned long long)reiser4_data_blocks(super), ++ (unsigned long long)reiser4_free_blocks(super), ++ (unsigned long long)reiser4_grabbed_blocks(super), ++ (unsigned long long)reiser4_fake_allocated(super), ++ (unsigned long long) ++ reiser4_fake_allocated_unformatted(super), ++ (unsigned long long)reiser4_flush_reserved(super), ++ (unsigned long long)reiser4_clustered_blocks(super), ++ (unsigned long long)sum, ++ (unsigned long long)reiser4_block_count(super)); ++ return 0; ++ } ++ return 1; ++} ++ ++/* Adjust "working" free blocks counter for number of blocks we are going to ++ allocate. Record number of grabbed blocks in fs-wide and per-thread ++ counters. 
This function should be called before bitmap scanning or ++ allocating fake block numbers ++ ++ @super -- pointer to reiser4 super block; ++ @count -- number of blocks we reserve; ++ ++ @return -- 0 if success, -ENOSPC, if all ++ free blocks are preserved or already allocated. ++*/ ++ ++static int ++reiser4_grab(reiser4_context * ctx, __u64 count, reiser4_ba_flags_t flags) ++{ ++ __u64 free_blocks; ++ int ret = 0, use_reserved = flags & BA_RESERVED; ++ reiser4_super_info_data *sbinfo; ++ ++ assert("vs-1276", ctx == get_current_context()); ++ ++ /* Do not grab anything on ro-mounted fs. */ ++ if (rofs_super(ctx->super)) { ++ ctx->grab_enabled = 0; ++ return 0; ++ } ++ ++ sbinfo = get_super_private(ctx->super); ++ ++ spin_lock_reiser4_super(sbinfo); ++ ++ free_blocks = sbinfo->blocks_free; ++ ++ if ((use_reserved && free_blocks < count) || ++ (!use_reserved && free_blocks < count + sbinfo->blocks_reserved)) { ++ ret = RETERR(-ENOSPC); ++ goto unlock_and_ret; ++ } ++ ++ add_to_ctx_grabbed(ctx, count); ++ ++ sbinfo->blocks_grabbed += count; ++ sbinfo->blocks_free -= count; ++ ++#if REISER4_DEBUG ++ if (ctx->grabbed_initially == 0) ++ ctx->grabbed_initially = count; ++#endif ++ ++ assert("nikita-2986", reiser4_check_block_counters(ctx->super)); ++ ++ /* disable grab space in current context */ ++ ctx->grab_enabled = 0; ++ ++ unlock_and_ret: ++ spin_unlock_reiser4_super(sbinfo); ++ ++ return ret; ++} ++ ++int reiser4_grab_space(__u64 count, reiser4_ba_flags_t flags) ++{ ++ int ret; ++ reiser4_context *ctx; ++ ++ assert("nikita-2964", ergo(flags & BA_CAN_COMMIT, ++ lock_stack_isclean(get_current_lock_stack ++ ()))); ++ ctx = get_current_context(); ++ if (!(flags & BA_FORCE) && !is_grab_enabled(ctx)) { ++ return 0; ++ } ++ ++ ret = reiser4_grab(ctx, count, flags); ++ if (ret == -ENOSPC) { ++ ++ /* Trying to commit the all transactions if BA_CAN_COMMIT flag present */ ++ if (flags & BA_CAN_COMMIT) { ++ txnmgr_force_commit_all(ctx->super, 0); ++ ctx->grab_enabled = 1; ++ ret 
= reiser4_grab(ctx, count, flags); ++ } ++ } ++ /* ++ * allocation from reserved pool cannot fail. This is severe error. ++ */ ++ assert("nikita-3005", ergo(flags & BA_RESERVED, ret == 0)); ++ return ret; ++} ++ ++/* ++ * SPACE RESERVED FOR UNLINK/TRUNCATE ++ * ++ * Unlink and truncate require space in transaction (to update stat data, at ++ * least). But we don't want rm(1) to fail with "No space on device" error. ++ * ++ * Solution is to reserve 5% of disk space for truncates and ++ * unlinks. Specifically, normal space grabbing requests don't grab space from ++ * reserved area. Only requests with BA_RESERVED bit in flags are allowed to ++ * drain it. Per super block delete mutex is used to allow only one ++ * thread at a time to grab from reserved area. ++ * ++ * Grabbing from reserved area should always be performed with BA_CAN_COMMIT ++ * flag. ++ * ++ */ ++ ++int reiser4_grab_reserved(struct super_block *super, ++ __u64 count, reiser4_ba_flags_t flags) ++{ ++ reiser4_super_info_data *sbinfo = get_super_private(super); ++ ++ assert("nikita-3175", flags & BA_CAN_COMMIT); ++ ++ /* Check the delete mutex already taken by us, we assume that ++ * reading of machine word is atomic. 
*/ ++ if (sbinfo->delete_mutex_owner == current) { ++ if (reiser4_grab_space ++ (count, (flags | BA_RESERVED) & ~BA_CAN_COMMIT)) { ++ warning("zam-1003", ++ "nested call of grab_reserved fails count=(%llu)", ++ (unsigned long long)count); ++ reiser4_release_reserved(super); ++ return RETERR(-ENOSPC); ++ } ++ return 0; ++ } ++ ++ if (reiser4_grab_space(count, flags)) { ++ mutex_lock(&sbinfo->delete_mutex); ++ assert("nikita-2929", sbinfo->delete_mutex_owner == NULL); ++ sbinfo->delete_mutex_owner = current; ++ ++ if (reiser4_grab_space(count, flags | BA_RESERVED)) { ++ warning("zam-833", ++ "reserved space is not enough (%llu)", ++ (unsigned long long)count); ++ reiser4_release_reserved(super); ++ return RETERR(-ENOSPC); ++ } ++ } ++ return 0; ++} ++ ++void reiser4_release_reserved(struct super_block *super) ++{ ++ reiser4_super_info_data *info; ++ ++ info = get_super_private(super); ++ if (info->delete_mutex_owner == current) { ++ info->delete_mutex_owner = NULL; ++ mutex_unlock(&info->delete_mutex); ++ } ++} ++ ++static reiser4_super_info_data *grabbed2fake_allocated_head(int count) ++{ ++ reiser4_context *ctx; ++ reiser4_super_info_data *sbinfo; ++ ++ ctx = get_current_context(); ++ sub_from_ctx_grabbed(ctx, count); ++ ++ sbinfo = get_super_private(ctx->super); ++ spin_lock_reiser4_super(sbinfo); ++ ++ sub_from_sb_grabbed(sbinfo, count); ++ /* return sbinfo locked */ ++ return sbinfo; ++} ++ ++/* is called after @count fake block numbers are allocated and pointer to ++ those blocks are inserted into tree. 
*/ ++static void grabbed2fake_allocated_formatted(void) ++{ ++ reiser4_super_info_data *sbinfo; ++ ++ sbinfo = grabbed2fake_allocated_head(1); ++ sbinfo->blocks_fake_allocated++; ++ ++ assert("vs-922", reiser4_check_block_counters(reiser4_get_current_sb())); ++ ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++/** ++ * grabbed2fake_allocated_unformatted ++ * @count: ++ * ++ */ ++static void grabbed2fake_allocated_unformatted(int count) ++{ ++ reiser4_super_info_data *sbinfo; ++ ++ sbinfo = grabbed2fake_allocated_head(count); ++ sbinfo->blocks_fake_allocated_unformatted += count; ++ ++ assert("vs-9221", reiser4_check_block_counters(reiser4_get_current_sb())); ++ ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++void grabbed2cluster_reserved(int count) ++{ ++ reiser4_context *ctx; ++ reiser4_super_info_data *sbinfo; ++ ++ ctx = get_current_context(); ++ sub_from_ctx_grabbed(ctx, count); ++ ++ sbinfo = get_super_private(ctx->super); ++ spin_lock_reiser4_super(sbinfo); ++ ++ sub_from_sb_grabbed(sbinfo, count); ++ sbinfo->blocks_clustered += count; ++ ++ assert("edward-504", reiser4_check_block_counters(ctx->super)); ++ ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++void cluster_reserved2grabbed(int count) ++{ ++ reiser4_context *ctx; ++ reiser4_super_info_data *sbinfo; ++ ++ ctx = get_current_context(); ++ ++ sbinfo = get_super_private(ctx->super); ++ spin_lock_reiser4_super(sbinfo); ++ ++ sub_from_cluster_reserved(sbinfo, count); ++ sbinfo->blocks_grabbed += count; ++ ++ assert("edward-505", reiser4_check_block_counters(ctx->super)); ++ ++ spin_unlock_reiser4_super(sbinfo); ++ add_to_ctx_grabbed(ctx, count); ++} ++ ++void cluster_reserved2free(int count) ++{ ++ reiser4_context *ctx; ++ reiser4_super_info_data *sbinfo; ++ ++ ctx = get_current_context(); ++ sbinfo = get_super_private(ctx->super); ++ ++ cluster_reserved2grabbed(count); ++ grabbed2free(ctx, sbinfo, count); ++} ++ ++static DEFINE_SPINLOCK(fake_lock); ++static reiser4_block_nr fake_gen = 0; ++ ++/** ++ * 
assign_fake_blocknr ++ * @blocknr: ++ * @count: ++ * ++ * Obtain a fake block number for new node which will be used to refer to ++ * this newly allocated node until real allocation is done. ++ */ ++static void assign_fake_blocknr(reiser4_block_nr *blocknr, int count) ++{ ++ spin_lock(&fake_lock); ++ *blocknr = fake_gen; ++ fake_gen += count; ++ spin_unlock(&fake_lock); ++ ++ BUG_ON(*blocknr & REISER4_BLOCKNR_STATUS_BIT_MASK); ++ /**blocknr &= ~REISER4_BLOCKNR_STATUS_BIT_MASK;*/ ++ *blocknr |= REISER4_UNALLOCATED_STATUS_VALUE; ++ assert("zam-394", zlook(current_tree, blocknr) == NULL); ++} ++ ++int assign_fake_blocknr_formatted(reiser4_block_nr * blocknr) ++{ ++ assign_fake_blocknr(blocknr, 1); ++ grabbed2fake_allocated_formatted(); ++ return 0; ++} ++ ++/** ++ * fake_blocknrs_unformatted ++ * @count: number of fake numbers to get ++ * ++ * Allocates @count fake block numbers which will be assigned to jnodes ++ */ ++reiser4_block_nr fake_blocknr_unformatted(int count) ++{ ++ reiser4_block_nr blocknr; ++ ++ assign_fake_blocknr(&blocknr, count); ++ grabbed2fake_allocated_unformatted(count); ++ ++ return blocknr; ++} ++ ++/* adjust sb block counters, if real (on-disk) block allocation immediately ++ follows grabbing of free disk space. 
*/ ++static void grabbed2used(reiser4_context *ctx, reiser4_super_info_data *sbinfo, ++ __u64 count) ++{ ++ sub_from_ctx_grabbed(ctx, count); ++ ++ spin_lock_reiser4_super(sbinfo); ++ ++ sub_from_sb_grabbed(sbinfo, count); ++ sbinfo->blocks_used += count; ++ ++ assert("nikita-2679", reiser4_check_block_counters(ctx->super)); ++ ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++/* adjust sb block counters when @count unallocated blocks get mapped to disk */ ++static void fake_allocated2used(reiser4_super_info_data *sbinfo, __u64 count, ++ reiser4_ba_flags_t flags) ++{ ++ spin_lock_reiser4_super(sbinfo); ++ ++ sub_from_sb_fake_allocated(sbinfo, count, flags); ++ sbinfo->blocks_used += count; ++ ++ assert("nikita-2680", ++ reiser4_check_block_counters(reiser4_get_current_sb())); ++ ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++static void flush_reserved2used(txn_atom * atom, __u64 count) ++{ ++ reiser4_super_info_data *sbinfo; ++ ++ assert("zam-787", atom != NULL); ++ assert_spin_locked(&(atom->alock)); ++ ++ sub_from_atom_flush_reserved_nolock(atom, (__u32) count); ++ ++ sbinfo = get_current_super_private(); ++ spin_lock_reiser4_super(sbinfo); ++ ++ sub_from_sb_flush_reserved(sbinfo, count); ++ sbinfo->blocks_used += count; ++ ++ assert("zam-789", ++ reiser4_check_block_counters(reiser4_get_current_sb())); ++ ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++/* update the per fs blocknr hint default value. 
*/ ++void ++update_blocknr_hint_default(const struct super_block *s, ++ const reiser4_block_nr * block) ++{ ++ reiser4_super_info_data *sbinfo = get_super_private(s); ++ ++ assert("nikita-3342", !reiser4_blocknr_is_fake(block)); ++ ++ spin_lock_reiser4_super(sbinfo); ++ if (*block < sbinfo->block_count) { ++ sbinfo->blocknr_hint_default = *block; ++ } else { ++ warning("zam-676", ++ "block number %llu is too large to be used in a blocknr hint\n", ++ (unsigned long long)*block); ++ dump_stack(); ++ DEBUGON(1); ++ } ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++/* get current value of the default blocknr hint. */ ++void get_blocknr_hint_default(reiser4_block_nr * result) ++{ ++ reiser4_super_info_data *sbinfo = get_current_super_private(); ++ ++ spin_lock_reiser4_super(sbinfo); ++ *result = sbinfo->blocknr_hint_default; ++ assert("zam-677", *result < sbinfo->block_count); ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++/* Allocate "real" disk blocks by calling a proper space allocation plugin ++ * method. Blocks are allocated in one contiguous disk region. The plugin ++ * independent part accounts blocks by subtracting allocated amount from grabbed ++ * or fake block counter and add the same amount to the counter of allocated ++ * blocks. ++ * ++ * @hint -- a reiser4 blocknr hint object which contains further block ++ * allocation hints and parameters (search start, a stage of block ++ * which will be mapped to disk, etc.), ++ * @blk -- an out parameter for the beginning of the allocated region, ++ * @len -- in/out parameter, it should contain the maximum number of allocated ++ * blocks, after block allocation completes, it contains the length of ++ * allocated disk region. ++ * @flags -- see reiser4_ba_flags_t description. ++ * ++ * @return -- 0 if success, error code otherwise. 
++ */ ++int ++reiser4_alloc_blocks(reiser4_blocknr_hint * hint, reiser4_block_nr * blk, ++ reiser4_block_nr * len, reiser4_ba_flags_t flags) ++{ ++ __u64 needed = *len; ++ reiser4_context *ctx; ++ reiser4_super_info_data *sbinfo; ++ int ret; ++ ++ assert("zam-986", hint != NULL); ++ ++ ctx = get_current_context(); ++ sbinfo = get_super_private(ctx->super); ++ ++ /* For write-optimized data we use default search start value, which is ++ * close to last write location. */ ++ if (flags & BA_USE_DEFAULT_SEARCH_START) { ++ get_blocknr_hint_default(&hint->blk); ++ } ++ ++ /* VITALY: allocator should grab this for internal/tx-lists/similar only. */ ++/* VS-FIXME-HANS: why is this comment above addressed to vitaly (from vitaly)? */ ++ if (hint->block_stage == BLOCK_NOT_COUNTED) { ++ ret = reiser4_grab_space_force(*len, flags); ++ if (ret != 0) ++ return ret; ++ } ++ ++ ret = ++ sa_alloc_blocks(reiser4_get_space_allocator(ctx->super), ++ hint, (int)needed, blk, len); ++ ++ if (!ret) { ++ assert("zam-680", *blk < reiser4_block_count(ctx->super)); ++ assert("zam-681", ++ *blk + *len <= reiser4_block_count(ctx->super)); ++ ++ if (flags & BA_PERMANENT) { ++ /* we assume that current atom exists at this moment */ ++ txn_atom *atom = get_current_atom_locked(); ++ atom->nr_blocks_allocated += *len; ++ spin_unlock_atom(atom); ++ } ++ ++ switch (hint->block_stage) { ++ case BLOCK_NOT_COUNTED: ++ case BLOCK_GRABBED: ++ grabbed2used(ctx, sbinfo, *len); ++ break; ++ case BLOCK_UNALLOCATED: ++ fake_allocated2used(sbinfo, *len, flags); ++ break; ++ case BLOCK_FLUSH_RESERVED: ++ { ++ txn_atom *atom = get_current_atom_locked(); ++ flush_reserved2used(atom, *len); ++ spin_unlock_atom(atom); ++ } ++ break; ++ default: ++ impossible("zam-531", "wrong block stage"); ++ } ++ } else { ++ assert("zam-821", ++ ergo(hint->max_dist == 0 ++ && !hint->backward, ret != -ENOSPC)); ++ if (hint->block_stage == BLOCK_NOT_COUNTED) ++ grabbed2free(ctx, sbinfo, needed); ++ } ++ ++ return ret; ++} ++ ++/* used 
-> fake_allocated -> grabbed -> free */ ++ ++/* adjust sb block counters when @count unallocated blocks get unmapped from ++ disk */ ++static void ++used2fake_allocated(reiser4_super_info_data * sbinfo, __u64 count, ++ int formatted) ++{ ++ spin_lock_reiser4_super(sbinfo); ++ ++ if (formatted) ++ sbinfo->blocks_fake_allocated += count; ++ else ++ sbinfo->blocks_fake_allocated_unformatted += count; ++ ++ sub_from_sb_used(sbinfo, count); ++ ++ assert("nikita-2681", ++ reiser4_check_block_counters(reiser4_get_current_sb())); ++ ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++static void ++used2flush_reserved(reiser4_super_info_data * sbinfo, txn_atom * atom, ++ __u64 count, reiser4_ba_flags_t flags UNUSED_ARG) ++{ ++ assert("nikita-2791", atom != NULL); ++ assert_spin_locked(&(atom->alock)); ++ ++ add_to_atom_flush_reserved_nolock(atom, (__u32) count); ++ ++ spin_lock_reiser4_super(sbinfo); ++ ++ sbinfo->blocks_flush_reserved += count; ++ /*add_to_sb_flush_reserved(sbinfo, count); */ ++ sub_from_sb_used(sbinfo, count); ++ ++ assert("nikita-2681", ++ reiser4_check_block_counters(reiser4_get_current_sb())); ++ ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++/* disk space, virtually used by fake block numbers is counted as "grabbed" again. 
*/ ++static void ++fake_allocated2grabbed(reiser4_context * ctx, reiser4_super_info_data * sbinfo, ++ __u64 count, reiser4_ba_flags_t flags) ++{ ++ add_to_ctx_grabbed(ctx, count); ++ ++ spin_lock_reiser4_super(sbinfo); ++ ++ assert("nikita-2682", reiser4_check_block_counters(ctx->super)); ++ ++ sbinfo->blocks_grabbed += count; ++ sub_from_sb_fake_allocated(sbinfo, count, flags & BA_FORMATTED); ++ ++ assert("nikita-2683", reiser4_check_block_counters(ctx->super)); ++ ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++void fake_allocated2free(__u64 count, reiser4_ba_flags_t flags) ++{ ++ reiser4_context *ctx; ++ reiser4_super_info_data *sbinfo; ++ ++ ctx = get_current_context(); ++ sbinfo = get_super_private(ctx->super); ++ ++ fake_allocated2grabbed(ctx, sbinfo, count, flags); ++ grabbed2free(ctx, sbinfo, count); ++} ++ ++void grabbed2free_mark(__u64 mark) ++{ ++ reiser4_context *ctx; ++ reiser4_super_info_data *sbinfo; ++ ++ ctx = get_current_context(); ++ sbinfo = get_super_private(ctx->super); ++ ++ assert("nikita-3007", (__s64) mark >= 0); ++ assert("nikita-3006", ctx->grabbed_blocks >= mark); ++ grabbed2free(ctx, sbinfo, ctx->grabbed_blocks - mark); ++} ++ ++/** ++ * grabbed2free - adjust grabbed and free block counters ++ * @ctx: context to update grabbed block counter of ++ * @sbinfo: super block to update grabbed and free block counters of ++ * @count: number of blocks to adjust counters by ++ * ++ * Decreases context's and per filesystem's counters of grabbed ++ * blocks. Increases per filesystem's counter of free blocks. 
++ */ ++void grabbed2free(reiser4_context *ctx, reiser4_super_info_data *sbinfo, ++ __u64 count) ++{ ++ sub_from_ctx_grabbed(ctx, count); ++ ++ spin_lock_reiser4_super(sbinfo); ++ ++ sub_from_sb_grabbed(sbinfo, count); ++ sbinfo->blocks_free += count; ++ assert("nikita-2684", reiser4_check_block_counters(ctx->super)); ++ ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++void grabbed2flush_reserved_nolock(txn_atom * atom, __u64 count) ++{ ++ reiser4_context *ctx; ++ reiser4_super_info_data *sbinfo; ++ ++ assert("vs-1095", atom); ++ ++ ctx = get_current_context(); ++ sbinfo = get_super_private(ctx->super); ++ ++ sub_from_ctx_grabbed(ctx, count); ++ ++ add_to_atom_flush_reserved_nolock(atom, count); ++ ++ spin_lock_reiser4_super(sbinfo); ++ ++ sbinfo->blocks_flush_reserved += count; ++ sub_from_sb_grabbed(sbinfo, count); ++ ++ assert("vpf-292", reiser4_check_block_counters(ctx->super)); ++ ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++void grabbed2flush_reserved(__u64 count) ++{ ++ txn_atom *atom = get_current_atom_locked(); ++ ++ grabbed2flush_reserved_nolock(atom, count); ++ ++ spin_unlock_atom(atom); ++} ++ ++void flush_reserved2grabbed(txn_atom * atom, __u64 count) ++{ ++ reiser4_context *ctx; ++ reiser4_super_info_data *sbinfo; ++ ++ assert("nikita-2788", atom != NULL); ++ assert_spin_locked(&(atom->alock)); ++ ++ ctx = get_current_context(); ++ sbinfo = get_super_private(ctx->super); ++ ++ add_to_ctx_grabbed(ctx, count); ++ ++ sub_from_atom_flush_reserved_nolock(atom, (__u32) count); ++ ++ spin_lock_reiser4_super(sbinfo); ++ ++ sbinfo->blocks_grabbed += count; ++ sub_from_sb_flush_reserved(sbinfo, count); ++ ++ assert("vpf-292", reiser4_check_block_counters(ctx->super)); ++ ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++/** ++ * all_grabbed2free - releases all blocks grabbed in context ++ * ++ * Decreases context's and super block's grabbed block counters by number of ++ * blocks grabbed by current context and increases super block's free block ++ * counter 
correspondingly. ++ */ ++void all_grabbed2free(void) ++{ ++ reiser4_context *ctx = get_current_context(); ++ ++ grabbed2free(ctx, get_super_private(ctx->super), ctx->grabbed_blocks); ++} ++ ++/* adjust sb block counters if real (on-disk) blocks do not become unallocated ++ after freeing, @count blocks become "grabbed". */ ++static void ++used2grabbed(reiser4_context * ctx, reiser4_super_info_data * sbinfo, ++ __u64 count) ++{ ++ add_to_ctx_grabbed(ctx, count); ++ ++ spin_lock_reiser4_super(sbinfo); ++ ++ sbinfo->blocks_grabbed += count; ++ sub_from_sb_used(sbinfo, count); ++ ++ assert("nikita-2685", reiser4_check_block_counters(ctx->super)); ++ ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++/* this used to be done through used2grabbed and grabbed2free*/ ++static void used2free(reiser4_super_info_data * sbinfo, __u64 count) ++{ ++ spin_lock_reiser4_super(sbinfo); ++ ++ sbinfo->blocks_free += count; ++ sub_from_sb_used(sbinfo, count); ++ ++ assert("nikita-2685", ++ reiser4_check_block_counters(reiser4_get_current_sb())); ++ ++ spin_unlock_reiser4_super(sbinfo); ++} ++ ++#if REISER4_DEBUG ++ ++/* check "allocated" state of given block range */ ++static void ++reiser4_check_blocks(const reiser4_block_nr * start, ++ const reiser4_block_nr * len, int desired) ++{ ++ sa_check_blocks(start, len, desired); ++} ++ ++/* check "allocated" state of given block */ ++void reiser4_check_block(const reiser4_block_nr * block, int desired) ++{ ++ const reiser4_block_nr one = 1; ++ ++ reiser4_check_blocks(block, &one, desired); ++} ++ ++#endif ++ ++/* Blocks deallocation function may do an actual deallocation through space ++ plugin allocation or store deleted block numbers in atom's delete_set data ++ structure depend on @defer parameter. */ ++ ++/* if BA_DEFER bit is not turned on, @target_stage means the stage of blocks which ++ will be deleted from WORKING bitmap. 
They might be just unmapped from disk, or ++ freed but disk space is still grabbed by current thread, or these blocks must ++ not be counted in any reiser4 sb block counters, see block_stage_t comment */ ++ ++/* BA_FORMATTED bit is only used when BA_DEFER in not present: it is used to ++ distinguish blocks allocated for unformatted and formatted nodes */ ++ ++int ++reiser4_dealloc_blocks(const reiser4_block_nr * start, ++ const reiser4_block_nr * len, ++ block_stage_t target_stage, reiser4_ba_flags_t flags) ++{ ++ txn_atom *atom = NULL; ++ int ret; ++ reiser4_context *ctx; ++ reiser4_super_info_data *sbinfo; ++ ++ ctx = get_current_context(); ++ sbinfo = get_super_private(ctx->super); ++ ++ if (REISER4_DEBUG) { ++ assert("zam-431", *len != 0); ++ assert("zam-432", *start != 0); ++ assert("zam-558", !reiser4_blocknr_is_fake(start)); ++ ++ spin_lock_reiser4_super(sbinfo); ++ assert("zam-562", *start < sbinfo->block_count); ++ spin_unlock_reiser4_super(sbinfo); ++ } ++ ++ if (flags & BA_DEFER) { ++ blocknr_set_entry *bsep = NULL; ++ ++ /* storing deleted block numbers in a blocknr set ++ datastructure for further actual deletion */ ++ do { ++ atom = get_current_atom_locked(); ++ assert("zam-430", atom != NULL); ++ ++ ret = ++ blocknr_set_add_extent(atom, &atom->delete_set, ++ &bsep, start, len); ++ ++ if (ret == -ENOMEM) ++ return ret; ++ ++ /* This loop might spin at most two times */ ++ } while (ret == -E_REPEAT); ++ ++ assert("zam-477", ret == 0); ++ assert("zam-433", atom != NULL); ++ ++ spin_unlock_atom(atom); ++ ++ } else { ++ assert("zam-425", get_current_super_private() != NULL); ++ sa_dealloc_blocks(reiser4_get_space_allocator(ctx->super), ++ *start, *len); ++ ++ if (flags & BA_PERMANENT) { ++ /* These blocks were counted as allocated, we have to revert it ++ * back if allocation is discarded. 
*/ ++ txn_atom *atom = get_current_atom_locked(); ++ atom->nr_blocks_allocated -= *len; ++ spin_unlock_atom(atom); ++ } ++ ++ switch (target_stage) { ++ case BLOCK_NOT_COUNTED: ++ assert("vs-960", flags & BA_FORMATTED); ++ /* VITALY: This is what was grabbed for internal/tx-lists/similar only */ ++ used2free(sbinfo, *len); ++ break; ++ ++ case BLOCK_GRABBED: ++ used2grabbed(ctx, sbinfo, *len); ++ break; ++ ++ case BLOCK_UNALLOCATED: ++ used2fake_allocated(sbinfo, *len, flags & BA_FORMATTED); ++ break; ++ ++ case BLOCK_FLUSH_RESERVED:{ ++ txn_atom *atom; ++ ++ atom = get_current_atom_locked(); ++ used2flush_reserved(sbinfo, atom, *len, ++ flags & BA_FORMATTED); ++ spin_unlock_atom(atom); ++ break; ++ } ++ default: ++ impossible("zam-532", "wrong block stage"); ++ } ++ } ++ ++ return 0; ++} ++ ++/* wrappers for block allocator plugin methods */ ++int reiser4_pre_commit_hook(void) ++{ ++ assert("zam-502", get_current_super_private() != NULL); ++ sa_pre_commit_hook(); ++ return 0; ++} ++ ++/* an actor which applies delete set to block allocator data */ ++static int ++apply_dset(txn_atom * atom UNUSED_ARG, const reiser4_block_nr * a, ++ const reiser4_block_nr * b, void *data UNUSED_ARG) ++{ ++ reiser4_context *ctx; ++ reiser4_super_info_data *sbinfo; ++ ++ __u64 len = 1; ++ ++ ctx = get_current_context(); ++ sbinfo = get_super_private(ctx->super); ++ ++ assert("zam-877", atom->stage >= ASTAGE_PRE_COMMIT); ++ assert("zam-552", sbinfo != NULL); ++ ++ if (b != NULL) ++ len = *b; ++ ++ if (REISER4_DEBUG) { ++ spin_lock_reiser4_super(sbinfo); ++ ++ assert("zam-554", *a < reiser4_block_count(ctx->super)); ++ assert("zam-555", *a + len <= reiser4_block_count(ctx->super)); ++ ++ spin_unlock_reiser4_super(sbinfo); ++ } ++ ++ sa_dealloc_blocks(&sbinfo->space_allocator, *a, len); ++ /* adjust sb block counters */ ++ used2free(sbinfo, len); ++ return 0; ++} ++ ++void reiser4_post_commit_hook(void) ++{ ++ txn_atom *atom; ++ ++ atom = get_current_atom_locked(); ++ assert("zam-452", 
atom->stage == ASTAGE_POST_COMMIT); ++ spin_unlock_atom(atom); ++ ++ /* do the block deallocation which was deferred ++ until commit is done */ ++ blocknr_set_iterator(atom, &atom->delete_set, apply_dset, NULL, 1); ++ ++ assert("zam-504", get_current_super_private() != NULL); ++ sa_post_commit_hook(); ++} ++ ++void reiser4_post_write_back_hook(void) ++{ ++ assert("zam-504", get_current_super_private() != NULL); ++ ++ sa_post_commit_hook(); ++} ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/block_alloc.h b/fs/reiser4/block_alloc.h +new file mode 100644 +index 0000000..f4b79f8 +--- /dev/null ++++ b/fs/reiser4/block_alloc.h +@@ -0,0 +1,175 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#if !defined (__FS_REISER4_BLOCK_ALLOC_H__) ++#define __FS_REISER4_BLOCK_ALLOC_H__ ++ ++#include "dformat.h" ++#include "forward.h" ++ ++#include /* for __u?? 
*/ ++#include ++ ++/* Mask when is applied to given block number shows is that block number is a fake one */ ++#define REISER4_FAKE_BLOCKNR_BIT_MASK 0x8000000000000000ULL ++/* Mask which isolates a type of object this fake block number was assigned to */ ++#define REISER4_BLOCKNR_STATUS_BIT_MASK 0xC000000000000000ULL ++ ++/*result after applying the REISER4_BLOCKNR_STATUS_BIT_MASK should be compared ++ against these two values to understand is the object unallocated or bitmap ++ shadow object (WORKING BITMAP block, look at the plugin/space/bitmap.c) */ ++#define REISER4_UNALLOCATED_STATUS_VALUE 0xC000000000000000ULL ++#define REISER4_BITMAP_BLOCKS_STATUS_VALUE 0x8000000000000000ULL ++ ++/* specification how block allocation was counted in sb block counters */ ++typedef enum { ++ BLOCK_NOT_COUNTED = 0, /* reiser4 has no info about this block yet */ ++ BLOCK_GRABBED = 1, /* free space grabbed for further allocation ++ of this block */ ++ BLOCK_FLUSH_RESERVED = 2, /* block is reserved for flush needs. */ ++ BLOCK_UNALLOCATED = 3, /* block is used for existing in-memory object ++ ( unallocated formatted or unformatted ++ node) */ ++ BLOCK_ALLOCATED = 4 /* block is mapped to disk, real on-disk block ++ number assigned */ ++} block_stage_t; ++ ++/* a hint for block allocator */ ++struct reiser4_blocknr_hint { ++ /* FIXME: I think we want to add a longterm lock on the bitmap block here. This ++ is to prevent jnode_flush() calls from interleaving allocations on the same ++ bitmap, once a hint is established. */ ++ ++ /* search start hint */ ++ reiser4_block_nr blk; ++ /* if not zero, it is a region size we search for free blocks in */ ++ reiser4_block_nr max_dist; ++ /* level for allocation, may be useful have branch-level and higher ++ write-optimized. 
*/ ++ tree_level level; ++ /* block allocator assumes that blocks, which will be mapped to disk, ++ are in this specified block_stage */ ++ block_stage_t block_stage; ++ /* If direction = 1 allocate blocks in backward direction from the end ++ * of disk to the beginning of disk. */ ++ unsigned int backward:1; ++ ++}; ++ ++/* These flags control block allocation/deallocation behavior */ ++enum reiser4_ba_flags { ++ /* do allocatations from reserved (5%) area */ ++ BA_RESERVED = (1 << 0), ++ ++ /* block allocator can do commit trying to recover free space */ ++ BA_CAN_COMMIT = (1 << 1), ++ ++ /* if operation will be applied to formatted block */ ++ BA_FORMATTED = (1 << 2), ++ ++ /* defer actual block freeing until transaction commit */ ++ BA_DEFER = (1 << 3), ++ ++ /* allocate blocks for permanent fs objects (formatted or unformatted), not ++ wandered of log blocks */ ++ BA_PERMANENT = (1 << 4), ++ ++ /* grab space even it was disabled */ ++ BA_FORCE = (1 << 5), ++ ++ /* use default start value for free blocks search. 
*/ ++ BA_USE_DEFAULT_SEARCH_START = (1 << 6) ++}; ++ ++typedef enum reiser4_ba_flags reiser4_ba_flags_t; ++ ++extern void reiser4_blocknr_hint_init(reiser4_blocknr_hint * hint); ++extern void reiser4_blocknr_hint_done(reiser4_blocknr_hint * hint); ++extern void update_blocknr_hint_default(const struct super_block *, ++ const reiser4_block_nr *); ++extern void get_blocknr_hint_default(reiser4_block_nr *); ++ ++extern reiser4_block_nr reiser4_fs_reserved_space(struct super_block *super); ++ ++int assign_fake_blocknr_formatted(reiser4_block_nr *); ++reiser4_block_nr fake_blocknr_unformatted(int); ++ ++/* free -> grabbed -> fake_allocated -> used */ ++ ++int reiser4_grab_space(__u64 count, reiser4_ba_flags_t flags); ++void all_grabbed2free(void); ++void grabbed2free(reiser4_context *, reiser4_super_info_data *, __u64 count); ++void fake_allocated2free(__u64 count, reiser4_ba_flags_t flags); ++void grabbed2flush_reserved_nolock(txn_atom * atom, __u64 count); ++void grabbed2flush_reserved(__u64 count); ++int reiser4_alloc_blocks(reiser4_blocknr_hint * hint, ++ reiser4_block_nr * start, ++ reiser4_block_nr * len, reiser4_ba_flags_t flags); ++int reiser4_dealloc_blocks(const reiser4_block_nr *, ++ const reiser4_block_nr *, ++ block_stage_t, reiser4_ba_flags_t flags); ++ ++static inline int reiser4_alloc_block(reiser4_blocknr_hint * hint, ++ reiser4_block_nr * start, ++ reiser4_ba_flags_t flags) ++{ ++ reiser4_block_nr one = 1; ++ return reiser4_alloc_blocks(hint, start, &one, flags); ++} ++ ++static inline int reiser4_dealloc_block(const reiser4_block_nr * block, ++ block_stage_t stage, ++ reiser4_ba_flags_t flags) ++{ ++ const reiser4_block_nr one = 1; ++ return reiser4_dealloc_blocks(block, &one, stage, flags); ++} ++ ++#define reiser4_grab_space_force(count, flags) \ ++ reiser4_grab_space(count, flags | BA_FORCE) ++ ++extern void grabbed2free_mark(__u64 mark); ++extern int reiser4_grab_reserved(struct super_block *, ++ __u64, reiser4_ba_flags_t); ++extern void 
reiser4_release_reserved(struct super_block *super); ++ ++/* grabbed -> fake_allocated */ ++ ++/* fake_allocated -> used */ ++ ++/* used -> fake_allocated -> grabbed -> free */ ++ ++extern void flush_reserved2grabbed(txn_atom * atom, __u64 count); ++ ++extern int reiser4_blocknr_is_fake(const reiser4_block_nr * da); ++ ++extern void grabbed2cluster_reserved(int count); ++extern void cluster_reserved2grabbed(int count); ++extern void cluster_reserved2free(int count); ++ ++extern int reiser4_check_block_counters(const struct super_block *); ++ ++#if REISER4_DEBUG ++ ++extern void reiser4_check_block(const reiser4_block_nr *, int); ++ ++#else ++ ++# define reiser4_check_block(beg, val) noop ++ ++#endif ++ ++extern int reiser4_pre_commit_hook(void); ++extern void reiser4_post_commit_hook(void); ++extern void reiser4_post_write_back_hook(void); ++ ++#endif /* __FS_REISER4_BLOCK_ALLOC_H__ */ ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/blocknrset.c b/fs/reiser4/blocknrset.c +new file mode 100644 +index 0000000..da50a5a +--- /dev/null ++++ b/fs/reiser4/blocknrset.c +@@ -0,0 +1,368 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* This file contains code for various block number sets used by the atom to ++ track the deleted set and wandered block mappings. */ ++ ++#include "debug.h" ++#include "dformat.h" ++#include "txnmgr.h" ++#include "context.h" ++ ++#include ++ ++/* The proposed data structure for storing unordered block number sets is a ++ list of elements, each of which contains an array of block number or/and ++ array of block number pairs. That element called blocknr_set_entry is used ++ to store block numbers from the beginning and for extents from the end of ++ the data field (char data[...]). The ->nr_blocks and ->nr_pairs fields ++ count numbers of blocks and extents. 
++ ++ +------------------- blocknr_set_entry->data ------------------+ ++ |block1|block2| ... ... |pair3|pair2|pair1| ++ +------------------------------------------------------------+ ++ ++ When current blocknr_set_entry is full, allocate a new one. */ ++ ++/* Usage examples: blocknr sets are used in reiser4 for storing atom's delete ++ * set (single blocks and block extents), in that case blocknr pair represent an ++ * extent; atom's wandered map is also stored as a blocknr set, blocknr pairs ++ * there represent a (real block) -> (wandered block) mapping. */ ++ ++/* Protection: blocknr sets belong to reiser4 atom, and ++ * their modifications are performed with the atom lock held */ ++ ++typedef struct blocknr_pair blocknr_pair; ++ ++/* The total size of a blocknr_set_entry. */ ++#define BLOCKNR_SET_ENTRY_SIZE 128 ++ ++/* The number of blocks that can fit the blocknr data area. */ ++#define BLOCKNR_SET_ENTRIES_NUMBER \ ++ ((BLOCKNR_SET_ENTRY_SIZE - \ ++ 2 * sizeof (unsigned) - \ ++ sizeof(struct list_head)) / \ ++ sizeof(reiser4_block_nr)) ++ ++/* An entry of the blocknr_set */ ++struct blocknr_set_entry { ++ unsigned nr_singles; ++ unsigned nr_pairs; ++ struct list_head link; ++ reiser4_block_nr entries[BLOCKNR_SET_ENTRIES_NUMBER]; ++}; ++ ++/* A pair of blocks as recorded in the blocknr_set_entry data. */ ++struct blocknr_pair { ++ reiser4_block_nr a; ++ reiser4_block_nr b; ++}; ++ ++/* Return the number of blocknr slots available in a blocknr_set_entry. */ ++/* Audited by: green(2002.06.11) */ ++static unsigned bse_avail(blocknr_set_entry * bse) ++{ ++ unsigned used = bse->nr_singles + 2 * bse->nr_pairs; ++ ++ assert("jmacd-5088", BLOCKNR_SET_ENTRIES_NUMBER >= used); ++ cassert(sizeof(blocknr_set_entry) == BLOCKNR_SET_ENTRY_SIZE); ++ ++ return BLOCKNR_SET_ENTRIES_NUMBER - used; ++} ++ ++/* Initialize a blocknr_set_entry. 
*/ ++static void bse_init(blocknr_set_entry *bse) ++{ ++ bse->nr_singles = 0; ++ bse->nr_pairs = 0; ++ INIT_LIST_HEAD(&bse->link); ++} ++ ++/* Allocate and initialize a blocknr_set_entry. */ ++/* Audited by: green(2002.06.11) */ ++static blocknr_set_entry *bse_alloc(void) ++{ ++ blocknr_set_entry *e; ++ ++ if ((e = (blocknr_set_entry *) kmalloc(sizeof(blocknr_set_entry), ++ reiser4_ctx_gfp_mask_get())) == NULL) ++ return NULL; ++ ++ bse_init(e); ++ ++ return e; ++} ++ ++/* Free a blocknr_set_entry. */ ++/* Audited by: green(2002.06.11) */ ++static void bse_free(blocknr_set_entry * bse) ++{ ++ kfree(bse); ++} ++ ++/* Add a block number to a blocknr_set_entry */ ++/* Audited by: green(2002.06.11) */ ++static void ++bse_put_single(blocknr_set_entry * bse, const reiser4_block_nr * block) ++{ ++ assert("jmacd-5099", bse_avail(bse) >= 1); ++ ++ bse->entries[bse->nr_singles++] = *block; ++} ++ ++/* Get a pair of block numbers */ ++/* Audited by: green(2002.06.11) */ ++static inline blocknr_pair *bse_get_pair(blocknr_set_entry * bse, unsigned pno) ++{ ++ assert("green-1", BLOCKNR_SET_ENTRIES_NUMBER >= 2 * (pno + 1)); ++ ++ return (blocknr_pair *) (bse->entries + BLOCKNR_SET_ENTRIES_NUMBER - ++ 2 * (pno + 1)); ++} ++ ++/* Add a pair of block numbers to a blocknr_set_entry */ ++/* Audited by: green(2002.06.11) */ ++static void ++bse_put_pair(blocknr_set_entry * bse, const reiser4_block_nr * a, ++ const reiser4_block_nr * b) ++{ ++ blocknr_pair *pair; ++ ++ assert("jmacd-5100", bse_avail(bse) >= 2 && a != NULL && b != NULL); ++ ++ pair = bse_get_pair(bse, bse->nr_pairs++); ++ ++ pair->a = *a; ++ pair->b = *b; ++} ++ ++/* Add either a block or pair of blocks to the block number set. The first ++ blocknr (@a) must be non-NULL. If @b is NULL a single blocknr is added, if ++ @b is non-NULL a pair is added. The block number set belongs to atom, and ++ the call is made with the atom lock held. There may not be enough space in ++ the current blocknr_set_entry. 
If new_bsep points to a non-NULL ++ blocknr_set_entry then it will be added to the blocknr_set and new_bsep ++ will be set to NULL. If new_bsep contains NULL then the atom lock will be ++ released and a new bse will be allocated in new_bsep. E_REPEAT will be ++ returned with the atom unlocked for the operation to be tried again. If ++ the operation succeeds, 0 is returned. If new_bsep is non-NULL and not ++ used during the call, it will be freed automatically. */ ++static int blocknr_set_add(txn_atom *atom, struct list_head *bset, ++ blocknr_set_entry **new_bsep, const reiser4_block_nr *a, ++ const reiser4_block_nr *b) ++{ ++ blocknr_set_entry *bse; ++ unsigned entries_needed; ++ ++ assert("jmacd-5101", a != NULL); ++ ++ entries_needed = (b == NULL) ? 1 : 2; ++ if (list_empty(bset) || ++ bse_avail(list_entry(bset->next, blocknr_set_entry, link)) < entries_needed) { ++ /* See if a bse was previously allocated. */ ++ if (*new_bsep == NULL) { ++ spin_unlock_atom(atom); ++ *new_bsep = bse_alloc(); ++ return (*new_bsep != NULL) ? -E_REPEAT : ++ RETERR(-ENOMEM); ++ } ++ ++ /* Put it on the head of the list. */ ++ list_add(&((*new_bsep)->link), bset); ++ ++ *new_bsep = NULL; ++ } ++ ++ /* Add the single or pair. */ ++ bse = list_entry(bset->next, blocknr_set_entry, link); ++ if (b == NULL) { ++ bse_put_single(bse, a); ++ } else { ++ bse_put_pair(bse, a, b); ++ } ++ ++ /* If new_bsep is non-NULL then there was an allocation race, free this copy. */ ++ if (*new_bsep != NULL) { ++ bse_free(*new_bsep); ++ *new_bsep = NULL; ++ } ++ ++ return 0; ++} ++ ++/* Add an extent to the block set. If the length is 1, it is treated as a ++ single block (e.g., reiser4_set_add_block). */ ++/* Audited by: green(2002.06.11) */ ++/* Auditor note: Entire call chain cannot hold any spinlocks, because ++ kmalloc might schedule. The only exception is atom spinlock, which is ++ properly freed. 
*/ ++int ++blocknr_set_add_extent(txn_atom * atom, ++ struct list_head * bset, ++ blocknr_set_entry ** new_bsep, ++ const reiser4_block_nr * start, ++ const reiser4_block_nr * len) ++{ ++ assert("jmacd-5102", start != NULL && len != NULL && *len > 0); ++ return blocknr_set_add(atom, bset, new_bsep, start, ++ *len == 1 ? NULL : len); ++} ++ ++/* Add a block pair to the block set. It adds exactly a pair, which is checked ++ * by an assertion that both arguments are not null.*/ ++/* Audited by: green(2002.06.11) */ ++/* Auditor note: Entire call chain cannot hold any spinlocks, because ++ kmalloc might schedule. The only exception is atom spinlock, which is ++ properly freed. */ ++int ++blocknr_set_add_pair(txn_atom * atom, ++ struct list_head * bset, ++ blocknr_set_entry ** new_bsep, const reiser4_block_nr * a, ++ const reiser4_block_nr * b) ++{ ++ assert("jmacd-5103", a != NULL && b != NULL); ++ return blocknr_set_add(atom, bset, new_bsep, a, b); ++} ++ ++/* Initialize a blocknr_set. */ ++void blocknr_set_init(struct list_head *bset) ++{ ++ INIT_LIST_HEAD(bset); ++} ++ ++/* Release the entries of a blocknr_set. */ ++void blocknr_set_destroy(struct list_head *bset) ++{ ++ blocknr_set_entry *bse; ++ ++ while (!list_empty(bset)) { ++ bse = list_entry(bset->next, blocknr_set_entry, link); ++ list_del_init(&bse->link); ++ bse_free(bse); ++ } ++} ++ ++/* Merge blocknr_set entries out of @from into @into. */ ++/* Audited by: green(2002.06.11) */ ++/* Auditor comments: This merge does not know if merged sets contain ++ blocks pairs (As for wandered sets) or extents, so it cannot really merge ++ overlapping ranges if there is some. So I believe it may lead to ++ some blocks being presented several times in one blocknr_set. To help ++ debugging such problems it might help to check for duplicate entries on ++ actual processing of this set. 
Testing this kind of stuff right here is ++ also complicated by the fact that these sets are not sorted and going ++ through whole set on each element addition is going to be CPU-heavy task */ ++void blocknr_set_merge(struct list_head * from, struct list_head * into) ++{ ++ blocknr_set_entry *bse_into = NULL; ++ ++ /* If @from is empty, no work to perform. */ ++ if (list_empty(from)) ++ return; ++ /* If @into is not empty, try merging partial-entries. */ ++ if (!list_empty(into)) { ++ ++ /* Neither set is empty, pop the front to members and try to combine them. */ ++ blocknr_set_entry *bse_from; ++ unsigned into_avail; ++ ++ bse_into = list_entry(into->next, blocknr_set_entry, link); ++ list_del_init(&bse_into->link); ++ bse_from = list_entry(from->next, blocknr_set_entry, link); ++ list_del_init(&bse_from->link); ++ ++ /* Combine singles. */ ++ for (into_avail = bse_avail(bse_into); ++ into_avail != 0 && bse_from->nr_singles != 0; ++ into_avail -= 1) { ++ bse_put_single(bse_into, ++ &bse_from->entries[--bse_from-> ++ nr_singles]); ++ } ++ ++ /* Combine pairs. */ ++ for (; into_avail > 1 && bse_from->nr_pairs != 0; ++ into_avail -= 2) { ++ blocknr_pair *pair = ++ bse_get_pair(bse_from, --bse_from->nr_pairs); ++ bse_put_pair(bse_into, &pair->a, &pair->b); ++ } ++ ++ /* If bse_from is empty, delete it now. */ ++ if (bse_avail(bse_from) == BLOCKNR_SET_ENTRIES_NUMBER) { ++ bse_free(bse_from); ++ } else { ++ /* Otherwise, bse_into is full or nearly full (e.g., ++ it could have one slot avail and bse_from has one ++ pair left). Push it back onto the list. bse_from ++ becomes bse_into, which will be the new partial. */ ++ list_add(&bse_into->link, into); ++ bse_into = bse_from; ++ } ++ } ++ ++ /* Splice lists together. */ ++ list_splice_init(from, into->prev); ++ ++ /* Add the partial entry back to the head of the list. */ ++ if (bse_into != NULL) ++ list_add(&bse_into->link, into); ++} ++ ++/* Iterate over all blocknr set elements. 
*/ ++int blocknr_set_iterator(txn_atom *atom, struct list_head *bset, ++ blocknr_set_actor_f actor, void *data, int delete) ++{ ++ ++ blocknr_set_entry *entry; ++ ++ assert("zam-429", atom != NULL); ++ assert("zam-430", atom_is_protected(atom)); ++ assert("zam-431", bset != 0); ++ assert("zam-432", actor != NULL); ++ ++ entry = list_entry(bset->next, blocknr_set_entry, link); ++ while (bset != &entry->link) { ++ blocknr_set_entry *tmp = list_entry(entry->link.next, blocknr_set_entry, link); ++ unsigned int i; ++ int ret; ++ ++ for (i = 0; i < entry->nr_singles; i++) { ++ ret = actor(atom, &entry->entries[i], NULL, data); ++ ++ /* We can't break a loop if delete flag is set. */ ++ if (ret != 0 && !delete) ++ return ret; ++ } ++ ++ for (i = 0; i < entry->nr_pairs; i++) { ++ struct blocknr_pair *ab; ++ ++ ab = bse_get_pair(entry, i); ++ ++ ret = actor(atom, &ab->a, &ab->b, data); ++ ++ if (ret != 0 && !delete) ++ return ret; ++ } ++ ++ if (delete) { ++ list_del(&entry->link); ++ bse_free(entry); ++ } ++ ++ entry = tmp; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * scroll-step: 1 ++ * End: ++ */ +diff --git a/fs/reiser4/carry.c b/fs/reiser4/carry.c +new file mode 100644 +index 0000000..c90a0f0 +--- /dev/null ++++ b/fs/reiser4/carry.c +@@ -0,0 +1,1391 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++/* Functions to "carry" tree modification(s) upward. */ ++/* Tree is modified one level at a time. As we modify a level we accumulate a ++ set of changes that need to be propagated to the next level. We manage ++ node locking such that any searches that collide with carrying are ++ restarted, from the root if necessary. 
++ ++ Insertion of a new item may result in items being moved among nodes and ++ this requires the delimiting key to be updated at the least common parent ++ of the nodes modified to preserve search tree invariants. Also, insertion ++ may require allocation of a new node. A pointer to the new node has to be ++ inserted into some node on the parent level, etc. ++ ++ Tree carrying is meant to be analogous to arithmetic carrying. ++ ++ A carry operation is always associated with some node (&carry_node). ++ ++ Carry process starts with some initial set of operations to be performed ++ and an initial set of already locked nodes. Operations are performed one ++ by one. Performing each single operation has following possible effects: ++ ++ - content of carry node associated with operation is modified ++ - new carry nodes are locked and involved into carry process on this level ++ - new carry operations are posted to the next level ++ ++ After all carry operations on this level are done, process is repeated for ++ the accumulated sequence on carry operations for the next level. This ++ starts by trying to lock (in left to right order) all carry nodes ++ associated with carry operations on the parent level. After this, we decide ++ whether more nodes are required on the left of already locked set. If so, ++ all locks taken on the parent level are released, new carry nodes are ++ added, and locking process repeats. ++ ++ It may happen that balancing process fails owing to unrecoverable error on ++ some of upper levels of a tree (possible causes are io error, failure to ++ allocate new node, etc.). In this case we should unmount the filesystem, ++ rebooting if it is the root, and possibly advise the use of fsck. ++ ++ USAGE: ++ ++ int some_tree_operation( znode *node, ... ) ++ { ++ // Allocate on a stack pool of carry objects: operations and nodes. ++ // Most carry processes will only take objects from here, without ++ // dynamic allocation. 
++ ++I feel uneasy about this pool. It adds to code complexity, I understand why it exists, but.... -Hans ++ ++ carry_pool pool; ++ carry_level lowest_level; ++ carry_op *op; ++ ++ init_carry_pool( &pool ); ++ init_carry_level( &lowest_level, &pool ); ++ ++ // operation may be one of: ++ // COP_INSERT --- insert new item into node ++ // COP_CUT --- remove part of or whole node ++ // COP_PASTE --- increase size of item ++ // COP_DELETE --- delete pointer from parent node ++ // COP_UPDATE --- update delimiting key in least ++ // common ancestor of two ++ ++ op = reiser4_post_carry( &lowest_level, operation, node, 0 ); ++ if( IS_ERR( op ) || ( op == NULL ) ) { ++ handle error ++ } else { ++ // fill in remaining fields in @op, according to carry.h:carry_op ++ result = carry( &lowest_level, NULL ); ++ } ++ done_carry_pool( &pool ); ++ } ++ ++ When you are implementing node plugin method that participates in carry ++ (shifting, insertion, deletion, etc.), do the following: ++ ++ int foo_node_method( znode *node, ..., carry_level *todo ) ++ { ++ carry_op *op; ++ ++ .... ++ ++ // note, that last argument to reiser4_post_carry() is non-null ++ // here, because @op is to be applied to the parent of @node, rather ++ // than to the @node itself as in the previous case. ++ ++ op = node_post_carry( todo, operation, node, 1 ); ++ // fill in remaining fields in @op, according to carry.h:carry_op ++ ++ .... ++ ++ } ++ ++ BATCHING: ++ ++ One of the main advantages of level-by-level balancing implemented here is ++ ability to batch updates on a parent level and to peform them more ++ efficiently as a result. ++ ++ Description To Be Done (TBD). ++ ++ DIFFICULTIES AND SUBTLE POINTS: ++ ++ 1. complex plumbing is required, because: ++ ++ a. effective allocation through pools is needed ++ ++ b. target of operation is not exactly known when operation is ++ posted. This is worked around through bitfields in &carry_node and ++ logic in lock_carry_node() ++ ++ c. 
of interaction with locking code: node should be added into sibling ++ list when pointer to it is inserted into its parent, which is some time ++ after node was created. Between these moments, node is somewhat in ++ suspended state and is only registered in the carry lists ++ ++ 2. whole balancing logic is implemented here, in particular, insertion ++ logic is coded in make_space(). ++ ++ 3. special cases like insertion (reiser4_add_tree_root()) or deletion ++ (reiser4_kill_tree_root()) of tree root and morphing of paste into insert ++ (insert_paste()) have to be handled. ++ ++ 4. there is non-trivial interdependency between allocation of new nodes ++ and almost everything else. This is mainly due to the (1.c) above. I shall ++ write about this later. ++ ++*/ ++ ++#include "forward.h" ++#include "debug.h" ++#include "key.h" ++#include "coord.h" ++#include "plugin/item/item.h" ++#include "plugin/item/extent.h" ++#include "plugin/node/node.h" ++#include "jnode.h" ++#include "znode.h" ++#include "tree_mod.h" ++#include "tree_walk.h" ++#include "block_alloc.h" ++#include "pool.h" ++#include "tree.h" ++#include "carry.h" ++#include "carry_ops.h" ++#include "super.h" ++#include "reiser4.h" ++ ++#include ++ ++/* level locking/unlocking */ ++static int lock_carry_level(carry_level * level); ++static void unlock_carry_level(carry_level * level, int failure); ++static void done_carry_level(carry_level * level); ++static void unlock_carry_node(carry_level * level, carry_node * node, int fail); ++ ++int lock_carry_node(carry_level * level, carry_node * node); ++int lock_carry_node_tail(carry_node * node); ++ ++/* carry processing proper */ ++static int carry_on_level(carry_level * doing, carry_level * todo); ++ ++static carry_op *add_op(carry_level * level, pool_ordering order, ++ carry_op * reference); ++ ++/* handlers for carry operations. 
*/ ++ ++static void fatal_carry_error(carry_level * doing, int ecode); ++static int add_new_root(carry_level * level, carry_node * node, znode * fake); ++ ++static void print_level(const char *prefix, carry_level * level); ++ ++#if REISER4_DEBUG ++typedef enum { ++ CARRY_TODO, ++ CARRY_DOING ++} carry_queue_state; ++static int carry_level_invariant(carry_level * level, carry_queue_state state); ++#endif ++ ++/* main entry point for tree balancing. ++ ++ Tree carry performs operations from @doing and while doing so accumulates ++ information about operations to be performed on the next level ("carried" ++ to the parent level). Carried operations are performed, causing possibly ++ more operations to be carried upward etc. carry() takes care about ++ locking and pinning znodes while operating on them. ++ ++ For usage, see comment at the top of fs/reiser4/carry.c ++ ++*/ ++int reiser4_carry(carry_level * doing /* set of carry operations to be ++ * performed */ , ++ carry_level * done /* set of nodes, already performed ++ * at the previous level. ++ * NULL in most cases */) ++{ ++ int result = 0; ++ /* queue of new requests */ ++ carry_level *todo; ++ ON_DEBUG(STORE_COUNTERS); ++ ++ assert("nikita-888", doing != NULL); ++ BUG_ON(done != NULL); ++ ++ todo = doing + 1; ++ init_carry_level(todo, doing->pool); ++ ++ /* queue of requests preformed on the previous level */ ++ done = todo + 1; ++ init_carry_level(done, doing->pool); ++ ++ /* iterate until there is nothing more to do */ ++ while (result == 0 && doing->ops_num > 0) { ++ carry_level *tmp; ++ ++ /* at this point @done is locked. */ ++ /* repeat lock/do/unlock while ++ ++ (1) lock_carry_level() fails due to deadlock avoidance, or ++ ++ (2) carry_on_level() decides that more nodes have to ++ be involved. ++ ++ (3) some unexpected error occurred while balancing on the ++ upper levels. In this case all changes are rolled back. 
++ ++ */ ++ while (1) { ++ result = lock_carry_level(doing); ++ if (result == 0) { ++ /* perform operations from @doing and ++ accumulate new requests in @todo */ ++ result = carry_on_level(doing, todo); ++ if (result == 0) ++ break; ++ else if (result != -E_REPEAT || ++ !doing->restartable) { ++ warning("nikita-1043", ++ "Fatal error during carry: %i", ++ result); ++ print_level("done", done); ++ print_level("doing", doing); ++ print_level("todo", todo); ++ /* do some rough stuff like aborting ++ all pending transcrashes and thus ++ pushing tree back to the consistent ++ state. Alternatively, just panic. ++ */ ++ fatal_carry_error(doing, result); ++ return result; ++ } ++ } else if (result != -E_REPEAT) { ++ fatal_carry_error(doing, result); ++ return result; ++ } ++ unlock_carry_level(doing, 1); ++ } ++ /* at this point @done can be safely unlocked */ ++ done_carry_level(done); ++ ++ /* cyclically shift queues */ ++ tmp = done; ++ done = doing; ++ doing = todo; ++ todo = tmp; ++ init_carry_level(todo, doing->pool); ++ ++ /* give other threads chance to run */ ++ reiser4_preempt_point(); ++ } ++ done_carry_level(done); ++ ++ /* all counters, but x_refs should remain the same. x_refs can change ++ owing to transaction manager */ ++ ON_DEBUG(CHECK_COUNTERS); ++ return result; ++} ++ ++/* perform carry operations on given level. ++ ++ Optimizations proposed by pooh: ++ ++ (1) don't lock all nodes from queue at the same time. Lock nodes lazily as ++ required; ++ ++ (2) unlock node if there are no more operations to be performed upon it and ++ node didn't add any operation to @todo. This can be implemented by ++ attaching to each node two counters: counter of operations working on this ++ node and counter of operations carried upward from this node.
++ ++*/ ++static int carry_on_level(carry_level * doing /* queue of carry operations to ++ * do on this level */ , ++ carry_level * todo /* queue where new carry ++ * operations to be performed on ++ * the * parent level are ++ * accumulated during @doing ++ * processing. */ ) ++{ ++ int result; ++ int (*f) (carry_op *, carry_level *, carry_level *); ++ carry_op *op; ++ carry_op *tmp_op; ++ ++ assert("nikita-1034", doing != NULL); ++ assert("nikita-1035", todo != NULL); ++ ++ /* @doing->nodes are locked. */ ++ ++ /* This function can be split into two phases: analysis and modification. ++ ++ Analysis calculates precisely what items should be moved between ++ nodes. This information is gathered in some structures attached to ++ each carry_node in a @doing queue. Analysis also determines whether ++ new nodes are to be allocated etc. ++ ++ After analysis is completed, actual modification is performed. Here ++ we can take advantage of "batch modification": if there are several ++ operations acting on the same node, modifications can be performed ++ more efficiently when batched together. ++ ++ Above is an optimization left for the future. ++ */ ++ /* Important, but delayed optimization: it's possible to batch ++ operations together and perform them more efficiently as a ++ result. For example, deletion of several neighboring items from a ++ node can be converted to a single ->cut() operation. ++ ++ Before processing queue, it should be scanned and "mergeable" ++ operations merged. ++ */ ++ result = 0; ++ for_all_ops(doing, op, tmp_op) { ++ carry_opcode opcode; ++ ++ assert("nikita-1041", op != NULL); ++ opcode = op->op; ++ assert("nikita-1042", op->op < COP_LAST_OP); ++ f = op_dispatch_table[op->op].handler; ++ result = f(op, doing, todo); ++ /* locking can fail with -E_REPEAT. Any different error is fatal ++ and will be handled by fatal_carry_error() sledgehammer. 
++ */ ++ if (result != 0) ++ break; ++ } ++ if (result == 0) { ++ carry_plugin_info info; ++ carry_node *scan; ++ carry_node *tmp_scan; ++ ++ info.doing = doing; ++ info.todo = todo; ++ ++ assert("nikita-3002", ++ carry_level_invariant(doing, CARRY_DOING)); ++ for_all_nodes(doing, scan, tmp_scan) { ++ znode *node; ++ ++ node = reiser4_carry_real(scan); ++ assert("nikita-2547", node != NULL); ++ if (node_is_empty(node)) { ++ result = ++ node_plugin_by_node(node)-> ++ prepare_removal(node, &info); ++ if (result != 0) ++ break; ++ } ++ } ++ } ++ return result; ++} ++ ++/* post carry operation ++ ++ This is main function used by external carry clients: node layout plugins ++ and tree operations to create new carry operation to be performed on some ++ level. ++ ++ New operation will be included in the @level queue. To actually perform it, ++ call carry( level, ... ). This function takes write lock on @node. Carry ++ manages all its locks by itself, don't worry about this. ++ ++ This function adds operation and node at the end of the queue. It is up to ++ caller to guarantee proper ordering of node queue. ++ ++*/ ++carry_op * reiser4_post_carry(carry_level * level /* queue where new operation ++ * is to be posted at */ , ++ carry_opcode op /* opcode of operation */ , ++ znode * node /* node on which this operation ++ * will operate */ , ++ int apply_to_parent_p /* whether operation will ++ * operate directly on @node ++ * or on it parent. 
*/) ++{ ++ carry_op *result; ++ carry_node *child; ++ ++ assert("nikita-1046", level != NULL); ++ assert("nikita-1788", znode_is_write_locked(node)); ++ ++ result = add_op(level, POOLO_LAST, NULL); ++ if (IS_ERR(result)) ++ return result; ++ child = reiser4_add_carry(level, POOLO_LAST, NULL); ++ if (IS_ERR(child)) { ++ reiser4_pool_free(&level->pool->op_pool, &result->header); ++ return (carry_op *) child; ++ } ++ result->node = child; ++ result->op = op; ++ child->parent = apply_to_parent_p; ++ if (ZF_ISSET(node, JNODE_ORPHAN)) ++ child->left_before = 1; ++ child->node = node; ++ return result; ++} ++ ++/* initialize carry queue */ ++void init_carry_level(carry_level * level /* level to initialize */ , ++ carry_pool * pool /* pool @level will allocate objects ++ * from */ ) ++{ ++ assert("nikita-1045", level != NULL); ++ assert("nikita-967", pool != NULL); ++ ++ memset(level, 0, sizeof *level); ++ level->pool = pool; ++ ++ INIT_LIST_HEAD(&level->nodes); ++ INIT_LIST_HEAD(&level->ops); ++} ++ ++/* allocate carry pool and initialize pools within queue */ ++carry_pool *init_carry_pool(int size) ++{ ++ carry_pool *pool; ++ ++ assert("", size >= sizeof(carry_pool) + 3 * sizeof(carry_level)); ++ pool = kmalloc(size, reiser4_ctx_gfp_mask_get()); ++ if (pool == NULL) ++ return ERR_PTR(RETERR(-ENOMEM)); ++ ++ reiser4_init_pool(&pool->op_pool, sizeof(carry_op), CARRIES_POOL_SIZE, ++ (char *)pool->op); ++ reiser4_init_pool(&pool->node_pool, sizeof(carry_node), ++ NODES_LOCKED_POOL_SIZE, (char *)pool->node); ++ return pool; ++} ++ ++/* finish with queue pools */ ++void done_carry_pool(carry_pool * pool /* pool to destroy */ ) ++{ ++ reiser4_done_pool(&pool->op_pool); ++ reiser4_done_pool(&pool->node_pool); ++ kfree(pool); ++} ++ ++/* add new carry node to the @level. ++ ++ Returns pointer to the new carry node allocated from pool. It's up to ++ callers to maintain proper order in the @level. 
Assumption is that if carry ++ nodes on one level are already sorted and modifications are peroformed from ++ left to right, carry nodes added on the parent level will be ordered ++ automatically. To control ordering use @order and @reference parameters. ++ ++*/ ++carry_node *reiser4_add_carry_skip(carry_level * level /* &carry_level to add ++ * node to */ , ++ pool_ordering order /* where to insert: ++ * at the beginning of ++ * @level, ++ * before @reference, ++ * after @reference, ++ * at the end of @level ++ */ , ++ carry_node * reference/* reference node for ++ * insertion */) ++{ ++ ON_DEBUG(carry_node * orig_ref = reference); ++ ++ if (order == POOLO_BEFORE) { ++ reference = find_left_carry(reference, level); ++ if (reference == NULL) ++ reference = list_entry(level->nodes.next, carry_node, ++ header.level_linkage); ++ else ++ reference = list_entry(reference->header.level_linkage.next, ++ carry_node, header.level_linkage); ++ } else if (order == POOLO_AFTER) { ++ reference = find_right_carry(reference, level); ++ if (reference == NULL) ++ reference = list_entry(level->nodes.prev, carry_node, ++ header.level_linkage); ++ else ++ reference = list_entry(reference->header.level_linkage.prev, ++ carry_node, header.level_linkage); ++ } ++ assert("nikita-2209", ++ ergo(orig_ref != NULL, ++ reiser4_carry_real(reference) == ++ reiser4_carry_real(orig_ref))); ++ return reiser4_add_carry(level, order, reference); ++} ++ ++carry_node *reiser4_add_carry(carry_level * level /* &carry_level to add node ++ * to */ , ++ pool_ordering order /* where to insert: at the ++ * beginning of @level, before ++ * @reference, after @reference, ++ * at the end of @level */ , ++ carry_node * reference /* reference node for ++ * insertion */ ) ++{ ++ carry_node *result; ++ ++ result = ++ (carry_node *) reiser4_add_obj(&level->pool->node_pool, ++ &level->nodes, ++ order, &reference->header); ++ if (!IS_ERR(result) && (result != NULL)) ++ ++level->nodes_num; ++ return result; ++} ++ ++/* 
add new carry operation to the @level. ++ ++ Returns pointer to the new carry operations allocated from pool. It's up to ++ callers to maintain proper order in the @level. To control ordering use ++ @order and @reference parameters. ++ ++*/ ++static carry_op *add_op(carry_level * level /* &carry_level to add node to */ , ++ pool_ordering order /* where to insert: at the beginning of ++ * @level, before @reference, after ++ * @reference, at the end of @level */ , ++ carry_op * ++ reference /* reference node for insertion */ ) ++{ ++ carry_op *result; ++ ++ result = ++ (carry_op *) reiser4_add_obj(&level->pool->op_pool, &level->ops, ++ order, &reference->header); ++ if (!IS_ERR(result) && (result != NULL)) ++ ++level->ops_num; ++ return result; ++} ++ ++/* Return node on the right of which @node was created. ++ ++ Each node is created on the right of some existing node (or it is new root, ++ which is special case not handled here). ++ ++ @node is new node created on some level, but not yet inserted into its ++ parent, it has corresponding bit (JNODE_ORPHAN) set in zstate. 
++ ++*/ ++static carry_node *find_begetting_brother(carry_node * node /* node to start search ++ * from */ , ++ carry_level * kin UNUSED_ARG /* level to ++ * scan */ ) ++{ ++ carry_node *scan; ++ ++ assert("nikita-1614", node != NULL); ++ assert("nikita-1615", kin != NULL); ++ assert("nikita-1616", LOCK_CNT_GTZ(rw_locked_tree)); ++ assert("nikita-1619", ergo(reiser4_carry_real(node) != NULL, ++ ZF_ISSET(reiser4_carry_real(node), ++ JNODE_ORPHAN))); ++ for (scan = node;; ++ scan = list_entry(scan->header.level_linkage.prev, carry_node, ++ header.level_linkage)) { ++ assert("nikita-1617", &kin->nodes != &scan->header.level_linkage); ++ if ((scan->node != node->node) && ++ !ZF_ISSET(scan->node, JNODE_ORPHAN)) { ++ assert("nikita-1618", reiser4_carry_real(scan) != NULL); ++ break; ++ } ++ } ++ return scan; ++} ++ ++static cmp_t ++carry_node_cmp(carry_level * level, carry_node * n1, carry_node * n2) ++{ ++ assert("nikita-2199", n1 != NULL); ++ assert("nikita-2200", n2 != NULL); ++ ++ if (n1 == n2) ++ return EQUAL_TO; ++ while (1) { ++ n1 = carry_node_next(n1); ++ if (carry_node_end(level, n1)) ++ return GREATER_THAN; ++ if (n1 == n2) ++ return LESS_THAN; ++ } ++ impossible("nikita-2201", "End of level reached"); ++} ++ ++carry_node *find_carry_node(carry_level * level, const znode * node) ++{ ++ carry_node *scan; ++ carry_node *tmp_scan; ++ ++ assert("nikita-2202", level != NULL); ++ assert("nikita-2203", node != NULL); ++ ++ for_all_nodes(level, scan, tmp_scan) { ++ if (reiser4_carry_real(scan) == node) ++ return scan; ++ } ++ return NULL; ++} ++ ++znode *reiser4_carry_real(const carry_node * node) ++{ ++ assert("nikita-3061", node != NULL); ++ ++ return node->lock_handle.node; ++} ++ ++carry_node *insert_carry_node(carry_level * doing, carry_level * todo, ++ const znode * node) ++{ ++ carry_node *base; ++ carry_node *scan; ++ carry_node *tmp_scan; ++ carry_node *proj; ++ ++ base = find_carry_node(doing, node); ++ assert("nikita-2204", base != NULL); ++ ++ 
for_all_nodes(todo, scan, tmp_scan) { ++ proj = find_carry_node(doing, scan->node); ++ assert("nikita-2205", proj != NULL); ++ if (carry_node_cmp(doing, proj, base) != LESS_THAN) ++ break; ++ } ++ return scan; ++} ++ ++static carry_node *add_carry_atplace(carry_level * doing, carry_level * todo, ++ znode * node) ++{ ++ carry_node *reference; ++ ++ assert("nikita-2994", doing != NULL); ++ assert("nikita-2995", todo != NULL); ++ assert("nikita-2996", node != NULL); ++ ++ reference = insert_carry_node(doing, todo, node); ++ assert("nikita-2997", reference != NULL); ++ ++ return reiser4_add_carry(todo, POOLO_BEFORE, reference); ++} ++ ++/* like reiser4_post_carry(), but designed to be called from node plugin methods. ++ This function is different from reiser4_post_carry() in that it finds proper ++ place to insert node in the queue. */ ++carry_op *node_post_carry(carry_plugin_info * info /* carry parameters ++ * passed down to node ++ * plugin */ , ++ carry_opcode op /* opcode of operation */ , ++ znode * node /* node on which this ++ * operation will operate */ , ++ int apply_to_parent_p /* whether operation will ++ * operate directly on @node ++ * or on it parent. 
*/ ) ++{ ++ carry_op *result; ++ carry_node *child; ++ ++ assert("nikita-2207", info != NULL); ++ assert("nikita-2208", info->todo != NULL); ++ ++ if (info->doing == NULL) ++ return reiser4_post_carry(info->todo, op, node, ++ apply_to_parent_p); ++ ++ result = add_op(info->todo, POOLO_LAST, NULL); ++ if (IS_ERR(result)) ++ return result; ++ child = add_carry_atplace(info->doing, info->todo, node); ++ if (IS_ERR(child)) { ++ reiser4_pool_free(&info->todo->pool->op_pool, &result->header); ++ return (carry_op *) child; ++ } ++ result->node = child; ++ result->op = op; ++ child->parent = apply_to_parent_p; ++ if (ZF_ISSET(node, JNODE_ORPHAN)) ++ child->left_before = 1; ++ child->node = node; ++ return result; ++} ++ ++/* lock all carry nodes in @level */ ++static int lock_carry_level(carry_level * level /* level to lock */ ) ++{ ++ int result; ++ carry_node *node; ++ carry_node *tmp_node; ++ ++ assert("nikita-881", level != NULL); ++ assert("nikita-2229", carry_level_invariant(level, CARRY_TODO)); ++ ++ /* lock nodes from left to right */ ++ result = 0; ++ for_all_nodes(level, node, tmp_node) { ++ result = lock_carry_node(level, node); ++ if (result != 0) ++ break; ++ } ++ return result; ++} ++ ++/* Synchronize delimiting keys between @node and its left neighbor. ++ ++ To reduce contention on dk key and simplify carry code, we synchronize ++ delimiting keys only when carry ultimately leaves tree level (carrying ++ changes upward) and unlocks nodes at this level. ++ ++ This function first finds left neighbor of @node and then updates left ++ neighbor's right delimiting key to conincide with least key in @node. 
++ ++*/ ++ ++ON_DEBUG(extern atomic_t delim_key_version; ++ ) ++ ++static void sync_dkeys(znode * spot /* node to update */ ) ++{ ++ reiser4_key pivot; ++ reiser4_tree *tree; ++ ++ assert("nikita-1610", spot != NULL); ++ assert("nikita-1612", LOCK_CNT_NIL(rw_locked_dk)); ++ ++ tree = znode_get_tree(spot); ++ read_lock_tree(tree); ++ write_lock_dk(tree); ++ ++ assert("nikita-2192", znode_is_loaded(spot)); ++ ++ /* sync left delimiting key of @spot with key in its leftmost item */ ++ if (node_is_empty(spot)) ++ pivot = *znode_get_rd_key(spot); ++ else ++ leftmost_key_in_node(spot, &pivot); ++ ++ znode_set_ld_key(spot, &pivot); ++ ++ /* there can be sequence of empty nodes pending removal on the left of ++ @spot. Scan them and update their left and right delimiting keys to ++ match left delimiting key of @spot. Also, update right delimiting ++ key of first non-empty left neighbor. ++ */ ++ while (1) { ++ if (!ZF_ISSET(spot, JNODE_LEFT_CONNECTED)) ++ break; ++ ++ spot = spot->left; ++ if (spot == NULL) ++ break; ++ ++ znode_set_rd_key(spot, &pivot); ++ /* don't sink into the domain of another balancing */ ++ if (!znode_is_write_locked(spot)) ++ break; ++ if (ZF_ISSET(spot, JNODE_HEARD_BANSHEE)) ++ znode_set_ld_key(spot, &pivot); ++ else ++ break; ++ } ++ ++ write_unlock_dk(tree); ++ read_unlock_tree(tree); ++} ++ ++/* unlock all carry nodes in @level */ ++static void unlock_carry_level(carry_level * level /* level to unlock */ , ++ int failure /* true if unlocking owing to ++ * failure */ ) ++{ ++ carry_node *node; ++ carry_node *tmp_node; ++ ++ assert("nikita-889", level != NULL); ++ ++ if (!failure) { ++ znode *spot; ++ ++ spot = NULL; ++ /* update delimiting keys */ ++ for_all_nodes(level, node, tmp_node) { ++ if (reiser4_carry_real(node) != spot) { ++ spot = reiser4_carry_real(node); ++ sync_dkeys(spot); ++ } ++ } ++ } ++ ++ /* nodes can be unlocked in arbitrary order. In preemptible ++ environment it's better to unlock in reverse order of locking, ++ though. 
++ */ ++ for_all_nodes_back(level, node, tmp_node) { ++ /* all allocated nodes should be already linked to their ++ parents at this moment. */ ++ assert("nikita-1631", ++ ergo(!failure, !ZF_ISSET(reiser4_carry_real(node), ++ JNODE_ORPHAN))); ++ ON_DEBUG(check_dkeys(reiser4_carry_real(node))); ++ unlock_carry_node(level, node, failure); ++ } ++ level->new_root = NULL; ++} ++ ++/* finish with @level ++ ++ Unlock nodes and release all allocated resources */ ++static void done_carry_level(carry_level * level /* level to finish */ ) ++{ ++ carry_node *node; ++ carry_node *tmp_node; ++ carry_op *op; ++ carry_op *tmp_op; ++ ++ assert("nikita-1076", level != NULL); ++ ++ unlock_carry_level(level, 0); ++ for_all_nodes(level, node, tmp_node) { ++ assert("nikita-2113", list_empty_careful(&node->lock_handle.locks_link)); ++ assert("nikita-2114", list_empty_careful(&node->lock_handle.owners_link)); ++ reiser4_pool_free(&level->pool->node_pool, &node->header); ++ } ++ for_all_ops(level, op, tmp_op) ++ reiser4_pool_free(&level->pool->op_pool, &op->header); ++} ++ ++/* helper function to complete locking of carry node ++ ++ Finish locking of carry node. There are several ways in which new carry ++ node can be added into carry level and locked. Normal is through ++ lock_carry_node(), but also from find_{left|right}_neighbor(). This ++ function factors out common final part of all locking scenarios. It ++ supposes that @node -> lock_handle is lock handle for lock just taken and ++ fills ->real_node from this lock handle. ++ ++*/ ++int lock_carry_node_tail(carry_node * node /* node to complete locking of */ ) ++{ ++ assert("nikita-1052", node != NULL); ++ assert("nikita-1187", reiser4_carry_real(node) != NULL); ++ assert("nikita-1188", !node->unlock); ++ ++ node->unlock = 1; ++ /* Load node content into memory and install node plugin by ++ looking at the node header. ++ ++ Most of the time this call is cheap because the node is ++ already in memory. 
++ ++ Corresponding zrelse() is in unlock_carry_node() ++ */ ++ return zload(reiser4_carry_real(node)); ++} ++ ++/* lock carry node ++ ++ "Resolve" node to real znode, lock it and mark as locked. ++ This requires recursive locking of znodes. ++ ++ When operation is posted to the parent level, node it will be applied to is ++ not yet known. For example, when shifting data between two nodes, ++ delimiting key has to be updated in parent or parents of nodes involved. But ++ their parents are not yet locked and, moreover said nodes can be reparented ++ by concurrent balancing. ++ ++ To work around this, carry operation is applied to special "carry node" ++ rather than to the znode itself. Carry node consists of some "base" or ++ "reference" znode and flags indicating how to get to the target of carry ++ operation (->real_node field of carry_node) from base. ++ ++*/ ++int lock_carry_node(carry_level * level /* level @node is in */ , ++ carry_node * node /* node to lock */ ) ++{ ++ int result; ++ znode *reference_point; ++ lock_handle lh; ++ lock_handle tmp_lh; ++ reiser4_tree *tree; ++ ++ assert("nikita-887", level != NULL); ++ assert("nikita-882", node != NULL); ++ ++ result = 0; ++ reference_point = node->node; ++ init_lh(&lh); ++ init_lh(&tmp_lh); ++ if (node->left_before) { ++ /* handling of new nodes, allocated on the previous level: ++ ++ some carry ops were probably posted from the new node, but ++ this node neither has parent pointer set, nor is ++ connected. This will be done in ->create_hook() for ++ internal item. ++ ++ Nonetheless, parent of new node has to be locked. To do ++ this, first go to the "left" in the carry order. This ++ depends on the decision to always allocate new node on the ++ right of existing one. ++ ++ Loop handles case when multiple nodes, all orphans, were ++ inserted.
++ ++ Strictly speaking, taking tree lock is not necessary here, ++ because all nodes scanned by loop in ++ find_begetting_brother() are write-locked by this thread, ++ and thus, their sibling linkage cannot change. ++ ++ */ ++ tree = znode_get_tree(reference_point); ++ read_lock_tree(tree); ++ reference_point = find_begetting_brother(node, level)->node; ++ read_unlock_tree(tree); ++ assert("nikita-1186", reference_point != NULL); ++ } ++ if (node->parent && (result == 0)) { ++ result = ++ reiser4_get_parent(&tmp_lh, reference_point, ++ ZNODE_WRITE_LOCK); ++ if (result != 0) { ++ ; /* nothing */ ++ } else if (znode_get_level(tmp_lh.node) == 0) { ++ assert("nikita-1347", znode_above_root(tmp_lh.node)); ++ result = add_new_root(level, node, tmp_lh.node); ++ if (result == 0) { ++ reference_point = level->new_root; ++ move_lh(&lh, &node->lock_handle); ++ } ++ } else if ((level->new_root != NULL) ++ && (level->new_root != ++ znode_parent_nolock(reference_point))) { ++ /* parent of node exists, but this level aready ++ created different new root, so */ ++ warning("nikita-1109", ++ /* it should be "radicis", but tradition is ++ tradition. do banshees read latin? 
*/ ++ "hodie natus est radici frater"); ++ result = -EIO; ++ } else { ++ move_lh(&lh, &tmp_lh); ++ reference_point = lh.node; ++ } ++ } ++ if (node->left && (result == 0)) { ++ assert("nikita-1183", node->parent); ++ assert("nikita-883", reference_point != NULL); ++ result = ++ reiser4_get_left_neighbor(&tmp_lh, reference_point, ++ ZNODE_WRITE_LOCK, ++ GN_CAN_USE_UPPER_LEVELS); ++ if (result == 0) { ++ done_lh(&lh); ++ move_lh(&lh, &tmp_lh); ++ reference_point = lh.node; ++ } ++ } ++ if (!node->parent && !node->left && !node->left_before) { ++ result = ++ longterm_lock_znode(&lh, reference_point, ZNODE_WRITE_LOCK, ++ ZNODE_LOCK_HIPRI); ++ } ++ if (result == 0) { ++ move_lh(&node->lock_handle, &lh); ++ result = lock_carry_node_tail(node); ++ } ++ done_lh(&tmp_lh); ++ done_lh(&lh); ++ return result; ++} ++ ++/* release a lock on &carry_node. ++ ++ Release if necessary lock on @node. This opearion is pair of ++ lock_carry_node() and is idempotent: you can call it more than once on the ++ same node. ++ ++*/ ++static void ++unlock_carry_node(carry_level * level, ++ carry_node * node /* node to be released */ , ++ int failure /* 0 if node is unlocked due ++ * to some error */ ) ++{ ++ znode *real_node; ++ ++ assert("nikita-884", node != NULL); ++ ++ real_node = reiser4_carry_real(node); ++ /* pair to zload() in lock_carry_node_tail() */ ++ zrelse(real_node); ++ if (node->unlock && (real_node != NULL)) { ++ assert("nikita-899", real_node == node->lock_handle.node); ++ longterm_unlock_znode(&node->lock_handle); ++ } ++ if (failure) { ++ if (node->deallocate && (real_node != NULL)) { ++ /* free node in bitmap ++ ++ Prepare node for removal. Last zput() will finish ++ with it. 
++ */ ++ ZF_SET(real_node, JNODE_HEARD_BANSHEE); ++ } ++ if (node->free) { ++ assert("nikita-2177", ++ list_empty_careful(&node->lock_handle.locks_link)); ++ assert("nikita-2112", ++ list_empty_careful(&node->lock_handle.owners_link)); ++ reiser4_pool_free(&level->pool->node_pool, ++ &node->header); ++ } ++ } ++} ++ ++/* fatal_carry_error() - all-catching error handling function ++ ++ It is possible that carry faces unrecoverable error, like inability to ++ insert pointer at the internal level. Our simple solution is just panic in ++ this situation. More sophisticated things like attempt to remount ++ file-system as read-only can be implemented without much difficulty. ++ ++ It is believed that: ++ ++ 1. instead of panicking, all current transactions can be aborted rolling ++ system back to the consistent state. ++ ++Umm, if you simply panic without doing anything more at all, then all current ++transactions are aborted and the system is rolled back to a consistent state, ++by virtue of the design of the transactional mechanism. Well, wait, let's be ++precise. If an internal node is corrupted on disk due to hardware failure, ++then there may be no consistent state that can be rolled back to, so instead ++we should say that it will roll back the transactions, which barring other ++factors means rolling back to a consistent state. ++ ++# Nikita: there is a subtle difference between panic and aborting ++# transactions: machine doesn't reboot. Processes aren't killed. Processes ++# not using reiser4 (not that we care about such processes), or using other ++# reiser4 mounts (about them we do care) will simply continue to run. With ++# some luck, even application using aborted file system can survive: it will ++# get some error, like EBADF, from each file descriptor on failed file system, ++# but applications that do care about tolerance will cope with this (squid ++# will).
++ ++It would be a nice feature though to support rollback without rebooting ++followed by remount, but this can wait for later versions. ++ ++ 2. once isolated transactions will be implemented it will be possible to ++ roll back offending transaction. ++ ++2. is additional code complexity of inconsistent value (it implies that a broken tree should be kept in operation), so we must think about ++it more before deciding if it should be done. -Hans ++ ++*/ ++static void fatal_carry_error(carry_level * doing UNUSED_ARG /* carry level ++ * where ++ * unrecoverable ++ * error ++ * occurred */ , ++ int ecode /* error code */ ) ++{ ++ assert("nikita-1230", doing != NULL); ++ assert("nikita-1231", ecode < 0); ++ ++ reiser4_panic("nikita-1232", "Carry failed: %i", ecode); ++} ++ ++/* add new root to the tree ++ ++ This function itself only manages changes in carry structures and delegates ++ all hard work (allocation of znode for new root, changes of parent and ++ sibling pointers to the reiser4_add_tree_root(). ++ ++ Locking: old tree root is locked by carry at this point. Fake znode is also ++ locked. ++ ++*/ ++static int add_new_root(carry_level * level /* carry level in context of which ++ * operation is performed */ , ++ carry_node * node /* carry node for existing root */ , ++ znode * fake /* "fake" znode already locked by ++ * us */ ) ++{ ++ int result; ++ ++ assert("nikita-1104", level != NULL); ++ assert("nikita-1105", node != NULL); ++ ++ assert("nikita-1403", znode_is_write_locked(node->node)); ++ assert("nikita-1404", znode_is_write_locked(fake)); ++ ++ /* trying to create new root. */ ++ /* @node is root and it's already locked by us. This ++ means that nobody else can be trying to add/remove ++ tree root right now. 
++ */ ++ if (level->new_root == NULL) ++ level->new_root = reiser4_add_tree_root(node->node, fake); ++ if (!IS_ERR(level->new_root)) { ++ assert("nikita-1210", znode_is_root(level->new_root)); ++ node->deallocate = 1; ++ result = ++ longterm_lock_znode(&node->lock_handle, level->new_root, ++ ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI); ++ if (result == 0) ++ zput(level->new_root); ++ } else { ++ result = PTR_ERR(level->new_root); ++ level->new_root = NULL; ++ } ++ return result; ++} ++ ++/* allocate new znode and add the operation that inserts the ++ pointer to it into the parent node into the todo level ++ ++ Allocate new znode, add it into carry queue and post into @todo queue ++ request to add pointer to new node into its parent. ++ ++ This is carry related routing that calls reiser4_new_node() to allocate new ++ node. ++*/ ++carry_node *add_new_znode(znode * brother /* existing left neighbor of new ++ * node */ , ++ carry_node * ref /* carry node after which new ++ * carry node is to be inserted ++ * into queue. This affects ++ * locking. */ , ++ carry_level * doing /* carry queue where new node is ++ * to be added */ , ++ carry_level * todo /* carry queue where COP_INSERT ++ * operation to add pointer to ++ * new node will ne added */ ) ++{ ++ carry_node *fresh; ++ znode *new_znode; ++ carry_op *add_pointer; ++ carry_plugin_info info; ++ ++ assert("nikita-1048", brother != NULL); ++ assert("nikita-1049", todo != NULL); ++ ++ /* There is a lot of possible variations here: to what parent ++ new node will be attached and where. For simplicity, always ++ do the following: ++ ++ (1) new node and @brother will have the same parent. ++ ++ (2) new node is added on the right of @brother ++ ++ */ ++ ++ fresh = reiser4_add_carry_skip(doing, ++ ref ? 
POOLO_AFTER : POOLO_LAST, ref); ++ if (IS_ERR(fresh)) ++ return fresh; ++ ++ fresh->deallocate = 1; ++ fresh->free = 1; ++ ++ new_znode = reiser4_new_node(brother, znode_get_level(brother)); ++ if (IS_ERR(new_znode)) ++ /* @fresh will be deallocated automatically by error ++ handling code in the caller. */ ++ return (carry_node *) new_znode; ++ ++ /* new_znode returned znode with x_count 1. Caller has to decrease ++ it. make_space() does. */ ++ ++ ZF_SET(new_znode, JNODE_ORPHAN); ++ fresh->node = new_znode; ++ ++ while (ZF_ISSET(reiser4_carry_real(ref), JNODE_ORPHAN)) { ++ ref = carry_node_prev(ref); ++ assert("nikita-1606", !carry_node_end(doing, ref)); ++ } ++ ++ info.todo = todo; ++ info.doing = doing; ++ add_pointer = node_post_carry(&info, COP_INSERT, ++ reiser4_carry_real(ref), 1); ++ if (IS_ERR(add_pointer)) { ++ /* no need to deallocate @new_znode here: it will be ++ deallocated during carry error handling. */ ++ return (carry_node *) add_pointer; ++ } ++ ++ add_pointer->u.insert.type = COPT_CHILD; ++ add_pointer->u.insert.child = fresh; ++ add_pointer->u.insert.brother = brother; ++ /* initially new node spawns empty key range */ ++ write_lock_dk(znode_get_tree(brother)); ++ znode_set_ld_key(new_znode, ++ znode_set_rd_key(new_znode, ++ znode_get_rd_key(brother))); ++ write_unlock_dk(znode_get_tree(brother)); ++ return fresh; ++} ++ ++/* DEBUGGING FUNCTIONS. ++ ++ Probably we also should leave them on even when ++ debugging is turned off to print dumps at errors. 
++*/ ++#if REISER4_DEBUG ++static int carry_level_invariant(carry_level * level, carry_queue_state state) ++{ ++ carry_node *node; ++ carry_node *tmp_node; ++ ++ if (level == NULL) ++ return 0; ++ ++ if (level->track_type != 0 && ++ level->track_type != CARRY_TRACK_NODE && ++ level->track_type != CARRY_TRACK_CHANGE) ++ return 0; ++ ++ /* check that nodes are in ascending order */ ++ for_all_nodes(level, node, tmp_node) { ++ znode *left; ++ znode *right; ++ ++ reiser4_key lkey; ++ reiser4_key rkey; ++ ++ if (node != carry_node_front(level)) { ++ if (state == CARRY_TODO) { ++ right = node->node; ++ left = carry_node_prev(node)->node; ++ } else { ++ right = reiser4_carry_real(node); ++ left = reiser4_carry_real(carry_node_prev(node)); ++ } ++ if (right == NULL || left == NULL) ++ continue; ++ if (node_is_empty(right) || node_is_empty(left)) ++ continue; ++ if (!keyle(leftmost_key_in_node(left, &lkey), ++ leftmost_key_in_node(right, &rkey))) { ++ warning("", "wrong key order"); ++ return 0; ++ } ++ } ++ } ++ return 1; ++} ++#endif ++ ++/* get symbolic name for boolean */ ++static const char *tf(int boolean /* truth value */ ) ++{ ++ return boolean ? "t" : "f"; ++} ++ ++/* symbolic name for carry operation */ ++static const char *carry_op_name(carry_opcode op /* carry opcode */ ) ++{ ++ switch (op) { ++ case COP_INSERT: ++ return "COP_INSERT"; ++ case COP_DELETE: ++ return "COP_DELETE"; ++ case COP_CUT: ++ return "COP_CUT"; ++ case COP_PASTE: ++ return "COP_PASTE"; ++ case COP_UPDATE: ++ return "COP_UPDATE"; ++ case COP_EXTENT: ++ return "COP_EXTENT"; ++ case COP_INSERT_FLOW: ++ return "COP_INSERT_FLOW"; ++ default:{ ++ /* not mt safe, but who cares? 
*/ ++ static char buf[20]; ++ ++ sprintf(buf, "unknown op: %x", op); ++ return buf; ++ } ++ } ++} ++ ++/* dump information about carry node */ ++static void print_carry(const char *prefix /* prefix to print */ , ++ carry_node * node /* node to print */ ) ++{ ++ if (node == NULL) { ++ printk("%s: null\n", prefix); ++ return; ++ } ++ printk ++ ("%s: %p parent: %s, left: %s, unlock: %s, free: %s, dealloc: %s\n", ++ prefix, node, tf(node->parent), tf(node->left), tf(node->unlock), ++ tf(node->free), tf(node->deallocate)); ++} ++ ++/* dump information about carry operation */ ++static void print_op(const char *prefix /* prefix to print */ , ++ carry_op * op /* operation to print */ ) ++{ ++ if (op == NULL) { ++ printk("%s: null\n", prefix); ++ return; ++ } ++ printk("%s: %p carry_opcode: %s\n", prefix, op, carry_op_name(op->op)); ++ print_carry("\tnode", op->node); ++ switch (op->op) { ++ case COP_INSERT: ++ case COP_PASTE: ++ print_coord("\tcoord", ++ op->u.insert.d ? op->u.insert.d->coord : NULL, 0); ++ reiser4_print_key("\tkey", ++ op->u.insert.d ? 
op->u.insert.d->key : NULL); ++ print_carry("\tchild", op->u.insert.child); ++ break; ++ case COP_DELETE: ++ print_carry("\tchild", op->u.delete.child); ++ break; ++ case COP_CUT: ++ if (op->u.cut_or_kill.is_cut) { ++ print_coord("\tfrom", ++ op->u.cut_or_kill.u.kill->params.from, 0); ++ print_coord("\tto", op->u.cut_or_kill.u.kill->params.to, ++ 0); ++ } else { ++ print_coord("\tfrom", ++ op->u.cut_or_kill.u.cut->params.from, 0); ++ print_coord("\tto", op->u.cut_or_kill.u.cut->params.to, ++ 0); ++ } ++ break; ++ case COP_UPDATE: ++ print_carry("\tleft", op->u.update.left); ++ break; ++ default: ++ /* do nothing */ ++ break; ++ } ++} ++ ++/* dump information about all nodes and operations in a @level */ ++static void print_level(const char *prefix /* prefix to print */ , ++ carry_level * level /* level to print */ ) ++{ ++ carry_node *node; ++ carry_node *tmp_node; ++ carry_op *op; ++ carry_op *tmp_op; ++ ++ if (level == NULL) { ++ printk("%s: null\n", prefix); ++ return; ++ } ++ printk("%s: %p, restartable: %s\n", ++ prefix, level, tf(level->restartable)); ++ ++ for_all_nodes(level, node, tmp_node) ++ print_carry("\tcarry node", node); ++ for_all_ops(level, op, tmp_op) ++ print_op("\tcarry op", op); ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/carry.h b/fs/reiser4/carry.h +new file mode 100644 +index 0000000..6341d73 +--- /dev/null ++++ b/fs/reiser4/carry.h +@@ -0,0 +1,442 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Functions and data types to "carry" tree modification(s) upward. ++ See fs/reiser4/carry.c for details. */ ++ ++#if !defined( __FS_REISER4_CARRY_H__ ) ++#define __FS_REISER4_CARRY_H__ ++ ++#include "forward.h" ++#include "debug.h" ++#include "pool.h" ++#include "znode.h" ++ ++#include ++ ++/* &carry_node - "location" of carry node. 
++ ++ "location" of node that is involved or going to be involved into ++ carry process. Node where operation will be carried to on the ++ parent level cannot be recorded explicitly. Operation will be carried ++ usually to the parent of some node (where changes are performed at ++ the current level) or, to the left neighbor of its parent. But while ++ modifications are performed at the current level, parent may ++ change. So, we have to allow some indirection (or, positevly, ++ flexibility) in locating carry nodes. ++ ++*/ ++typedef struct carry_node { ++ /* pool linkage */ ++ reiser4_pool_header header; ++ ++ /* base node from which real_node is calculated. See ++ fs/reiser4/carry.c:lock_carry_node(). */ ++ znode *node; ++ ++ /* how to get ->real_node */ ++ /* to get ->real_node obtain parent of ->node */ ++ __u32 parent:1; ++ /* to get ->real_node obtain left neighbor of parent of ++ ->node */ ++ __u32 left:1; ++ __u32 left_before:1; ++ ++ /* locking */ ++ ++ /* this node was locked by carry process and should be ++ unlocked when carry leaves a level */ ++ __u32 unlock:1; ++ ++ /* disk block for this node was allocated by carry process and ++ should be deallocated when carry leaves a level */ ++ __u32 deallocate:1; ++ /* this carry node was allocated by carry process and should be ++ freed when carry leaves a level */ ++ __u32 free:1; ++ ++ /* type of lock we want to take on this node */ ++ lock_handle lock_handle; ++} carry_node; ++ ++/* &carry_opcode - elementary operations that can be carried upward ++ ++ Operations that carry() can handle. This list is supposed to be ++ expanded. ++ ++ Each carry operation (cop) is handled by appropriate function defined ++ in fs/reiser4/carry.c. For example COP_INSERT is handled by ++ fs/reiser4/carry.c:carry_insert() etc. These functions in turn ++ call plugins of nodes affected by operation to modify nodes' content ++ and to gather operations to be performed on the next level. 
++ ++*/ ++typedef enum { ++ /* insert new item into node. */ ++ COP_INSERT, ++ /* delete pointer from parent node */ ++ COP_DELETE, ++ /* remove part of or whole node. */ ++ COP_CUT, ++ /* increase size of item. */ ++ COP_PASTE, ++ /* insert extent (that is sequence of unformatted nodes). */ ++ COP_EXTENT, ++ /* update delimiting key in least common ancestor of two ++ nodes. This is performed when items are moved between two ++ nodes. ++ */ ++ COP_UPDATE, ++ /* insert flow */ ++ COP_INSERT_FLOW, ++ COP_LAST_OP, ++} carry_opcode; ++ ++#define CARRY_FLOW_NEW_NODES_LIMIT 20 ++ ++/* mode (or subtype) of COP_{INSERT|PASTE} operation. Specifies how target ++ item is determined. */ ++typedef enum { ++ /* target item is one containing pointer to the ->child node */ ++ COPT_CHILD, ++ /* target item is given explicitly by @coord */ ++ COPT_ITEM_DATA, ++ /* target item is given by key */ ++ COPT_KEY, ++ /* see insert_paste_common() for more comments on this. */ ++ COPT_PASTE_RESTARTED, ++} cop_insert_pos_type; ++ ++/* flags to cut and delete */ ++typedef enum { ++ /* don't kill node even if it became completely empty as results of ++ * cut. This is needed for eottl handling. See carry_extent() for ++ * details. */ ++ DELETE_RETAIN_EMPTY = (1 << 0) ++} cop_delete_flag; ++ ++/* ++ * carry() implements "lock handle tracking" feature. ++ * ++ * Callers supply carry with node where to perform initial operation and lock ++ * handle on this node. Trying to optimize node utilization carry may actually ++ * move insertion point to different node. Callers expect that lock handle ++ * will rebe transferred to the new node also. ++ * ++ */ ++typedef enum { ++ /* transfer lock handle along with insertion point */ ++ CARRY_TRACK_CHANGE = 1, ++ /* acquire new lock handle to the node where insertion point is. This ++ * is used when carry() client doesn't initially possess lock handle ++ * on the insertion point node, for example, by extent insertion ++ * code. See carry_extent(). 
*/ ++ CARRY_TRACK_NODE = 2 ++} carry_track_type; ++ ++/* data supplied to COP_{INSERT|PASTE} by callers */ ++typedef struct carry_insert_data { ++ /* position where new item is to be inserted */ ++ coord_t *coord; ++ /* new item description */ ++ reiser4_item_data *data; ++ /* key of new item */ ++ const reiser4_key *key; ++} carry_insert_data; ++ ++/* cut and kill are similar, so carry_cut_data and carry_kill_data share the below structure of parameters */ ++struct cut_kill_params { ++ /* coord where cut starts (inclusive) */ ++ coord_t *from; ++ /* coord where cut stops (inclusive, this item/unit will also be ++ * cut) */ ++ coord_t *to; ++ /* starting key. This is necessary when item and unit pos don't ++ * uniquely identify what portion or tree to remove. For example, this ++ * indicates what portion of extent unit will be affected. */ ++ const reiser4_key *from_key; ++ /* exclusive stop key */ ++ const reiser4_key *to_key; ++ /* if this is not NULL, smallest actually removed key is stored ++ * here. */ ++ reiser4_key *smallest_removed; ++ /* kill_node_content() is called for file truncate */ ++ int truncate; ++}; ++ ++struct carry_cut_data { ++ struct cut_kill_params params; ++}; ++ ++struct carry_kill_data { ++ struct cut_kill_params params; ++ /* parameter to be passed to the ->kill_hook() method of item ++ * plugin */ ++ /*void *iplug_params; *//* FIXME: unused currently */ ++ /* if not NULL---inode whose items are being removed. This is needed ++ * for ->kill_hook() of extent item to update VM structures when ++ * removing pages. */ ++ struct inode *inode; ++ /* sibling list maintenance is complicated by existence of eottl. When ++ * eottl whose left and right neighbors are formatted leaves is ++ * removed, one has to connect said leaves in the sibling list. This ++ * cannot be done when extent removal is just started as locking rules ++ * require sibling list update to happen atomically with removal of ++ * extent item. Therefore: 1. 
pointers to left and right neighbors ++ * have to be passed down to the ->kill_hook() of extent item, and ++ * 2. said neighbors have to be locked. */ ++ lock_handle *left; ++ lock_handle *right; ++ /* flags modifying behavior of kill. Currently, it may have DELETE_RETAIN_EMPTY set. */ ++ unsigned flags; ++ char *buf; ++}; ++ ++/* &carry_tree_op - operation to "carry" upward. ++ ++ Description of an operation we want to "carry" to the upper level of ++ a tree: e.g, when we insert something and there is not enough space ++ we allocate a new node and "carry" the operation of inserting a ++ pointer to the new node to the upper level, on removal of empty node, ++ we carry up operation of removing appropriate entry from parent. ++ ++ There are two types of carry ops: when adding or deleting node we ++ node at the parent level where appropriate modification has to be ++ performed is known in advance. When shifting items between nodes ++ (split, merge), delimiting key should be changed in the least common ++ parent of the nodes involved that is not known in advance. ++ ++ For the operations of the first type we store in &carry_op pointer to ++ the &carry_node at the parent level. For the operation of the second ++ type we store &carry_node or parents of the left and right nodes ++ modified and keep track of them upward until they coincide. ++ ++*/ ++typedef struct carry_op { ++ /* pool linkage */ ++ reiser4_pool_header header; ++ carry_opcode op; ++ /* node on which operation is to be performed: ++ ++ for insert, paste: node where new item is to be inserted ++ ++ for delete: node where pointer is to be deleted ++ ++ for cut: node to cut from ++ ++ for update: node where delimiting key is to be modified ++ ++ for modify: parent of modified node ++ ++ */ ++ carry_node *node; ++ union { ++ struct { ++ /* (sub-)type of insertion/paste. Taken from ++ cop_insert_pos_type. */ ++ __u8 type; ++ /* various operation flags. Taken from ++ cop_insert_flag. 
*/ ++ __u8 flags; ++ carry_insert_data *d; ++ carry_node *child; ++ znode *brother; ++ } insert, paste, extent; ++ ++ struct { ++ int is_cut; ++ union { ++ carry_kill_data *kill; ++ carry_cut_data *cut; ++ } u; ++ } cut_or_kill; ++ ++ struct { ++ carry_node *left; ++ } update; ++ struct { ++ /* changed child */ ++ carry_node *child; ++ /* bitmask of changes. See &cop_modify_flag */ ++ __u32 flag; ++ } modify; ++ struct { ++ /* flags to deletion operation. Are taken from ++ cop_delete_flag */ ++ __u32 flags; ++ /* child to delete from parent. If this is ++ NULL, delete op->node. */ ++ carry_node *child; ++ } delete; ++ struct { ++ /* various operation flags. Taken from ++ cop_insert_flag. */ ++ __u32 flags; ++ flow_t *flow; ++ coord_t *insert_point; ++ reiser4_item_data *data; ++ /* flow insertion is limited by number of new blocks ++ added in that operation which do not get any data ++ but part of flow. This limit is set by macro ++ CARRY_FLOW_NEW_NODES_LIMIT. This field stores number ++ of nodes added already during one carry_flow */ ++ int new_nodes; ++ } insert_flow; ++ } u; ++} carry_op; ++ ++/* &carry_op_pool - preallocated pool of carry operations, and nodes */ ++typedef struct carry_pool { ++ carry_op op[CARRIES_POOL_SIZE]; ++ reiser4_pool op_pool; ++ carry_node node[NODES_LOCKED_POOL_SIZE]; ++ reiser4_pool node_pool; ++} carry_pool; ++ ++/* &carry_tree_level - carry process on given level ++ ++ Description of balancing process on the given level. ++ ++ No need for locking here, as carry_tree_level is essentially per ++ thread thing (for now). 
++ ++*/ ++struct carry_level { ++ /* this level may be restarted */ ++ __u32 restartable:1; ++ /* list of carry nodes on this level, ordered by key order */ ++ struct list_head nodes; ++ struct list_head ops; ++ /* pool where new objects are allocated from */ ++ carry_pool *pool; ++ int ops_num; ++ int nodes_num; ++ /* new root created on this level, if any */ ++ znode *new_root; ++ /* This is set by caller (insert_by_key(), rreiser4_esize_item(), etc.) ++ when they want ->tracked to automagically wander to the node where ++ insertion point moved after insert or paste. ++ */ ++ carry_track_type track_type; ++ /* lock handle supplied by user that we are tracking. See ++ above. */ ++ lock_handle *tracked; ++}; ++ ++/* information carry passes to plugin methods that may add new operations to ++ the @todo queue */ ++struct carry_plugin_info { ++ carry_level *doing; ++ carry_level *todo; ++}; ++ ++int reiser4_carry(carry_level * doing, carry_level * done); ++ ++carry_node *reiser4_add_carry(carry_level * level, pool_ordering order, ++ carry_node * reference); ++carry_node *reiser4_add_carry_skip(carry_level * level, pool_ordering order, ++ carry_node * reference); ++ ++extern carry_node *insert_carry_node(carry_level * doing, ++ carry_level * todo, const znode * node); ++ ++extern carry_pool *init_carry_pool(int); ++extern void done_carry_pool(carry_pool * pool); ++ ++extern void init_carry_level(carry_level * level, carry_pool * pool); ++ ++extern carry_op *reiser4_post_carry(carry_level * level, carry_opcode op, ++ znode * node, int apply_to_parent); ++extern carry_op *node_post_carry(carry_plugin_info * info, carry_opcode op, ++ znode * node, int apply_to_parent_p); ++ ++carry_node *add_new_znode(znode * brother, carry_node * reference, ++ carry_level * doing, carry_level * todo); ++ ++carry_node *find_carry_node(carry_level * level, const znode * node); ++ ++extern znode *reiser4_carry_real(const carry_node * node); ++ ++/* helper macros to iterate over carry queues 
*/ ++ ++#define carry_node_next( node ) \ ++ list_entry((node)->header.level_linkage.next, carry_node, \ ++ header.level_linkage) ++ ++#define carry_node_prev( node ) \ ++ list_entry((node)->header.level_linkage.prev, carry_node, \ ++ header.level_linkage) ++ ++#define carry_node_front( level ) \ ++ list_entry((level)->nodes.next, carry_node, header.level_linkage) ++ ++#define carry_node_back( level ) \ ++ list_entry((level)->nodes.prev, carry_node, header.level_linkage) ++ ++#define carry_node_end( level, node ) \ ++ (&(level)->nodes == &(node)->header.level_linkage) ++ ++/* macro to iterate over all operations in a @level */ ++#define for_all_ops( level /* carry level (of type carry_level *) */, \ ++ op /* pointer to carry operation, modified by loop (of \ ++ * type carry_op *) */, \ ++ tmp /* pointer to carry operation (of type carry_op *), \ ++ * used to make iterator stable in the face of \ ++ * deletions from the level */ ) \ ++for (op = list_entry(level->ops.next, carry_op, header.level_linkage), \ ++ tmp = list_entry(op->header.level_linkage.next, carry_op, header.level_linkage); \ ++ &op->header.level_linkage != &level->ops; \ ++ op = tmp, \ ++ tmp = list_entry(op->header.level_linkage.next, carry_op, header.level_linkage)) ++ ++#if 0 ++for( op = ( carry_op * ) pool_level_list_front( &level -> ops ), \ ++ tmp = ( carry_op * ) pool_level_list_next( &op -> header ) ; \ ++ ! 
pool_level_list_end( &level -> ops, &op -> header ) ; \ ++ op = tmp, tmp = ( carry_op * ) pool_level_list_next( &op -> header ) ) ++#endif ++ ++/* macro to iterate over all nodes in a @level */ \ ++#define for_all_nodes( level /* carry level (of type carry_level *) */, \ ++ node /* pointer to carry node, modified by loop (of \ ++ * type carry_node *) */, \ ++ tmp /* pointer to carry node (of type carry_node *), \ ++ * used to make iterator stable in the face of * \ ++ * deletions from the level */ ) \ ++for (node = list_entry(level->nodes.next, carry_node, header.level_linkage), \ ++ tmp = list_entry(node->header.level_linkage.next, carry_node, header.level_linkage); \ ++ &node->header.level_linkage != &level->nodes; \ ++ node = tmp, \ ++ tmp = list_entry(node->header.level_linkage.next, carry_node, header.level_linkage)) ++ ++#if 0 ++for( node = carry_node_front( level ), \ ++ tmp = carry_node_next( node ) ; ! carry_node_end( level, node ) ; \ ++ node = tmp, tmp = carry_node_next( node ) ) ++#endif ++ ++/* macro to iterate over all nodes in a @level in reverse order ++ ++ This is used, because nodes are unlocked in reversed order of locking */ ++#define for_all_nodes_back( level /* carry level (of type carry_level *) */, \ ++ node /* pointer to carry node, modified by loop \ ++ * (of type carry_node *) */, \ ++ tmp /* pointer to carry node (of type carry_node \ ++ * *), used to make iterator stable in the \ ++ * face of deletions from the level */ ) \ ++for( node = carry_node_back( level ), \ ++ tmp = carry_node_prev( node ) ; ! carry_node_end( level, node ) ; \ ++ node = tmp, tmp = carry_node_prev( node ) ) ++ ++/* __FS_REISER4_CARRY_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/carry_ops.c b/fs/reiser4/carry_ops.c +new file mode 100644 +index 0000000..8ce8e95 +--- /dev/null ++++ b/fs/reiser4/carry_ops.c +@@ -0,0 +1,2131 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* implementation of carry operations */ ++ ++#include "forward.h" ++#include "debug.h" ++#include "key.h" ++#include "coord.h" ++#include "plugin/item/item.h" ++#include "plugin/node/node.h" ++#include "jnode.h" ++#include "znode.h" ++#include "block_alloc.h" ++#include "tree_walk.h" ++#include "pool.h" ++#include "tree_mod.h" ++#include "carry.h" ++#include "carry_ops.h" ++#include "tree.h" ++#include "super.h" ++#include "reiser4.h" ++ ++#include ++#include ++ ++static int carry_shift_data(sideof side, coord_t * insert_coord, znode * node, ++ carry_level * doing, carry_level * todo, ++ unsigned int including_insert_coord_p); ++ ++extern int lock_carry_node(carry_level * level, carry_node * node); ++extern int lock_carry_node_tail(carry_node * node); ++ ++/* find left neighbor of a carry node ++ ++ Look for left neighbor of @node and add it to the @doing queue. See ++ comments in the body. ++ ++*/ ++static carry_node *find_left_neighbor(carry_op * op /* node to find left ++ * neighbor of */ , ++ carry_level * doing /* level to scan */ ) ++{ ++ int result; ++ carry_node *node; ++ carry_node *left; ++ int flags; ++ reiser4_tree *tree; ++ ++ node = op->node; ++ ++ tree = current_tree; ++ read_lock_tree(tree); ++ /* first, check whether left neighbor is already in a @doing queue */ ++ if (reiser4_carry_real(node)->left != NULL) { ++ /* NOTE: there is locking subtlety here.
Look into ++ * find_right_neighbor() for more info */ ++ if (find_carry_node(doing, ++ reiser4_carry_real(node)->left) != NULL) { ++ read_unlock_tree(tree); ++ left = node; ++ do { ++ left = list_entry(left->header.level_linkage.prev, ++ carry_node, header.level_linkage); ++ assert("nikita-3408", !carry_node_end(doing, ++ left)); ++ } while (reiser4_carry_real(left) == ++ reiser4_carry_real(node)); ++ return left; ++ } ++ } ++ read_unlock_tree(tree); ++ ++ left = reiser4_add_carry_skip(doing, POOLO_BEFORE, node); ++ if (IS_ERR(left)) ++ return left; ++ ++ left->node = node->node; ++ left->free = 1; ++ ++ flags = GN_TRY_LOCK; ++ if (!(op->u.insert.flags & COPI_LOAD_LEFT)) ++ flags |= GN_NO_ALLOC; ++ ++ /* then, feeling lucky, peek left neighbor in the cache. */ ++ result = reiser4_get_left_neighbor(&left->lock_handle, ++ reiser4_carry_real(node), ++ ZNODE_WRITE_LOCK, flags); ++ if (result == 0) { ++ /* ok, node found and locked. */ ++ result = lock_carry_node_tail(left); ++ if (result != 0) ++ left = ERR_PTR(result); ++ } else if (result == -E_NO_NEIGHBOR || result == -ENOENT) { ++ /* node is leftmost node in a tree, or neighbor wasn't in ++ cache, or there is an extent on the left. */ ++ reiser4_pool_free(&doing->pool->node_pool, &left->header); ++ left = NULL; ++ } else if (doing->restartable) { ++ /* if left neighbor is locked, and level is restartable, add ++ new node to @doing and restart. */ ++ assert("nikita-913", node->parent != 0); ++ assert("nikita-914", node->node != NULL); ++ left->left = 1; ++ left->free = 0; ++ left = ERR_PTR(-E_REPEAT); ++ } else { ++ /* left neighbor is locked, level cannot be restarted. Just ++ ignore left neighbor. */ ++ reiser4_pool_free(&doing->pool->node_pool, &left->header); ++ left = NULL; ++ } ++ return left; ++} ++ ++/* find right neighbor of a carry node ++ ++ Look for right neighbor of @node and add it to the @doing queue. See ++ comments in the body.
++ ++*/ ++static carry_node *find_right_neighbor(carry_op * op /* node to find right ++ * neighbor of */ , ++ carry_level * doing /* level to scan */ ) ++{ ++ int result; ++ carry_node *node; ++ carry_node *right; ++ lock_handle lh; ++ int flags; ++ reiser4_tree *tree; ++ ++ init_lh(&lh); ++ ++ node = op->node; ++ ++ tree = current_tree; ++ read_lock_tree(tree); ++ /* first, check whether right neighbor is already in a @doing queue */ ++ if (reiser4_carry_real(node)->right != NULL) { ++ /* ++ * Tree lock is taken here anyway, because, even if _outcome_ ++ * of (find_carry_node() != NULL) doesn't depends on ++ * concurrent updates to ->right, find_carry_node() cannot ++ * work with second argument NULL. Hence, following comment is ++ * of historic importance only. ++ * ++ * Subtle: ++ * ++ * Q: why don't we need tree lock here, looking for the right ++ * neighbor? ++ * ++ * A: even if value of node->real_node->right were changed ++ * during find_carry_node() execution, outcome of execution ++ * wouldn't change, because (in short) other thread cannot add ++ * elements to the @doing, and if node->real_node->right ++ * already was in @doing, value of node->real_node->right ++ * couldn't change, because node cannot be inserted between ++ * locked neighbors. ++ */ ++ if (find_carry_node(doing, ++ reiser4_carry_real(node)->right) != NULL) { ++ read_unlock_tree(tree); ++ /* ++ * What we are doing here (this is also applicable to ++ * the find_left_neighbor()). ++ * ++ * tree_walk.c code requires that insertion of a ++ * pointer to a child, modification of parent pointer ++ * in the child, and insertion of the child into ++ * sibling list are atomic (see ++ * plugin/item/internal.c:create_hook_internal()). ++ * ++ * carry allocates new node long before pointer to it ++ * is inserted into parent and, actually, long before ++ * parent is even known. Such allocated-but-orphaned ++ * nodes are only trackable through carry level lists. 
++ * ++ * Situation that is handled here is following: @node ++ * has valid ->right pointer, but there is ++ * allocated-but-orphaned node in the carry queue that ++ * is logically between @node and @node->right. Here ++ * we are searching for it. Critical point is that ++ * this is only possible if @node->right is also in ++ * the carry queue (this is checked above), because ++ * this is the only way new orphaned node could be ++ * inserted between them (before inserting new node, ++ * make_space() first tries to shift to the right, so, ++ * right neighbor will be locked and queued). ++ * ++ */ ++ right = node; ++ do { ++ right = list_entry(right->header.level_linkage.next, ++ carry_node, header.level_linkage); ++ assert("nikita-3408", !carry_node_end(doing, ++ right)); ++ } while (reiser4_carry_real(right) == ++ reiser4_carry_real(node)); ++ return right; ++ } ++ } ++ read_unlock_tree(tree); ++ ++ flags = GN_CAN_USE_UPPER_LEVELS; ++ if (!op->u.insert.flags & COPI_LOAD_RIGHT) ++ flags = GN_NO_ALLOC; ++ ++ /* then, try to lock right neighbor */ ++ init_lh(&lh); ++ result = reiser4_get_right_neighbor(&lh, ++ reiser4_carry_real(node), ++ ZNODE_WRITE_LOCK, flags); ++ if (result == 0) { ++ /* ok, node found and locked. */ ++ right = reiser4_add_carry_skip(doing, POOLO_AFTER, node); ++ if (!IS_ERR(right)) { ++ right->node = lh.node; ++ move_lh(&right->lock_handle, &lh); ++ right->free = 1; ++ result = lock_carry_node_tail(right); ++ if (result != 0) ++ right = ERR_PTR(result); ++ } ++ } else if ((result == -E_NO_NEIGHBOR) || (result == -ENOENT)) { ++ /* node is rightmost node in a tree, or neighbor wasn't in ++ cache, or there is an extent on the right. */ ++ right = NULL; ++ } else ++ right = ERR_PTR(result); ++ done_lh(&lh); ++ return right; ++} ++ ++/* how much free space in a @node is needed for @op ++ ++ How much space in @node is required for completion of @op, where @op is ++ insert or paste operation. 
++*/ ++static unsigned int space_needed_for_op(znode * node /* znode data are ++ * inserted or ++ * pasted in */ , ++ carry_op * op /* carry ++ operation */ ) ++{ ++ assert("nikita-919", op != NULL); ++ ++ switch (op->op) { ++ default: ++ impossible("nikita-1701", "Wrong opcode"); ++ case COP_INSERT: ++ return space_needed(node, NULL, op->u.insert.d->data, 1); ++ case COP_PASTE: ++ return space_needed(node, op->u.insert.d->coord, ++ op->u.insert.d->data, 0); ++ } ++} ++ ++/* how much space in @node is required to insert or paste @data at ++ @coord. */ ++unsigned int space_needed(const znode * node /* node data are inserted or ++ * pasted in */ , ++ const coord_t * coord /* coord where data are ++ * inserted or pasted ++ * at */ , ++ const reiser4_item_data * data /* data to insert or ++ * paste */ , ++ int insertion /* non-0 is inserting, 0---paste */ ) ++{ ++ int result; ++ item_plugin *iplug; ++ ++ assert("nikita-917", node != NULL); ++ assert("nikita-918", node_plugin_by_node(node) != NULL); ++ assert("vs-230", !insertion || (coord == NULL)); ++ ++ result = 0; ++ iplug = data->iplug; ++ if (iplug->b.estimate != NULL) { ++ /* ask item plugin how much space is needed to insert this ++ item */ ++ result += iplug->b.estimate(insertion ? NULL : coord, data); ++ } else { ++ /* reasonable default */ ++ result += data->length; ++ } ++ if (insertion) { ++ node_plugin *nplug; ++ ++ nplug = node->nplug; ++ /* and add node overhead */ ++ if (nplug->item_overhead != NULL) { ++ result += nplug->item_overhead(node, NULL); ++ } ++ } ++ return result; ++} ++ ++/* find &coord in parent where pointer to new child is to be stored. 
*/ ++static int find_new_child_coord(carry_op * op /* COP_INSERT carry operation to ++ * insert pointer to new ++ * child */ ) ++{ ++ int result; ++ znode *node; ++ znode *child; ++ ++ assert("nikita-941", op != NULL); ++ assert("nikita-942", op->op == COP_INSERT); ++ ++ node = reiser4_carry_real(op->node); ++ assert("nikita-943", node != NULL); ++ assert("nikita-944", node_plugin_by_node(node) != NULL); ++ ++ child = reiser4_carry_real(op->u.insert.child); ++ result = ++ find_new_child_ptr(node, child, op->u.insert.brother, ++ op->u.insert.d->coord); ++ ++ build_child_ptr_data(child, op->u.insert.d->data); ++ return result; ++} ++ ++/* additional amount of free space in @node required to complete @op */ ++static int free_space_shortage(znode * node /* node to check */ , ++ carry_op * op /* operation being performed */ ) ++{ ++ assert("nikita-1061", node != NULL); ++ assert("nikita-1062", op != NULL); ++ ++ switch (op->op) { ++ default: ++ impossible("nikita-1702", "Wrong opcode"); ++ case COP_INSERT: ++ case COP_PASTE: ++ return space_needed_for_op(node, op) - znode_free_space(node); ++ case COP_EXTENT: ++ /* when inserting extent shift data around until insertion ++ point is utmost in the node. */ ++ if (coord_wrt(op->u.insert.d->coord) == COORD_INSIDE) ++ return +1; ++ else ++ return -1; ++ } ++} ++ ++/* helper function: update node pointer in operation after insertion ++ point was probably shifted into @target. */ ++static znode *sync_op(carry_op * op, carry_node * target) ++{ ++ znode *insertion_node; ++ ++ /* reget node from coord: shift might move insertion coord to ++ the neighbor */ ++ insertion_node = op->u.insert.d->coord->node; ++ /* if insertion point was actually moved into new node, ++ update carry node pointer in operation. 
*/ ++ if (insertion_node != reiser4_carry_real(op->node)) { ++ op->node = target; ++ assert("nikita-2540", ++ reiser4_carry_real(target) == insertion_node); ++ } ++ assert("nikita-2541", ++ reiser4_carry_real(op->node) == op->u.insert.d->coord->node); ++ return insertion_node; ++} ++ ++/* ++ * complete make_space() call: update tracked lock handle if necessary. See ++ * comments for fs/reiser4/carry.h:carry_track_type ++ */ ++static int ++make_space_tail(carry_op * op, carry_level * doing, znode * orig_node) ++{ ++ int result; ++ carry_track_type tracking; ++ znode *node; ++ ++ tracking = doing->track_type; ++ node = op->u.insert.d->coord->node; ++ ++ if (tracking == CARRY_TRACK_NODE || ++ (tracking == CARRY_TRACK_CHANGE && node != orig_node)) { ++ /* inserting or pasting into node different from ++ original. Update lock handle supplied by caller. */ ++ assert("nikita-1417", doing->tracked != NULL); ++ done_lh(doing->tracked); ++ init_lh(doing->tracked); ++ result = longterm_lock_znode(doing->tracked, node, ++ ZNODE_WRITE_LOCK, ++ ZNODE_LOCK_HIPRI); ++ } else ++ result = 0; ++ return result; ++} ++ ++/* This is insertion policy function. It shifts data to the left and right ++ neighbors of insertion coord and allocates new nodes until there is enough ++ free space to complete @op. ++ ++ See comments in the body. ++ ++ Assumes that the node format favors insertions at the right end of the node ++ as node40 does. 
++ ++ See carry_flow() on detail about flow insertion ++*/ ++static int make_space(carry_op * op /* carry operation, insert or paste */ , ++ carry_level * doing /* current carry queue */ , ++ carry_level * todo /* carry queue on the parent level */ ) ++{ ++ znode *node; ++ int result; ++ int not_enough_space; ++ int blk_alloc; ++ znode *orig_node; ++ __u32 flags; ++ ++ coord_t *coord; ++ ++ assert("nikita-890", op != NULL); ++ assert("nikita-891", todo != NULL); ++ assert("nikita-892", ++ op->op == COP_INSERT || ++ op->op == COP_PASTE || op->op == COP_EXTENT); ++ assert("nikita-1607", ++ reiser4_carry_real(op->node) == op->u.insert.d->coord->node); ++ ++ flags = op->u.insert.flags; ++ ++ /* NOTE check that new node can only be allocated after checking left ++ * and right neighbors. This is necessary for proper work of ++ * find_{left,right}_neighbor(). */ ++ assert("nikita-3410", ergo(flags & COPI_DONT_ALLOCATE, ++ flags & COPI_DONT_SHIFT_LEFT)); ++ assert("nikita-3411", ergo(flags & COPI_DONT_ALLOCATE, ++ flags & COPI_DONT_SHIFT_RIGHT)); ++ ++ coord = op->u.insert.d->coord; ++ orig_node = node = coord->node; ++ ++ assert("nikita-908", node != NULL); ++ assert("nikita-909", node_plugin_by_node(node) != NULL); ++ ++ result = 0; ++ /* If there is not enough space in a node, try to shift something to ++ the left neighbor. This is a bit tricky, as locking to the left is ++ low priority. This is handled by restart logic in carry(). ++ */ ++ not_enough_space = free_space_shortage(node, op); ++ if (not_enough_space <= 0) ++ /* it is possible that carry was called when there actually ++ was enough space in the node. For example, when inserting ++ leftmost item so that delimiting keys have to be updated. 
++ */ ++ return make_space_tail(op, doing, orig_node); ++ if (!(flags & COPI_DONT_SHIFT_LEFT)) { ++ carry_node *left; ++ /* make note in statistics of an attempt to move ++ something into the left neighbor */ ++ left = find_left_neighbor(op, doing); ++ if (unlikely(IS_ERR(left))) { ++ if (PTR_ERR(left) == -E_REPEAT) ++ return -E_REPEAT; ++ else { ++ /* some error other than restart request ++ occurred. This shouldn't happen. Issue a ++ warning and continue as if left neighbor ++ weren't existing. ++ */ ++ warning("nikita-924", ++ "Error accessing left neighbor: %li", ++ PTR_ERR(left)); ++ } ++ } else if (left != NULL) { ++ ++ /* shift everything possible on the left of and ++ including insertion coord into the left neighbor */ ++ result = carry_shift_data(LEFT_SIDE, coord, ++ reiser4_carry_real(left), ++ doing, todo, ++ flags & COPI_GO_LEFT); ++ ++ /* reget node from coord: shift_left() might move ++ insertion coord to the left neighbor */ ++ node = sync_op(op, left); ++ ++ not_enough_space = free_space_shortage(node, op); ++ /* There is not enough free space in @node, but ++ may be, there is enough free space in ++ @left. Various balancing decisions are valid here. ++ The same for the shifiting to the right. ++ */ ++ } ++ } ++ /* If there still is not enough space, shift to the right */ ++ if (not_enough_space > 0 && !(flags & COPI_DONT_SHIFT_RIGHT)) { ++ carry_node *right; ++ ++ right = find_right_neighbor(op, doing); ++ if (IS_ERR(right)) { ++ warning("nikita-1065", ++ "Error accessing right neighbor: %li", ++ PTR_ERR(right)); ++ } else if (right != NULL) { ++ /* node containing insertion point, and its right ++ neighbor node are write locked by now. 
++ ++ shift everything possible on the right of but ++ excluding insertion coord into the right neighbor ++ */ ++ result = carry_shift_data(RIGHT_SIDE, coord, ++ reiser4_carry_real(right), ++ doing, todo, ++ flags & COPI_GO_RIGHT); ++ /* reget node from coord: shift_right() might move ++ insertion coord to the right neighbor */ ++ node = sync_op(op, right); ++ not_enough_space = free_space_shortage(node, op); ++ } ++ } ++ /* If there is still not enough space, allocate new node(s). ++ ++ We try to allocate new blocks if COPI_DONT_ALLOCATE is not set in ++ the carry operation flags (currently this is needed during flush ++ only). ++ */ ++ for (blk_alloc = 0; ++ not_enough_space > 0 && result == 0 && blk_alloc < 2 && ++ !(flags & COPI_DONT_ALLOCATE); ++blk_alloc) { ++ carry_node *fresh; /* new node we are allocating */ ++ coord_t coord_shadow; /* remembered insertion point before ++ * shifting data into new node */ ++ carry_node *node_shadow; /* remembered insertion node before ++ * shifting */ ++ unsigned int gointo; /* whether insertion point should move ++ * into newly allocated node */ ++ ++ /* allocate new node on the right of @node. Znode and disk ++ fake block number for new node are allocated. ++ ++ add_new_znode() posts carry operation COP_INSERT with ++ COPT_CHILD option to the parent level to add ++ pointer to newly created node to its parent. ++ ++ Subtle point: if several new nodes are required to complete ++ insertion operation at this level, they will be inserted ++ into their parents in the order of creation, which means ++ that @node will be valid "cookie" at the time of insertion. ++ ++ */ ++ fresh = add_new_znode(node, op->node, doing, todo); ++ if (IS_ERR(fresh)) ++ return PTR_ERR(fresh); ++ ++ /* Try to shift into new node. 
*/ ++ result = lock_carry_node(doing, fresh); ++ zput(reiser4_carry_real(fresh)); ++ if (result != 0) { ++ warning("nikita-947", ++ "Cannot lock new node: %i", result); ++ return result; ++ } ++ ++ /* both nodes are write locked by now. ++ ++ shift everything possible on the right of and ++ including insertion coord into the right neighbor. ++ */ ++ coord_dup(&coord_shadow, op->u.insert.d->coord); ++ node_shadow = op->node; ++ /* move insertion point into newly created node if: ++ ++ . insertion point is rightmost in the source node, or ++ . this is not the first node we are allocating in a row. ++ */ ++ gointo = ++ (blk_alloc > 0) || ++ coord_is_after_rightmost(op->u.insert.d->coord); ++ ++ if (gointo && ++ op->op == COP_PASTE && ++ coord_is_existing_item(op->u.insert.d->coord) && ++ is_solid_item((item_plugin_by_coord(op->u.insert.d->coord)))) { ++ /* paste into solid (atomic) item, which can contain ++ only one unit, so we need to shift it right, where ++ insertion point supposed to be */ ++ ++ assert("edward-1444", op->u.insert.d->data->iplug == ++ item_plugin_by_id(STATIC_STAT_DATA_ID)); ++ assert("edward-1445", ++ op->u.insert.d->data->length > ++ node_plugin_by_node(coord->node)->free_space ++ (coord->node)); ++ ++ op->u.insert.d->coord->between = BEFORE_UNIT; ++ } ++ ++ result = carry_shift_data(RIGHT_SIDE, coord, ++ reiser4_carry_real(fresh), ++ doing, todo, gointo); ++ /* if insertion point was actually moved into new node, ++ update carry node pointer in operation. */ ++ node = sync_op(op, fresh); ++ not_enough_space = free_space_shortage(node, op); ++ if ((not_enough_space > 0) && (node != coord_shadow.node)) { ++ /* there is not enough free in new node. Shift ++ insertion point back to the @shadow_node so that ++ next new node would be inserted between ++ @shadow_node and @fresh. 
++ */ ++ coord_normalize(&coord_shadow); ++ coord_dup(coord, &coord_shadow); ++ node = coord->node; ++ op->node = node_shadow; ++ if (1 || (flags & COPI_STEP_BACK)) { ++ /* still not enough space?! Maybe there is ++ enough space in the source node (i.e., node ++ data are moved from) now. ++ */ ++ not_enough_space = ++ free_space_shortage(node, op); ++ } ++ } ++ } ++ if (not_enough_space > 0) { ++ if (!(flags & COPI_DONT_ALLOCATE)) ++ warning("nikita-948", "Cannot insert new item"); ++ result = -E_NODE_FULL; ++ } ++ assert("nikita-1622", ergo(result == 0, ++ reiser4_carry_real(op->node) == coord->node)); ++ assert("nikita-2616", coord == op->u.insert.d->coord); ++ if (result == 0) ++ result = make_space_tail(op, doing, orig_node); ++ return result; ++} ++ ++/* insert_paste_common() - common part of insert and paste operations ++ ++ This function performs common part of COP_INSERT and COP_PASTE. ++ ++ There are two ways in which insertion/paste can be requested: ++ ++ . by directly supplying reiser4_item_data. In this case, op -> ++ u.insert.type is set to COPT_ITEM_DATA. ++ ++ . by supplying child pointer to which is to inserted into parent. In this ++ case op -> u.insert.type == COPT_CHILD. ++ ++ . by supplying key of new item/unit. This is currently only used during ++ extent insertion ++ ++ This is required, because when new node is allocated we don't know at what ++ position pointer to it is to be stored in the parent. Actually, we don't ++ even know what its parent will be, because parent can be re-balanced ++ concurrently and new node re-parented, and because parent can be full and ++ pointer to the new node will go into some other node. ++ ++ insert_paste_common() resolves pointer to child node into position in the ++ parent by calling find_new_child_coord(), that fills ++ reiser4_item_data. After this, insertion/paste proceeds uniformly. ++ ++ Another complication is with finding free space during pasting. 
It may ++ happen that while shifting items to the neighbors and newly allocated ++ nodes, insertion coord can no longer be in the item we wanted to paste ++ into. At this point, paste becomes (morphs) into insert. Moreover free ++ space analysis has to be repeated, because amount of space required for ++ insertion is different from that of paste (item header overhead, etc). ++ ++ This function "unifies" different insertion modes (by resolving child ++ pointer or key into insertion coord), and then calls make_space() to free ++ enough space in the node by shifting data to the left and right and by ++ allocating new nodes if necessary. Carry operation knows amount of space ++ required for its completion. After enough free space is obtained, caller of ++ this function (carry_{insert,paste,etc.}) performs actual insertion/paste ++ by calling item plugin method. ++ ++*/ ++static int insert_paste_common(carry_op * op /* carry operation being ++ * performed */ , ++ carry_level * doing /* current carry level */ , ++ carry_level * todo /* next carry level */ , ++ carry_insert_data * cdata /* pointer to ++ * cdata */ , ++ coord_t * coord /* insertion/paste coord */ , ++ reiser4_item_data * data /* data to be ++ * inserted/pasted */ ) ++{ ++ assert("nikita-981", op != NULL); ++ assert("nikita-980", todo != NULL); ++ assert("nikita-979", (op->op == COP_INSERT) || (op->op == COP_PASTE) ++ || (op->op == COP_EXTENT)); ++ ++ if (op->u.insert.type == COPT_PASTE_RESTARTED) { ++ /* nothing to do. Fall through to make_space(). */ ++ ; ++ } else if (op->u.insert.type == COPT_KEY) { ++ node_search_result intra_node; ++ znode *node; ++ /* Problem with doing batching at the lowest level, is that ++ operations here are given by coords where modification is ++ to be performed, and one modification can invalidate coords ++ of all following operations. 
++ ++ So, we are implementing yet another type for operation that ++ will use (the only) "locator" stable across shifting of ++ data between nodes, etc.: key (COPT_KEY). ++ ++ This clause resolves key to the coord in the node. ++ ++ But node can change also. Probably some pieces have to be ++ added to the lock_carry_node(), to lock node by its key. ++ ++ */ ++ /* NOTE-NIKITA Lookup bias is fixed to FIND_EXACT. Complain ++ if you need something else. */ ++ op->u.insert.d->coord = coord; ++ node = reiser4_carry_real(op->node); ++ intra_node = node_plugin_by_node(node)->lookup ++ (node, op->u.insert.d->key, FIND_EXACT, ++ op->u.insert.d->coord); ++ if ((intra_node != NS_FOUND) && (intra_node != NS_NOT_FOUND)) { ++ warning("nikita-1715", "Intra node lookup failure: %i", ++ intra_node); ++ return intra_node; ++ } ++ } else if (op->u.insert.type == COPT_CHILD) { ++ /* if we are asked to insert pointer to the child into ++ internal node, first convert pointer to the child into ++ coord within parent node. ++ */ ++ znode *child; ++ int result; ++ ++ op->u.insert.d = cdata; ++ op->u.insert.d->coord = coord; ++ op->u.insert.d->data = data; ++ op->u.insert.d->coord->node = reiser4_carry_real(op->node); ++ result = find_new_child_coord(op); ++ child = reiser4_carry_real(op->u.insert.child); ++ if (result != NS_NOT_FOUND) { ++ warning("nikita-993", ++ "Cannot find a place for child pointer: %i", ++ result); ++ return result; ++ } ++ /* This only happens when we did multiple insertions at ++ the previous level, trying to insert single item and ++ it so happened, that insertion of pointers to all new ++ nodes before this one already caused parent node to ++ split (may be several times). ++ ++ I am going to come up with better solution. ++ ++ You are not expected to understand this. 
++ -- v6root/usr/sys/ken/slp.c ++ ++ Basically, what happens here is the following: carry came ++ to the parent level and is about to insert internal item ++ pointing to the child node that it just inserted in the ++ level below. Position where internal item is to be inserted ++ was found by find_new_child_coord() above, but node of the ++ current carry operation (that is, parent node of child ++ inserted on the previous level), was determined earlier in ++ the lock_carry_level/lock_carry_node. It could so happen ++ that other carry operations already performed on the parent ++ level already split parent node, so that insertion point ++ moved into another node. Handle this by creating new carry ++ node for insertion point if necessary. ++ */ ++ if (reiser4_carry_real(op->node) != ++ op->u.insert.d->coord->node) { ++ pool_ordering direction; ++ znode *z1; ++ znode *z2; ++ reiser4_key k1; ++ reiser4_key k2; ++ ++ /* ++ * determine in what direction insertion point ++ * moved. Do this by comparing delimiting keys. ++ */ ++ z1 = op->u.insert.d->coord->node; ++ z2 = reiser4_carry_real(op->node); ++ if (keyle(leftmost_key_in_node(z1, &k1), ++ leftmost_key_in_node(z2, &k2))) ++ /* insertion point moved to the left */ ++ direction = POOLO_BEFORE; ++ else ++ /* insertion point moved to the right */ ++ direction = POOLO_AFTER; ++ ++ op->node = reiser4_add_carry_skip(doing, ++ direction, op->node); ++ if (IS_ERR(op->node)) ++ return PTR_ERR(op->node); ++ op->node->node = op->u.insert.d->coord->node; ++ op->node->free = 1; ++ result = lock_carry_node(doing, op->node); ++ if (result != 0) ++ return result; ++ } ++ ++ /* ++ * set up key of an item being inserted: we are inserting ++ * internal item and its key is (by the very definition of ++ * search tree) is leftmost key in the child node. 
++ */ ++ write_lock_dk(znode_get_tree(child)); ++ op->u.insert.d->key = leftmost_key_in_node(child, ++ znode_get_ld_key(child)); ++ write_unlock_dk(znode_get_tree(child)); ++ op->u.insert.d->data->arg = op->u.insert.brother; ++ } else { ++ assert("vs-243", op->u.insert.d->coord != NULL); ++ op->u.insert.d->coord->node = reiser4_carry_real(op->node); ++ } ++ ++ /* find free space. */ ++ return make_space(op, doing, todo); ++} ++ ++/* handle carry COP_INSERT operation. ++ ++ Insert new item into node. New item can be given in one of two ways: ++ ++ - by passing &tree_coord and &reiser4_item_data as part of @op. This is ++ only applicable at the leaf/twig level. ++ ++ - by passing a child node pointer to which is to be inserted by this ++ operation. ++ ++*/ ++static int carry_insert(carry_op * op /* operation to perform */ , ++ carry_level * doing /* queue of operations @op ++ * is part of */ , ++ carry_level * todo /* queue where new operations ++ * are accumulated */ ) ++{ ++ znode *node; ++ carry_insert_data cdata; ++ coord_t coord; ++ reiser4_item_data data; ++ carry_plugin_info info; ++ int result; ++ ++ assert("nikita-1036", op != NULL); ++ assert("nikita-1037", todo != NULL); ++ assert("nikita-1038", op->op == COP_INSERT); ++ ++ coord_init_zero(&coord); ++ ++ /* perform common functionality of insert and paste. */ ++ result = insert_paste_common(op, doing, todo, &cdata, &coord, &data); ++ if (result != 0) ++ return result; ++ ++ node = op->u.insert.d->coord->node; ++ assert("nikita-1039", node != NULL); ++ assert("nikita-1040", node_plugin_by_node(node) != NULL); ++ ++ assert("nikita-949", ++ space_needed_for_op(node, op) <= znode_free_space(node)); ++ ++ /* ask node layout to create new item. 
*/ ++ info.doing = doing; ++ info.todo = todo; ++ result = node_plugin_by_node(node)->create_item ++ (op->u.insert.d->coord, op->u.insert.d->key, op->u.insert.d->data, ++ &info); ++ doing->restartable = 0; ++ znode_make_dirty(node); ++ ++ return result; ++} ++ ++/* ++ * Flow insertion code. COP_INSERT_FLOW is special tree operation that is ++ * supplied with a "flow" (that is, a stream of data) and inserts it into tree ++ * by slicing into multiple items. ++ */ ++ ++#define flow_insert_point(op) ( ( op ) -> u.insert_flow.insert_point ) ++#define flow_insert_flow(op) ( ( op ) -> u.insert_flow.flow ) ++#define flow_insert_data(op) ( ( op ) -> u.insert_flow.data ) ++ ++static size_t item_data_overhead(carry_op * op) ++{ ++ if (flow_insert_data(op)->iplug->b.estimate == NULL) ++ return 0; ++ return (flow_insert_data(op)->iplug->b. ++ estimate(NULL /* estimate insertion */ , flow_insert_data(op)) - ++ flow_insert_data(op)->length); ++} ++ ++/* FIXME-VS: this is called several times during one make_flow_for_insertion ++ and it will always return the same result. Some optimization could be made ++ by calculating this value once at the beginning and passing it around. 
That ++ would reduce some flexibility in future changes ++*/ ++static int can_paste(coord_t *, const reiser4_key *, const reiser4_item_data *); ++static size_t flow_insertion_overhead(carry_op * op) ++{ ++ znode *node; ++ size_t insertion_overhead; ++ ++ node = flow_insert_point(op)->node; ++ insertion_overhead = 0; ++ if (node->nplug->item_overhead && ++ !can_paste(flow_insert_point(op), &flow_insert_flow(op)->key, ++ flow_insert_data(op))) ++ insertion_overhead = ++ node->nplug->item_overhead(node, NULL) + ++ item_data_overhead(op); ++ return insertion_overhead; ++} ++ ++/* how many bytes of flow does fit to the node */ ++static int what_can_fit_into_node(carry_op * op) ++{ ++ size_t free, overhead; ++ ++ overhead = flow_insertion_overhead(op); ++ free = znode_free_space(flow_insert_point(op)->node); ++ if (free <= overhead) ++ return 0; ++ free -= overhead; ++ /* FIXME: flow->length is loff_t only to not get overflowed in case of expandign truncate */ ++ if (free < op->u.insert_flow.flow->length) ++ return free; ++ return (int)op->u.insert_flow.flow->length; ++} ++ ++/* in make_space_for_flow_insertion we need to check either whether whole flow ++ fits into a node or whether minimal fraction of flow fits into a node */ ++static int enough_space_for_whole_flow(carry_op * op) ++{ ++ return (unsigned)what_can_fit_into_node(op) == ++ op->u.insert_flow.flow->length; ++} ++ ++#define MIN_FLOW_FRACTION 1 ++static int enough_space_for_min_flow_fraction(carry_op * op) ++{ ++ assert("vs-902", coord_is_after_rightmost(flow_insert_point(op))); ++ ++ return what_can_fit_into_node(op) >= MIN_FLOW_FRACTION; ++} ++ ++/* this returns 0 if left neighbor was obtained successfully and everything ++ upto insertion point including it were shifted and left neighbor still has ++ some free space to put minimal fraction of flow into it */ ++static int ++make_space_by_shift_left(carry_op * op, carry_level * doing, carry_level * todo) ++{ ++ carry_node *left; ++ znode *orig; ++ ++ left = 
find_left_neighbor(op, doing); ++ if (unlikely(IS_ERR(left))) { ++ warning("vs-899", ++ "make_space_by_shift_left: " ++ "error accessing left neighbor: %li", PTR_ERR(left)); ++ return 1; ++ } ++ if (left == NULL) ++ /* left neighbor either does not exist or is unformatted ++ node */ ++ return 1; ++ ++ orig = flow_insert_point(op)->node; ++ /* try to shift content of node @orig from its head upto insert point ++ including insertion point into the left neighbor */ ++ carry_shift_data(LEFT_SIDE, flow_insert_point(op), ++ reiser4_carry_real(left), doing, todo, ++ 1 /* including insert point */); ++ if (reiser4_carry_real(left) != flow_insert_point(op)->node) { ++ /* insertion point did not move */ ++ return 1; ++ } ++ ++ /* insertion point is set after last item in the node */ ++ assert("vs-900", coord_is_after_rightmost(flow_insert_point(op))); ++ ++ if (!enough_space_for_min_flow_fraction(op)) { ++ /* insertion point node does not have enough free space to put ++ even minimal portion of flow into it, therefore, move ++ insertion point back to orig node (before first item) */ ++ coord_init_before_first_item(flow_insert_point(op), orig); ++ return 1; ++ } ++ ++ /* part of flow is to be written to the end of node */ ++ op->node = left; ++ return 0; ++} ++ ++/* this returns 0 if right neighbor was obtained successfully and everything to ++ the right of insertion point was shifted to it and node got enough free ++ space to put minimal fraction of flow into it */ ++static int ++make_space_by_shift_right(carry_op * op, carry_level * doing, ++ carry_level * todo) ++{ ++ carry_node *right; ++ ++ right = find_right_neighbor(op, doing); ++ if (unlikely(IS_ERR(right))) { ++ warning("nikita-1065", "shift_right_excluding_insert_point: " ++ "error accessing right neighbor: %li", PTR_ERR(right)); ++ return 1; ++ } ++ if (right) { ++ /* shift everything possible on the right of but excluding ++ insertion coord into the right neighbor */ ++ carry_shift_data(RIGHT_SIDE, 
flow_insert_point(op), ++ reiser4_carry_real(right), doing, todo, ++ 0 /* not including insert point */); ++ } else { ++ /* right neighbor either does not exist or is unformatted ++ node */ ++ ; ++ } ++ if (coord_is_after_rightmost(flow_insert_point(op))) { ++ if (enough_space_for_min_flow_fraction(op)) { ++ /* part of flow is to be written to the end of node */ ++ return 0; ++ } ++ } ++ ++ /* new node is to be added if insert point node did not get enough ++ space for whole flow */ ++ return 1; ++} ++ ++/* this returns 0 when insert coord is set at the node end and fraction of flow ++ fits into that node */ ++static int ++make_space_by_new_nodes(carry_op * op, carry_level * doing, carry_level * todo) ++{ ++ int result; ++ znode *node; ++ carry_node *new; ++ ++ node = flow_insert_point(op)->node; ++ ++ if (op->u.insert_flow.new_nodes == CARRY_FLOW_NEW_NODES_LIMIT) ++ return RETERR(-E_NODE_FULL); ++ /* add new node after insert point node */ ++ new = add_new_znode(node, op->node, doing, todo); ++ if (unlikely(IS_ERR(new))) { ++ return PTR_ERR(new); ++ } ++ result = lock_carry_node(doing, new); ++ zput(reiser4_carry_real(new)); ++ if (unlikely(result)) { ++ return result; ++ } ++ op->u.insert_flow.new_nodes++; ++ if (!coord_is_after_rightmost(flow_insert_point(op))) { ++ carry_shift_data(RIGHT_SIDE, flow_insert_point(op), ++ reiser4_carry_real(new), doing, todo, ++ 0 /* not including insert point */); ++ assert("vs-901", ++ coord_is_after_rightmost(flow_insert_point(op))); ++ ++ if (enough_space_for_min_flow_fraction(op)) { ++ return 0; ++ } ++ if (op->u.insert_flow.new_nodes == CARRY_FLOW_NEW_NODES_LIMIT) ++ return RETERR(-E_NODE_FULL); ++ ++ /* add one more new node */ ++ new = add_new_znode(node, op->node, doing, todo); ++ if (unlikely(IS_ERR(new))) { ++ return PTR_ERR(new); ++ } ++ result = lock_carry_node(doing, new); ++ zput(reiser4_carry_real(new)); ++ if (unlikely(result)) { ++ return result; ++ } ++ op->u.insert_flow.new_nodes++; ++ } ++ ++ /* move insertion 
point to new node */ ++ coord_init_before_first_item(flow_insert_point(op), ++ reiser4_carry_real(new)); ++ op->node = new; ++ return 0; ++} ++ ++static int ++make_space_for_flow_insertion(carry_op * op, carry_level * doing, ++ carry_level * todo) ++{ ++ __u32 flags = op->u.insert_flow.flags; ++ ++ if (enough_space_for_whole_flow(op)) { ++ /* whole flow fits into insert point node */ ++ return 0; ++ } ++ ++ if (!(flags & COPI_DONT_SHIFT_LEFT) ++ && (make_space_by_shift_left(op, doing, todo) == 0)) { ++ /* insert point is shifted to left neighbor of original insert ++ point node and is set after last unit in that node. It has ++ enough space to fit at least minimal fraction of flow. */ ++ return 0; ++ } ++ ++ if (enough_space_for_whole_flow(op)) { ++ /* whole flow fits into insert point node */ ++ return 0; ++ } ++ ++ if (!(flags & COPI_DONT_SHIFT_RIGHT) ++ && (make_space_by_shift_right(op, doing, todo) == 0)) { ++ /* insert point is still set to the same node, but there is ++ nothing to the right of insert point. 
*/ ++ return 0; ++ } ++ ++ if (enough_space_for_whole_flow(op)) { ++ /* whole flow fits into insert point node */ ++ return 0; ++ } ++ ++ return make_space_by_new_nodes(op, doing, todo); ++} ++ ++/* implements COP_INSERT_FLOW operation */ ++static int ++carry_insert_flow(carry_op * op, carry_level * doing, carry_level * todo) ++{ ++ int result; ++ flow_t *f; ++ coord_t *insert_point; ++ node_plugin *nplug; ++ carry_plugin_info info; ++ znode *orig_node; ++ lock_handle *orig_lh; ++ ++ f = op->u.insert_flow.flow; ++ result = 0; ++ ++ /* carry system needs this to work */ ++ info.doing = doing; ++ info.todo = todo; ++ ++ orig_node = flow_insert_point(op)->node; ++ orig_lh = doing->tracked; ++ ++ while (f->length) { ++ result = make_space_for_flow_insertion(op, doing, todo); ++ if (result) ++ break; ++ ++ insert_point = flow_insert_point(op); ++ nplug = node_plugin_by_node(insert_point->node); ++ ++ /* compose item data for insertion/pasting */ ++ flow_insert_data(op)->data = f->data; ++ flow_insert_data(op)->length = what_can_fit_into_node(op); ++ ++ if (can_paste(insert_point, &f->key, flow_insert_data(op))) { ++ /* insert point is set to item of file we are writing to and we have to append to it */ ++ assert("vs-903", insert_point->between == AFTER_UNIT); ++ nplug->change_item_size(insert_point, ++ flow_insert_data(op)->length); ++ flow_insert_data(op)->iplug->b.paste(insert_point, ++ flow_insert_data ++ (op), &info); ++ } else { ++ /* new item must be inserted */ ++ pos_in_node_t new_pos; ++ flow_insert_data(op)->length += item_data_overhead(op); ++ ++ /* FIXME-VS: this is because node40_create_item changes ++ insert_point for obscure reasons */ ++ switch (insert_point->between) { ++ case AFTER_ITEM: ++ new_pos = insert_point->item_pos + 1; ++ break; ++ case EMPTY_NODE: ++ new_pos = 0; ++ break; ++ case BEFORE_ITEM: ++ assert("vs-905", insert_point->item_pos == 0); ++ new_pos = 0; ++ break; ++ default: ++ impossible("vs-906", ++ "carry_insert_flow: invalid coord"); 
++ new_pos = 0; ++ break; ++ } ++ ++ nplug->create_item(insert_point, &f->key, ++ flow_insert_data(op), &info); ++ coord_set_item_pos(insert_point, new_pos); ++ } ++ coord_init_after_item_end(insert_point); ++ doing->restartable = 0; ++ znode_make_dirty(insert_point->node); ++ ++ move_flow_forward(f, (unsigned)flow_insert_data(op)->length); ++ } ++ ++ if (orig_node != flow_insert_point(op)->node) { ++ /* move lock to new insert point */ ++ done_lh(orig_lh); ++ init_lh(orig_lh); ++ result = ++ longterm_lock_znode(orig_lh, flow_insert_point(op)->node, ++ ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI); ++ } ++ ++ return result; ++} ++ ++/* implements COP_DELETE operation ++ ++ Remove pointer to @op -> u.delete.child from it's parent. ++ ++ This function also handles killing of a tree root is last pointer from it ++ was removed. This is complicated by our handling of "twig" level: root on ++ twig level is never killed. ++ ++*/ ++static int carry_delete(carry_op * op /* operation to be performed */ , ++ carry_level * doing UNUSED_ARG /* current carry ++ * level */ , ++ carry_level * todo /* next carry level */ ) ++{ ++ int result; ++ coord_t coord; ++ coord_t coord2; ++ znode *parent; ++ znode *child; ++ carry_plugin_info info; ++ reiser4_tree *tree; ++ ++ /* ++ * This operation is called to delete internal item pointing to the ++ * child node that was removed by carry from the tree on the previous ++ * tree level. ++ */ ++ ++ assert("nikita-893", op != NULL); ++ assert("nikita-894", todo != NULL); ++ assert("nikita-895", op->op == COP_DELETE); ++ ++ coord_init_zero(&coord); ++ coord_init_zero(&coord2); ++ ++ parent = reiser4_carry_real(op->node); ++ child = op->u.delete.child ? ++ reiser4_carry_real(op->u.delete.child) : op->node->node; ++ tree = znode_get_tree(child); ++ read_lock_tree(tree); ++ ++ /* ++ * @parent was determined when carry entered parent level ++ * (lock_carry_level/lock_carry_node). 
Since then, actual parent of ++ * @child node could change due to other carry operations performed on ++ * the parent level. Check for this. ++ */ ++ ++ if (znode_parent(child) != parent) { ++ /* NOTE-NIKITA add stat counter for this. */ ++ parent = znode_parent(child); ++ assert("nikita-2581", find_carry_node(doing, parent)); ++ } ++ read_unlock_tree(tree); ++ ++ assert("nikita-1213", znode_get_level(parent) > LEAF_LEVEL); ++ ++ /* Twig level horrors: tree should be of height at least 2. So, last ++ pointer from the root at twig level is preserved even if child is ++ empty. This is ugly, but so it was architectured. ++ */ ++ ++ if (znode_is_root(parent) && ++ znode_get_level(parent) <= REISER4_MIN_TREE_HEIGHT && ++ node_num_items(parent) == 1) { ++ /* Delimiting key manipulations. */ ++ write_lock_dk(tree); ++ znode_set_ld_key(child, znode_set_ld_key(parent, reiser4_min_key())); ++ znode_set_rd_key(child, znode_set_rd_key(parent, reiser4_max_key())); ++ ZF_SET(child, JNODE_DKSET); ++ write_unlock_dk(tree); ++ ++ /* @child escaped imminent death! */ ++ ZF_CLR(child, JNODE_HEARD_BANSHEE); ++ return 0; ++ } ++ ++ /* convert child pointer to the coord_t */ ++ result = find_child_ptr(parent, child, &coord); ++ if (result != NS_FOUND) { ++ warning("nikita-994", "Cannot find child pointer: %i", result); ++ print_coord_content("coord", &coord); ++ return result; ++ } ++ ++ coord_dup(&coord2, &coord); ++ info.doing = doing; ++ info.todo = todo; ++ { ++ /* ++ * Actually kill internal item: prepare structure with ++ * arguments for ->cut_and_kill() method... ++ */ ++ ++ struct carry_kill_data kdata; ++ kdata.params.from = &coord; ++ kdata.params.to = &coord2; ++ kdata.params.from_key = NULL; ++ kdata.params.to_key = NULL; ++ kdata.params.smallest_removed = NULL; ++ kdata.params.truncate = 1; ++ kdata.flags = op->u.delete.flags; ++ kdata.inode = NULL; ++ kdata.left = NULL; ++ kdata.right = NULL; ++ kdata.buf = NULL; ++ /* ... and call it. 
*/ ++ result = node_plugin_by_node(parent)->cut_and_kill(&kdata, ++ &info); ++ } ++ doing->restartable = 0; ++ ++ /* check whether root should be killed violently */ ++ if (znode_is_root(parent) && ++ /* don't kill roots at and lower than twig level */ ++ znode_get_level(parent) > REISER4_MIN_TREE_HEIGHT && ++ node_num_items(parent) == 1) { ++ result = reiser4_kill_tree_root(coord.node); ++ } ++ ++ return result < 0 ? : 0; ++} ++ ++/* implements COP_CUT opration ++ ++ Cuts part or whole content of node. ++ ++*/ ++static int carry_cut(carry_op * op /* operation to be performed */ , ++ carry_level * doing /* current carry level */ , ++ carry_level * todo /* next carry level */ ) ++{ ++ int result; ++ carry_plugin_info info; ++ node_plugin *nplug; ++ ++ assert("nikita-896", op != NULL); ++ assert("nikita-897", todo != NULL); ++ assert("nikita-898", op->op == COP_CUT); ++ ++ info.doing = doing; ++ info.todo = todo; ++ ++ nplug = node_plugin_by_node(reiser4_carry_real(op->node)); ++ if (op->u.cut_or_kill.is_cut) ++ result = nplug->cut(op->u.cut_or_kill.u.cut, &info); ++ else ++ result = nplug->cut_and_kill(op->u.cut_or_kill.u.kill, &info); ++ ++ doing->restartable = 0; ++ return result < 0 ? : 0; ++} ++ ++/* helper function for carry_paste(): returns true if @op can be continued as ++ paste */ ++static int ++can_paste(coord_t * icoord, const reiser4_key * key, ++ const reiser4_item_data * data) ++{ ++ coord_t circa; ++ item_plugin *new_iplug; ++ item_plugin *old_iplug; ++ int result = 0; /* to keep gcc shut */ ++ ++ assert("", icoord->between != AT_UNIT); ++ ++ /* obviously, one cannot paste when node is empty---there is nothing ++ to paste into. 
*/ ++ if (node_is_empty(icoord->node)) ++ return 0; ++ /* if insertion point is at the middle of the item, then paste */ ++ if (!coord_is_between_items(icoord)) ++ return 1; ++ coord_dup(&circa, icoord); ++ circa.between = AT_UNIT; ++ ++ old_iplug = item_plugin_by_coord(&circa); ++ new_iplug = data->iplug; ++ ++ /* check whether we can paste to the item @icoord is "at" when we ++ ignore ->between field */ ++ if (old_iplug == new_iplug && item_can_contain_key(&circa, key, data)) { ++ result = 1; ++ } else if (icoord->between == BEFORE_UNIT ++ || icoord->between == BEFORE_ITEM) { ++ /* otherwise, try to glue to the item at the left, if any */ ++ coord_dup(&circa, icoord); ++ if (coord_set_to_left(&circa)) { ++ result = 0; ++ coord_init_before_item(icoord); ++ } else { ++ old_iplug = item_plugin_by_coord(&circa); ++ result = (old_iplug == new_iplug) ++ && item_can_contain_key(icoord, key, data); ++ if (result) { ++ coord_dup(icoord, &circa); ++ icoord->between = AFTER_UNIT; ++ } ++ } ++ } else if (icoord->between == AFTER_UNIT ++ || icoord->between == AFTER_ITEM) { ++ coord_dup(&circa, icoord); ++ /* otherwise, try to glue to the item at the right, if any */ ++ if (coord_set_to_right(&circa)) { ++ result = 0; ++ coord_init_after_item(icoord); ++ } else { ++ int (*cck) (const coord_t *, const reiser4_key *, ++ const reiser4_item_data *); ++ ++ old_iplug = item_plugin_by_coord(&circa); ++ ++ cck = old_iplug->b.can_contain_key; ++ if (cck == NULL) ++ /* item doesn't define ->can_contain_key ++ method? So it is not expandable. 
*/ ++ result = 0; ++ else { ++ result = (old_iplug == new_iplug) ++ && cck(&circa /*icoord */ , key, data); ++ if (result) { ++ coord_dup(icoord, &circa); ++ icoord->between = BEFORE_UNIT; ++ } ++ } ++ } ++ } else ++ impossible("nikita-2513", "Nothing works"); ++ if (result) { ++ if (icoord->between == BEFORE_ITEM) { ++ assert("vs-912", icoord->unit_pos == 0); ++ icoord->between = BEFORE_UNIT; ++ } else if (icoord->between == AFTER_ITEM) { ++ coord_init_after_item_end(icoord); ++ } ++ } ++ return result; ++} ++ ++/* implements COP_PASTE operation ++ ++ Paste data into existing item. This is complicated by the fact that after ++ we shifted something to the left or right neighbors trying to free some ++ space, item we were supposed to paste into can be in different node than ++ insertion coord. If so, we are no longer doing paste, but insert. See ++ comments in insert_paste_common(). ++ ++*/ ++static int carry_paste(carry_op * op /* operation to be performed */ , ++ carry_level * doing UNUSED_ARG /* current carry ++ * level */ , ++ carry_level * todo /* next carry level */ ) ++{ ++ znode *node; ++ carry_insert_data cdata; ++ coord_t dcoord; ++ reiser4_item_data data; ++ int result; ++ int real_size; ++ item_plugin *iplug; ++ carry_plugin_info info; ++ coord_t *coord; ++ ++ assert("nikita-982", op != NULL); ++ assert("nikita-983", todo != NULL); ++ assert("nikita-984", op->op == COP_PASTE); ++ ++ coord_init_zero(&dcoord); ++ ++ result = insert_paste_common(op, doing, todo, &cdata, &dcoord, &data); ++ if (result != 0) ++ return result; ++ ++ coord = op->u.insert.d->coord; ++ ++ /* handle case when op -> u.insert.coord doesn't point to the item ++ of required type. restart as insert. 
*/ ++ if (!can_paste(coord, op->u.insert.d->key, op->u.insert.d->data)) { ++ op->op = COP_INSERT; ++ op->u.insert.type = COPT_PASTE_RESTARTED; ++ result = op_dispatch_table[COP_INSERT].handler(op, doing, todo); ++ ++ return result; ++ } ++ ++ node = coord->node; ++ iplug = item_plugin_by_coord(coord); ++ assert("nikita-992", iplug != NULL); ++ ++ assert("nikita-985", node != NULL); ++ assert("nikita-986", node_plugin_by_node(node) != NULL); ++ ++ assert("nikita-987", ++ space_needed_for_op(node, op) <= znode_free_space(node)); ++ ++ assert("nikita-1286", coord_is_existing_item(coord)); ++ ++ /* ++ * if item is expanded as a result of this operation, we should first ++ * change item size, than call ->b.paste item method. If item is ++ * shrunk, it should be done other way around: first call ->b.paste ++ * method, then reduce item size. ++ */ ++ ++ real_size = space_needed_for_op(node, op); ++ if (real_size > 0) ++ node->nplug->change_item_size(coord, real_size); ++ ++ doing->restartable = 0; ++ info.doing = doing; ++ info.todo = todo; ++ ++ result = iplug->b.paste(coord, op->u.insert.d->data, &info); ++ ++ if (real_size < 0) ++ node->nplug->change_item_size(coord, real_size); ++ ++ /* if we pasted at the beginning of the item, update item's key. */ ++ if (coord->unit_pos == 0 && coord->between != AFTER_UNIT) ++ node->nplug->update_item_key(coord, op->u.insert.d->key, &info); ++ ++ znode_make_dirty(node); ++ return result; ++} ++ ++/* handle carry COP_EXTENT operation. 
*/ ++static int carry_extent(carry_op * op /* operation to perform */ , ++ carry_level * doing /* queue of operations @op ++ * is part of */ , ++ carry_level * todo /* queue where new operations ++ * are accumulated */ ) ++{ ++ znode *node; ++ carry_insert_data cdata; ++ coord_t coord; ++ reiser4_item_data data; ++ carry_op *delete_dummy; ++ carry_op *insert_extent; ++ int result; ++ carry_plugin_info info; ++ ++ assert("nikita-1751", op != NULL); ++ assert("nikita-1752", todo != NULL); ++ assert("nikita-1753", op->op == COP_EXTENT); ++ ++ /* extent insertion overview: ++ ++ extents live on the TWIG LEVEL, which is level one above the leaf ++ one. This complicates extent insertion logic somewhat: it may ++ happen (and going to happen all the time) that in logical key ++ ordering extent has to be placed between items I1 and I2, located ++ at the leaf level, but I1 and I2 are in the same formatted leaf ++ node N1. To insert extent one has to ++ ++ (1) reach node N1 and shift data between N1, its neighbors and ++ possibly newly allocated nodes until I1 and I2 fall into different ++ nodes. Since I1 and I2 are still neighboring items in logical key ++ order, they will be necessary utmost items in their respective ++ nodes. ++ ++ (2) After this new extent item is inserted into node on the twig ++ level. ++ ++ Fortunately this process can reuse almost all code from standard ++ insertion procedure (viz. make_space() and insert_paste_common()), ++ due to the following observation: make_space() only shifts data up ++ to and excluding or including insertion point. It never ++ "over-moves" through insertion point. Thus, one can use ++ make_space() to perform step (1). All required for this is just to ++ instruct free_space_shortage() to keep make_space() shifting data ++ until insertion point is at the node border. ++ ++ */ ++ ++ /* perform common functionality of insert and paste. 
*/ ++ result = insert_paste_common(op, doing, todo, &cdata, &coord, &data); ++ if (result != 0) ++ return result; ++ ++ node = op->u.extent.d->coord->node; ++ assert("nikita-1754", node != NULL); ++ assert("nikita-1755", node_plugin_by_node(node) != NULL); ++ assert("nikita-1700", coord_wrt(op->u.extent.d->coord) != COORD_INSIDE); ++ ++ /* NOTE-NIKITA add some checks here. Not assertions, -EIO. Check that ++ extent fits between items. */ ++ ++ info.doing = doing; ++ info.todo = todo; ++ ++ /* there is another complication due to placement of extents on the ++ twig level: extents are "rigid" in the sense that key-range ++ occupied by extent cannot grow indefinitely to the right as it is ++ for the formatted leaf nodes. Because of this when search finds two ++ adjacent extents on the twig level, it has to "drill" to the leaf ++ level, creating new node. Here we are removing this node. ++ */ ++ if (node_is_empty(node)) { ++ delete_dummy = node_post_carry(&info, COP_DELETE, node, 1); ++ if (IS_ERR(delete_dummy)) ++ return PTR_ERR(delete_dummy); ++ delete_dummy->u.delete.child = NULL; ++ delete_dummy->u.delete.flags = DELETE_RETAIN_EMPTY; ++ ZF_SET(node, JNODE_HEARD_BANSHEE); ++ } ++ ++ /* proceed with inserting extent item into parent. We are definitely ++ inserting rather than pasting if we get that far. */ ++ insert_extent = node_post_carry(&info, COP_INSERT, node, 1); ++ if (IS_ERR(insert_extent)) ++ /* @delete_dummy will be automatically destroyed on the level ++ exiting */ ++ return PTR_ERR(insert_extent); ++ /* NOTE-NIKITA insertion by key is simplest option here. Another ++ possibility is to insert on the left or right of already existing ++ item. 
++ */ ++ insert_extent->u.insert.type = COPT_KEY; ++ insert_extent->u.insert.d = op->u.extent.d; ++ assert("nikita-1719", op->u.extent.d->key != NULL); ++ insert_extent->u.insert.d->data->arg = op->u.extent.d->coord; ++ insert_extent->u.insert.flags = ++ znode_get_tree(node)->carry.new_extent_flags; ++ ++ /* ++ * if carry was asked to track lock handle we should actually track ++ * lock handle on the twig node rather than on the leaf where ++ * operation was started from. Transfer tracked lock handle. ++ */ ++ if (doing->track_type) { ++ assert("nikita-3242", doing->tracked != NULL); ++ assert("nikita-3244", todo->tracked == NULL); ++ todo->tracked = doing->tracked; ++ todo->track_type = CARRY_TRACK_NODE; ++ doing->tracked = NULL; ++ doing->track_type = 0; ++ } ++ ++ return 0; ++} ++ ++/* update key in @parent between pointers to @left and @right. ++ ++ Find coords of @left and @right and update delimiting key between them. ++ This is helper function called by carry_update(). Finds position of ++ internal item involved. Updates item key. Updates delimiting keys of child ++ nodes involved. 
++*/ ++static int update_delimiting_key(znode * parent /* node key is updated ++ * in */ , ++ znode * left /* child of @parent */ , ++ znode * right /* child of @parent */ , ++ carry_level * doing /* current carry ++ * level */ , ++ carry_level * todo /* parent carry ++ * level */ , ++ const char **error_msg /* place to ++ * store error ++ * message */ ) ++{ ++ coord_t left_pos; ++ coord_t right_pos; ++ int result; ++ reiser4_key ldkey; ++ carry_plugin_info info; ++ ++ assert("nikita-1177", right != NULL); ++ /* find position of right left child in a parent */ ++ result = find_child_ptr(parent, right, &right_pos); ++ if (result != NS_FOUND) { ++ *error_msg = "Cannot find position of right child"; ++ return result; ++ } ++ ++ if ((left != NULL) && !coord_is_leftmost_unit(&right_pos)) { ++ /* find position of the left child in a parent */ ++ result = find_child_ptr(parent, left, &left_pos); ++ if (result != NS_FOUND) { ++ *error_msg = "Cannot find position of left child"; ++ return result; ++ } ++ assert("nikita-1355", left_pos.node != NULL); ++ } else ++ left_pos.node = NULL; ++ ++ /* check that they are separated by exactly one key and are basically ++ sane */ ++ if (REISER4_DEBUG) { ++ if ((left_pos.node != NULL) ++ && !coord_is_existing_unit(&left_pos)) { ++ *error_msg = "Left child is bastard"; ++ return RETERR(-EIO); ++ } ++ if (!coord_is_existing_unit(&right_pos)) { ++ *error_msg = "Right child is bastard"; ++ return RETERR(-EIO); ++ } ++ if (left_pos.node != NULL && ++ !coord_are_neighbors(&left_pos, &right_pos)) { ++ *error_msg = "Children are not direct siblings"; ++ return RETERR(-EIO); ++ } ++ } ++ *error_msg = NULL; ++ ++ info.doing = doing; ++ info.todo = todo; ++ ++ /* ++ * If child node is not empty, new key of internal item is a key of ++ * leftmost item in the child node. If the child is empty, take its ++ * right delimiting key as a new key of the internal item. 
Precise key ++ * in the latter case is not important per se, because the child (and ++ * the internal item) are going to be killed shortly anyway, but we ++ * have to preserve correct order of keys in the parent node. ++ */ ++ ++ if (!ZF_ISSET(right, JNODE_HEARD_BANSHEE)) ++ leftmost_key_in_node(right, &ldkey); ++ else { ++ read_lock_dk(znode_get_tree(parent)); ++ ldkey = *znode_get_rd_key(right); ++ read_unlock_dk(znode_get_tree(parent)); ++ } ++ node_plugin_by_node(parent)->update_item_key(&right_pos, &ldkey, &info); ++ doing->restartable = 0; ++ znode_make_dirty(parent); ++ return 0; ++} ++ ++/* implements COP_UPDATE opration ++ ++ Update delimiting keys. ++ ++*/ ++static int carry_update(carry_op * op /* operation to be performed */ , ++ carry_level * doing /* current carry level */ , ++ carry_level * todo /* next carry level */ ) ++{ ++ int result; ++ carry_node *missing UNUSED_ARG; ++ znode *left; ++ znode *right; ++ carry_node *lchild; ++ carry_node *rchild; ++ const char *error_msg; ++ reiser4_tree *tree; ++ ++ /* ++ * This operation is called to update key of internal item. This is ++ * necessary when carry shifted of cut data on the child ++ * level. Arguments of this operation are: ++ * ++ * @right --- child node. Operation should update key of internal ++ * item pointing to @right. ++ * ++ * @left --- left neighbor of @right. This parameter is optional. ++ */ ++ ++ assert("nikita-902", op != NULL); ++ assert("nikita-903", todo != NULL); ++ assert("nikita-904", op->op == COP_UPDATE); ++ ++ lchild = op->u.update.left; ++ rchild = op->node; ++ ++ if (lchild != NULL) { ++ assert("nikita-1001", lchild->parent); ++ assert("nikita-1003", !lchild->left); ++ left = reiser4_carry_real(lchild); ++ } else ++ left = NULL; ++ ++ tree = znode_get_tree(rchild->node); ++ read_lock_tree(tree); ++ right = znode_parent(rchild->node); ++ read_unlock_tree(tree); ++ ++ if (right != NULL) { ++ result = update_delimiting_key(right, ++ lchild ? 
lchild->node : NULL, ++ rchild->node, ++ doing, todo, &error_msg); ++ } else { ++ error_msg = "Cannot find node to update key in"; ++ result = RETERR(-EIO); ++ } ++ /* operation will be reposted to the next level by the ++ ->update_item_key() method of node plugin, if necessary. */ ++ ++ if (result != 0) { ++ warning("nikita-999", "Error updating delimiting key: %s (%i)", ++ error_msg ? : "", result); ++ } ++ return result; ++} ++ ++/* move items from @node during carry */ ++static int carry_shift_data(sideof side /* in what direction to move data */ , ++ coord_t * insert_coord /* coord where new item ++ * is to be inserted */ , ++ znode * node /* node which data are moved from */ , ++ carry_level * doing /* active carry queue */ , ++ carry_level * todo /* carry queue where new ++ * operations are to be put ++ * in */ , ++ unsigned int including_insert_coord_p /* true if ++ * @insertion_coord ++ * can be moved */ ) ++{ ++ int result; ++ znode *source; ++ carry_plugin_info info; ++ node_plugin *nplug; ++ ++ source = insert_coord->node; ++ ++ info.doing = doing; ++ info.todo = todo; ++ ++ nplug = node_plugin_by_node(node); ++ result = nplug->shift(insert_coord, node, ++ (side == LEFT_SIDE) ? SHIFT_LEFT : SHIFT_RIGHT, 0, ++ (int)including_insert_coord_p, &info); ++ /* the only error ->shift() method of node plugin can return is ++ -ENOMEM due to carry node/operation allocation. */ ++ assert("nikita-915", result >= 0 || result == -ENOMEM); ++ if (result > 0) { ++ /* ++ * if some number of bytes was actually shifted, mark nodes ++ * dirty, and carry level as non-restartable. 
++ */ ++ doing->restartable = 0; ++ znode_make_dirty(source); ++ znode_make_dirty(node); ++ } ++ ++ assert("nikita-2077", coord_check(insert_coord)); ++ return 0; ++} ++ ++typedef carry_node *(*carry_iterator) (carry_node * node); ++static carry_node *find_dir_carry(carry_node * node, carry_level * level, ++ carry_iterator iterator); ++ ++static carry_node *pool_level_list_prev(carry_node *node) ++{ ++ return list_entry(node->header.level_linkage.prev, carry_node, header.level_linkage); ++} ++ ++/* look for the left neighbor of given carry node in a carry queue. ++ ++ This is used by find_left_neighbor(), but I am not sure that this ++ really gives any advantage. More statistics required. ++ ++*/ ++carry_node *find_left_carry(carry_node * node /* node to find left neighbor ++ * of */ , ++ carry_level * level /* level to scan */ ) ++{ ++ return find_dir_carry(node, level, ++ (carry_iterator) pool_level_list_prev); ++} ++ ++static carry_node *pool_level_list_next(carry_node *node) ++{ ++ return list_entry(node->header.level_linkage.next, carry_node, header.level_linkage); ++} ++ ++/* look for the right neighbor of given carry node in a ++ carry queue. ++ ++ This is used by find_right_neighbor(), but I am not sure that this ++ really gives any advantage. More statistics required. ++ ++*/ ++carry_node *find_right_carry(carry_node * node /* node to find right neighbor ++ * of */ , ++ carry_level * level /* level to scan */ ) ++{ ++ return find_dir_carry(node, level, ++ (carry_iterator) pool_level_list_next); ++} ++ ++/* look for the left or right neighbor of given carry node in a carry ++ queue. ++ ++ Helper function used by find_{left|right}_carry(). 
++*/ ++static carry_node *find_dir_carry(carry_node * node /* node to start scanning ++ * from */ , ++ carry_level * level /* level to scan */ , ++ carry_iterator iterator /* operation to ++ * move to the next ++ * node */ ) ++{ ++ carry_node *neighbor; ++ ++ assert("nikita-1059", node != NULL); ++ assert("nikita-1060", level != NULL); ++ ++ /* scan list of carry nodes on this list dir-ward, skipping all ++ carry nodes referencing the same znode. */ ++ neighbor = node; ++ while (1) { ++ neighbor = iterator(neighbor); ++ if (carry_node_end(level, neighbor)) ++ /* list head is reached */ ++ return NULL; ++ if (reiser4_carry_real(neighbor) != reiser4_carry_real(node)) ++ return neighbor; ++ } ++} ++ ++/* ++ * Memory reservation estimation. ++ * ++ * Carry process proceeds through tree levels upwards. Carry assumes that it ++ * takes tree in consistent state (e.g., that search tree invariants hold), ++ * and leaves tree consistent after it finishes. This means that when some ++ * error occurs carry cannot simply return if there are pending carry ++ * operations. Generic solution for this problem is carry-undo either as ++ * transaction manager feature (requiring checkpoints and isolation), or ++ * through some carry specific mechanism. ++ * ++ * Our current approach is to panic if carry hits an error while tree is ++ * inconsistent. Unfortunately -ENOMEM can easily be triggered. To work around ++ * this "memory reservation" mechanism was added. ++ * ++ * Memory reservation is implemented by perthread-pages.diff patch from ++ * core-patches. Its API is defined in ++ * ++ * int perthread_pages_reserve(int nrpages, gfp_t gfp); ++ * void perthread_pages_release(int nrpages); ++ * int perthread_pages_count(void); ++ * ++ * carry estimates its worst case memory requirements at the entry, reserved ++ * enough memory, and released unused pages before returning. ++ * ++ * Code below estimates worst case memory requirements for a given carry ++ * queue. 
This is dome by summing worst case memory requirements for each ++ * operation in the queue. ++ * ++ */ ++ ++/* ++ * Memory memory requirements of many operations depends on the tree ++ * height. For example, item insertion requires new node to be inserted at ++ * each tree level in the worst case. What tree height should be used for ++ * estimation? Current tree height is wrong, because tree height can change ++ * between the time when estimation was done and the time when operation is ++ * actually performed. Maximal possible tree height (REISER4_MAX_ZTREE_HEIGHT) ++ * is also not desirable, because it would lead to the huge over-estimation ++ * all the time. Plausible solution is "capped tree height": if current tree ++ * height is less than some TREE_HEIGHT_CAP constant, capped tree height is ++ * TREE_HEIGHT_CAP, otherwise it's current tree height. Idea behind this is ++ * that if tree height is TREE_HEIGHT_CAP or larger, it's extremely unlikely ++ * to be increased even more during short interval of time. ++ */ ++#define TREE_HEIGHT_CAP (5) ++ ++/* return capped tree height for the @tree. See comment above. */ ++static int cap_tree_height(reiser4_tree * tree) ++{ ++ return max_t(int, tree->height, TREE_HEIGHT_CAP); ++} ++ ++/* return capped tree height for the current tree. */ ++static int capped_height(void) ++{ ++ return cap_tree_height(current_tree); ++} ++ ++/* return number of pages required to store given number of bytes */ ++static int bytes_to_pages(int bytes) ++{ ++ return (bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; ++} ++ ++/* how many pages are required to allocate znodes during item insertion. */ ++static int carry_estimate_znodes(void) ++{ ++ /* ++ * Note, that there we have some problem here: there is no way to ++ * reserve pages specifically for the given slab. This means that ++ * these pages can be hijacked for some other end. 
++ */ ++ ++ /* in the worst case we need 3 new znode on each tree level */ ++ return bytes_to_pages(capped_height() * sizeof(znode) * 3); ++} ++ ++/* ++ * how many pages are required to load bitmaps. One bitmap per level. ++ */ ++static int carry_estimate_bitmaps(void) ++{ ++ if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DONT_LOAD_BITMAP)) { ++ int bytes; ++ ++ bytes = capped_height() * (0 + /* bnode should be added, but its is private to ++ * bitmap.c, skip for now. */ ++ 2 * sizeof(jnode)); /* working and commit jnodes */ ++ return bytes_to_pages(bytes) + 2; /* and their contents */ ++ } else ++ /* bitmaps were pre-loaded during mount */ ++ return 0; ++} ++ ++/* worst case item insertion memory requirements */ ++static int carry_estimate_insert(carry_op * op, carry_level * level) ++{ ++ return carry_estimate_bitmaps() + carry_estimate_znodes() + 1 + /* new atom */ ++ capped_height() + /* new block on each level */ ++ 1 + /* and possibly extra new block at the leaf level */ ++ 3; /* loading of leaves into memory */ ++} ++ ++/* worst case item deletion memory requirements */ ++static int carry_estimate_delete(carry_op * op, carry_level * level) ++{ ++ return carry_estimate_bitmaps() + carry_estimate_znodes() + 1 + /* new atom */ ++ 3; /* loading of leaves into memory */ ++} ++ ++/* worst case tree cut memory requirements */ ++static int carry_estimate_cut(carry_op * op, carry_level * level) ++{ ++ return carry_estimate_bitmaps() + carry_estimate_znodes() + 1 + /* new atom */ ++ 3; /* loading of leaves into memory */ ++} ++ ++/* worst case memory requirements of pasting into item */ ++static int carry_estimate_paste(carry_op * op, carry_level * level) ++{ ++ return carry_estimate_bitmaps() + carry_estimate_znodes() + 1 + /* new atom */ ++ capped_height() + /* new block on each level */ ++ 1 + /* and possibly extra new block at the leaf level */ ++ 3; /* loading of leaves into memory */ ++} ++ ++/* worst case memory requirements of extent insertion */ ++static 
int carry_estimate_extent(carry_op * op, carry_level * level) ++{ ++ return carry_estimate_insert(op, level) + /* insert extent */ ++ carry_estimate_delete(op, level); /* kill leaf */ ++} ++ ++/* worst case memory requirements of key update */ ++static int carry_estimate_update(carry_op * op, carry_level * level) ++{ ++ return 0; ++} ++ ++/* worst case memory requirements of flow insertion */ ++static int carry_estimate_insert_flow(carry_op * op, carry_level * level) ++{ ++ int newnodes; ++ ++ newnodes = min(bytes_to_pages(op->u.insert_flow.flow->length), ++ CARRY_FLOW_NEW_NODES_LIMIT); ++ /* ++ * roughly estimate insert_flow as a sequence of insertions. ++ */ ++ return newnodes * carry_estimate_insert(op, level); ++} ++ ++/* This is dispatch table for carry operations. It can be trivially ++ abstracted into useful plugin: tunable balancing policy is a good ++ thing. */ ++carry_op_handler op_dispatch_table[COP_LAST_OP] = { ++ [COP_INSERT] = { ++ .handler = carry_insert, ++ .estimate = carry_estimate_insert} ++ , ++ [COP_DELETE] = { ++ .handler = carry_delete, ++ .estimate = carry_estimate_delete} ++ , ++ [COP_CUT] = { ++ .handler = carry_cut, ++ .estimate = carry_estimate_cut} ++ , ++ [COP_PASTE] = { ++ .handler = carry_paste, ++ .estimate = carry_estimate_paste} ++ , ++ [COP_EXTENT] = { ++ .handler = carry_extent, ++ .estimate = carry_estimate_extent} ++ , ++ [COP_UPDATE] = { ++ .handler = carry_update, ++ .estimate = carry_estimate_update} ++ , ++ [COP_INSERT_FLOW] = { ++ .handler = carry_insert_flow, ++ .estimate = carry_estimate_insert_flow} ++}; ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/carry_ops.h b/fs/reiser4/carry_ops.h +new file mode 100644 +index 0000000..688ca8f +--- /dev/null ++++ b/fs/reiser4/carry_ops.h +@@ -0,0 +1,42 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* implementation of carry operations. See carry_ops.c for details. */ ++ ++#if !defined( __CARRY_OPS_H__ ) ++#define __CARRY_OPS_H__ ++ ++#include "forward.h" ++#include "znode.h" ++#include "carry.h" ++ ++/* carry operation handlers */ ++typedef struct carry_op_handler { ++ /* perform operation */ ++ int (*handler) (carry_op * op, carry_level * doing, carry_level * todo); ++ /* estimate memory requirements for @op */ ++ int (*estimate) (carry_op * op, carry_level * level); ++} carry_op_handler; ++ ++/* This is dispatch table for carry operations. It can be trivially ++ abstracted into useful plugin: tunable balancing policy is a good ++ thing. */ ++extern carry_op_handler op_dispatch_table[COP_LAST_OP]; ++ ++unsigned int space_needed(const znode * node, const coord_t * coord, ++ const reiser4_item_data * data, int inserting); ++extern carry_node *find_left_carry(carry_node * node, carry_level * level); ++extern carry_node *find_right_carry(carry_node * node, carry_level * level); ++ ++/* __CARRY_OPS_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/context.c b/fs/reiser4/context.c +new file mode 100644 +index 0000000..4b3137f +--- /dev/null ++++ b/fs/reiser4/context.c +@@ -0,0 +1,288 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Manipulation of reiser4_context */ ++ ++/* ++ * global context used during system call. 
Variable of this type is allocated ++ * on the stack at the beginning of the reiser4 part of the system call and ++ * pointer to it is stored in the current->fs_context. This allows us to avoid ++ * passing pointer to current transaction and current lockstack (both in ++ * one-to-one mapping with threads) all over the call chain. ++ * ++ * It's kind of like those global variables the prof used to tell you not to ++ * use in CS1, except thread specific.;-) Nikita, this was a good idea. ++ * ++ * In some situations it is desirable to have ability to enter reiser4_context ++ * more than once for the same thread (nested contexts). For example, there ++ * are some functions that can be called either directly from VFS/VM or from ++ * already active reiser4 context (->writepage, for example). ++ * ++ * In such situations "child" context acts like dummy: all activity is ++ * actually performed in the top level context, and get_current_context() ++ * always returns top level context. ++ * Of course, reiser4_init_context()/reiser4_done_context() have to be properly ++ * nested any way. ++ * ++ * Note that there is an important difference between reiser4 uses ++ * ->fs_context and the way other file systems use it. Other file systems ++ * (ext3 and reiserfs) use ->fs_context only for the duration of _transaction_ ++ * (this is why ->fs_context was initially called ->journal_info). This means, ++ * that when ext3 or reiserfs finds that ->fs_context is not NULL on the entry ++ * to the file system, they assume that some transaction is already underway, ++ * and usually bail out, because starting nested transaction would most likely ++ * lead to the deadlock. This gives false positives with reiser4, because we ++ * set ->fs_context before starting transaction. 
++ */ ++ ++#include "debug.h" ++#include "super.h" ++#include "context.h" ++ ++#include /* balance_dirty_pages() */ ++#include ++ ++static void _reiser4_init_context(reiser4_context * context, ++ struct super_block *super) ++{ ++ memset(context, 0, sizeof(*context)); ++ ++ context->super = super; ++ context->magic = context_magic; ++ context->outer = current->journal_info; ++ current->journal_info = (void *)context; ++ context->nr_children = 0; ++ context->gfp_mask = GFP_KERNEL; ++ ++ init_lock_stack(&context->stack); ++ ++ reiser4_txn_begin(context); ++ ++ /* initialize head of tap list */ ++ INIT_LIST_HEAD(&context->taps); ++#if REISER4_DEBUG ++ context->task = current; ++#endif ++ grab_space_enable(); ++} ++ ++/* initialize context and bind it to the current thread ++ ++ This function should be called at the beginning of reiser4 part of ++ syscall. ++*/ ++reiser4_context * reiser4_init_context(struct super_block * super) ++{ ++ reiser4_context *context; ++ ++ assert("nikita-2662", !in_interrupt() && !in_irq()); ++ assert("nikita-3357", super != NULL); ++ assert("nikita-3358", super->s_op == NULL || is_reiser4_super(super)); ++ ++ context = get_current_context_check(); ++ if (context && context->super == super) { ++ context = (reiser4_context *) current->journal_info; ++ context->nr_children++; ++ return context; ++ } ++ ++ context = kmalloc(sizeof(*context), GFP_KERNEL); ++ if (context == NULL) ++ return ERR_PTR(RETERR(-ENOMEM)); ++ ++ _reiser4_init_context(context, super); ++ return context; ++} ++ ++/* this is used in scan_mgr which is called with spinlock held and in ++ reiser4_fill_super magic */ ++void init_stack_context(reiser4_context *context, struct super_block *super) ++{ ++ assert("nikita-2662", !in_interrupt() && !in_irq()); ++ assert("nikita-3357", super != NULL); ++ assert("nikita-3358", super->s_op == NULL || is_reiser4_super(super)); ++ assert("vs-12", !is_in_reiser4_context()); ++ ++ _reiser4_init_context(context, super); ++ context->on_stack = 
1; ++ return; ++} ++ ++/* cast lock stack embedded into reiser4 context up to its container */ ++reiser4_context *get_context_by_lock_stack(lock_stack * owner) ++{ ++ return container_of(owner, reiser4_context, stack); ++} ++ ++/* true if there is already _any_ reiser4 context for the current thread */ ++int is_in_reiser4_context(void) ++{ ++ reiser4_context *ctx; ++ ++ ctx = current->journal_info; ++ return ctx != NULL && ((unsigned long)ctx->magic) == context_magic; ++} ++ ++/* ++ * call balance dirty pages for the current context. ++ * ++ * File system is expected to call balance_dirty_pages_ratelimited() whenever ++ * it dirties a page. reiser4 does this for unformatted nodes (that is, during ++ * write---this covers vast majority of all dirty traffic), but we cannot do ++ * this immediately when formatted node is dirtied, because long term lock is ++ * usually held at that time. To work around this, dirtying of formatted node ++ * simply increases ->nr_marked_dirty counter in the current reiser4 ++ * context. When we are about to leave this context, ++ * balance_dirty_pages_ratelimited() is called, if necessary. ++ * ++ * This introduces another problem: sometimes we do not want to run ++ * balance_dirty_pages_ratelimited() when leaving a context, for example ++ * because some important lock (like ->i_mutex on the parent directory) is ++ * held. To achieve this, ->nobalance flag can be set in the current context. ++ */ ++static void balance_dirty_pages_at(reiser4_context *context) ++{ ++ reiser4_super_info_data *sbinfo = get_super_private(context->super); ++ ++ /* ++ * call balance_dirty_pages_ratelimited() to process formatted nodes ++ * dirtied during this system call. 
Do that only if we are not in mount ++ * and there were nodes dirtied in this context and we are not in ++ * writepage (to avoid deadlock) and not in pdflush ++ */ ++ if (sbinfo != NULL && sbinfo->fake != NULL && ++ context->nr_marked_dirty != 0 && ++ !(current->flags & PF_MEMALLOC) && ++ !current_is_pdflush()) ++ balance_dirty_pages_ratelimited(sbinfo->fake->i_mapping); ++} ++ ++/* release resources associated with context. ++ ++ This function should be called at the end of "session" with reiser4, ++ typically just before leaving reiser4 driver back to VFS. ++ ++ This is good place to put some degugging consistency checks, like that ++ thread released all locks and closed transcrash etc. ++ ++*/ ++static void reiser4_done_context(reiser4_context * context /* context being released */ ) ++{ ++ assert("nikita-860", context != NULL); ++ assert("nikita-859", context->magic == context_magic); ++ assert("vs-646", (reiser4_context *) current->journal_info == context); ++ assert("zam-686", !in_interrupt() && !in_irq()); ++ ++ /* only do anything when leaving top-level reiser4 context. All nested ++ * contexts are just dummies. */ ++ if (context->nr_children == 0) { ++ assert("jmacd-673", context->trans == NULL); ++ assert("jmacd-1002", lock_stack_isclean(&context->stack)); ++ assert("nikita-1936", reiser4_no_counters_are_held()); ++ assert("nikita-2626", list_empty_careful(reiser4_taps_list())); ++ assert("zam-1004", ergo(get_super_private(context->super), ++ get_super_private(context->super)->delete_mutex_owner != ++ current)); ++ ++ /* release all grabbed but as yet unused blocks */ ++ if (context->grabbed_blocks != 0) ++ all_grabbed2free(); ++ ++ /* ++ * synchronize against longterm_unlock_znode(): ++ * wake_up_requestor() wakes up requestors without holding ++ * zlock (otherwise they will immediately bump into that lock ++ * after wake up on another CPU). 
To work around (rare) ++ * situation where requestor has been woken up asynchronously ++ * and managed to run until completion (and destroy its ++ * context and lock stack) before wake_up_requestor() called ++ * wake_up() on it, wake_up_requestor() synchronize on lock ++ * stack spin lock. It has actually been observed that spin ++ * lock _was_ locked at this point, because ++ * wake_up_requestor() took interrupt. ++ */ ++ spin_lock_stack(&context->stack); ++ spin_unlock_stack(&context->stack); ++ ++ assert("zam-684", context->nr_children == 0); ++ /* restore original ->fs_context value */ ++ current->journal_info = context->outer; ++ if (context->on_stack == 0) ++ kfree(context); ++ } else { ++ context->nr_children--; ++#if REISER4_DEBUG ++ assert("zam-685", context->nr_children >= 0); ++#endif ++ } ++} ++ ++/* ++ * exit reiser4 context. Call balance_dirty_pages_at() if necessary. Close ++ * transaction. Call done_context() to do context related book-keeping. ++ */ ++void reiser4_exit_context(reiser4_context * context) ++{ ++ assert("nikita-3021", reiser4_schedulable()); ++ ++ if (context->nr_children == 0) { ++ if (!context->nobalance) { ++ reiser4_txn_restart(context); ++ balance_dirty_pages_at(context); ++ } ++ ++ /* if filesystem is mounted with -o sync or -o dirsync - commit ++ transaction. FIXME: TXNH_DONT_COMMIT is used to avoid ++ commiting on exit_context when inode semaphore is held and ++ to have ktxnmgrd to do commit instead to get better ++ concurrent filesystem accesses. But, when one mounts with -o ++ sync, he cares more about reliability than about ++ performance. So, for now we have this simple mount -o sync ++ support. 
*/ ++ if (context->super->s_flags & (MS_SYNCHRONOUS | MS_DIRSYNC)) { ++ txn_atom *atom; ++ ++ atom = get_current_atom_locked_nocheck(); ++ if (atom) { ++ atom->flags |= ATOM_FORCE_COMMIT; ++ context->trans->flags &= ~TXNH_DONT_COMMIT; ++ spin_unlock_atom(atom); ++ } ++ } ++ reiser4_txn_end(context); ++ } ++ reiser4_done_context(context); ++} ++ ++void reiser4_ctx_gfp_mask_set(void) ++{ ++ reiser4_context *ctx; ++ ++ ctx = get_current_context(); ++ if (ctx->entd == 0 && ++ list_empty(&ctx->stack.locks) && ++ ctx->trans->atom == NULL) ++ ctx->gfp_mask = GFP_KERNEL; ++ else ++ ctx->gfp_mask = GFP_NOFS; ++} ++ ++void reiser4_ctx_gfp_mask_force (gfp_t mask) ++{ ++ reiser4_context *ctx; ++ ctx = get_current_context(); ++ ++ assert("edward-1454", ctx != NULL); ++ ++ ctx->gfp_mask = mask; ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 120 ++ * scroll-step: 1 ++ * End: ++ */ +diff --git a/fs/reiser4/context.h b/fs/reiser4/context.h +new file mode 100644 +index 0000000..da240a9 +--- /dev/null ++++ b/fs/reiser4/context.h +@@ -0,0 +1,228 @@ ++/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Reiser4 context. See context.c for details. */ ++ ++#if !defined( __REISER4_CONTEXT_H__ ) ++#define __REISER4_CONTEXT_H__ ++ ++#include "forward.h" ++#include "debug.h" ++#include "dformat.h" ++#include "tap.h" ++#include "lock.h" ++ ++#include /* for __u?? */ ++#include /* for struct super_block */ ++#include ++#include /* for struct task_struct */ ++ ++/* reiser4 per-thread context */ ++struct reiser4_context { ++ /* magic constant. For identification of reiser4 contexts. */ ++ __u32 magic; ++ ++ /* current lock stack. See lock.[ch]. This is where list of all ++ locks taken by current thread is kept. This is also used in ++ deadlock detection. */ ++ lock_stack stack; ++ ++ /* current transcrash. 
*/ ++ txn_handle *trans; ++ /* transaction handle embedded into reiser4_context. ->trans points ++ * here by default. */ ++ txn_handle trans_in_ctx; ++ ++ /* super block we are working with. To get the current tree ++ use &get_super_private (reiser4_get_current_sb ())->tree. */ ++ struct super_block *super; ++ ++ /* parent fs activation */ ++ struct fs_activation *outer; ++ ++ /* per-thread grabbed (for further allocation) blocks counter */ ++ reiser4_block_nr grabbed_blocks; ++ ++ /* list of taps currently monitored. See tap.c */ ++ struct list_head taps; ++ ++ /* grabbing space is enabled */ ++ unsigned int grab_enabled:1; ++ /* should be set when we are write dirty nodes to disk in jnode_flush or ++ * reiser4_write_logs() */ ++ unsigned int writeout_mode:1; ++ /* true, if current thread is an ent thread */ ++ unsigned int entd:1; ++ /* true, if balance_dirty_pages() should not be run when leaving this ++ * context. This is used to avoid lengthly balance_dirty_pages() ++ * operation when holding some important resource, like directory ++ * ->i_mutex */ ++ unsigned int nobalance:1; ++ ++ /* this bit is used on reiser4_done_context to decide whether context is ++ kmalloc-ed and has to be kfree-ed */ ++ unsigned int on_stack:1; ++ ++ /* count non-trivial jnode_set_dirty() calls */ ++ unsigned long nr_marked_dirty; ++ ++ /* reiser4_sync_inodes calls (via generic_sync_sb_inodes) ++ * reiser4_writepages for each of dirty inodes. Reiser4_writepages ++ * captures pages. 
When number of pages captured in one ++ * reiser4_sync_inodes reaches some threshold - some atoms get ++ * flushed */ ++ int nr_captured; ++ int nr_children; /* number of child contexts */ ++#if REISER4_DEBUG ++ /* debugging information about reiser4 locks held by the current ++ * thread */ ++ reiser4_lock_counters_info locks; ++ struct task_struct *task; /* so we can easily find owner of the stack */ ++ ++ /* ++ * disk space grabbing debugging support ++ */ ++ /* how many disk blocks were grabbed by the first call to ++ * reiser4_grab_space() in this context */ ++ reiser4_block_nr grabbed_initially; ++ ++ /* list of all threads doing flush currently */ ++ struct list_head flushers_link; ++ /* information about last error encountered by reiser4 */ ++ err_site err; ++#endif ++ void *vp; ++ gfp_t gfp_mask; ++}; ++ ++extern reiser4_context *get_context_by_lock_stack(lock_stack *); ++ ++/* Debugging helps. */ ++#if REISER4_DEBUG ++extern void print_contexts(void); ++#endif ++ ++#define current_tree (&(get_super_private(reiser4_get_current_sb())->tree)) ++#define current_blocksize reiser4_get_current_sb()->s_blocksize ++#define current_blocksize_bits reiser4_get_current_sb()->s_blocksize_bits ++ ++extern reiser4_context *reiser4_init_context(struct super_block *); ++extern void init_stack_context(reiser4_context *, struct super_block *); ++extern void reiser4_exit_context(reiser4_context *); ++ ++/* magic constant we store in reiser4_context allocated at the stack. Used to ++ catch accesses to staled or uninitialized contexts. 
*/ ++#define context_magic ((__u32) 0x4b1b5d0b) ++ ++extern int is_in_reiser4_context(void); ++ ++/* ++ * return reiser4_context for the thread @tsk ++ */ ++static inline reiser4_context *get_context(const struct task_struct *tsk) ++{ ++ assert("vs-1682", ++ ((reiser4_context *) tsk->journal_info)->magic == context_magic); ++ return (reiser4_context *) tsk->journal_info; ++} ++ ++/* ++ * return reiser4 context of the current thread, or NULL if there is none. ++ */ ++static inline reiser4_context *get_current_context_check(void) ++{ ++ if (is_in_reiser4_context()) ++ return get_context(current); ++ else ++ return NULL; ++} ++ ++static inline reiser4_context *get_current_context(void); /* __attribute__((const)); */ ++ ++/* return context associated with current thread */ ++static inline reiser4_context *get_current_context(void) ++{ ++ return get_context(current); ++} ++ ++static inline gfp_t reiser4_ctx_gfp_mask_get(void) ++{ ++ reiser4_context *ctx; ++ ++ ctx = get_current_context_check(); ++ return (ctx == NULL) ? GFP_KERNEL : ctx->gfp_mask; ++} ++ ++void reiser4_ctx_gfp_mask_set(void); ++void reiser4_ctx_gfp_mask_force (gfp_t mask); ++ ++/* ++ * true if current thread is in the write-out mode. Thread enters write-out ++ * mode during jnode_flush and reiser4_write_logs(). 
++ */ ++static inline int is_writeout_mode(void) ++{ ++ return get_current_context()->writeout_mode; ++} ++ ++/* ++ * enter write-out mode ++ */ ++static inline void writeout_mode_enable(void) ++{ ++ assert("zam-941", !get_current_context()->writeout_mode); ++ get_current_context()->writeout_mode = 1; ++} ++ ++/* ++ * leave write-out mode ++ */ ++static inline void writeout_mode_disable(void) ++{ ++ assert("zam-942", get_current_context()->writeout_mode); ++ get_current_context()->writeout_mode = 0; ++} ++ ++static inline void grab_space_enable(void) ++{ ++ get_current_context()->grab_enabled = 1; ++} ++ ++static inline void grab_space_disable(void) ++{ ++ get_current_context()->grab_enabled = 0; ++} ++ ++static inline void grab_space_set_enabled(int enabled) ++{ ++ get_current_context()->grab_enabled = enabled; ++} ++ ++static inline int is_grab_enabled(reiser4_context * ctx) ++{ ++ return ctx->grab_enabled; ++} ++ ++/* mark transaction handle in @ctx as TXNH_DONT_COMMIT, so that no commit or ++ * flush would be performed when it is closed. This is necessary when handle ++ * has to be closed under some coarse semaphore, like i_mutex of ++ * directory. Commit will be performed by ktxnmgrd. */ ++static inline void context_set_commit_async(reiser4_context * context) ++{ ++ context->nobalance = 1; ++ context->trans->flags |= TXNH_DONT_COMMIT; ++} ++ ++/* __REISER4_CONTEXT_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/coord.c b/fs/reiser4/coord.c +new file mode 100644 +index 0000000..d171786 +--- /dev/null ++++ b/fs/reiser4/coord.c +@@ -0,0 +1,935 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "forward.h" ++#include "debug.h" ++#include "dformat.h" ++#include "tree.h" ++#include "plugin/item/item.h" ++#include "znode.h" ++#include "coord.h" ++ ++/* Internal constructor. */ ++static inline void ++coord_init_values(coord_t * coord, const znode * node, pos_in_node_t item_pos, ++ pos_in_node_t unit_pos, between_enum between) ++{ ++ coord->node = (znode *) node; ++ coord_set_item_pos(coord, item_pos); ++ coord->unit_pos = unit_pos; ++ coord->between = between; ++ ON_DEBUG(coord->plug_v = 0); ++ ON_DEBUG(coord->body_v = 0); ++ ++ /*ON_TRACE (TRACE_COORDS, "init coord %p node %p: %u %u %s\n", coord, node, item_pos, unit_pos, coord_tween_tostring (between)); */ ++} ++ ++/* after shifting of node content, coord previously set properly may become ++ invalid, try to "normalize" it. 
*/ ++void coord_normalize(coord_t * coord) ++{ ++ znode *node; ++ ++ node = coord->node; ++ assert("vs-683", node); ++ ++ coord_clear_iplug(coord); ++ ++ if (node_is_empty(node)) { ++ coord_init_first_unit(coord, node); ++ } else if ((coord->between == AFTER_ITEM) ++ || (coord->between == AFTER_UNIT)) { ++ return; ++ } else if (coord->item_pos == coord_num_items(coord) ++ && coord->between == BEFORE_ITEM) { ++ coord_dec_item_pos(coord); ++ coord->between = AFTER_ITEM; ++ } else if (coord->unit_pos == coord_num_units(coord) ++ && coord->between == BEFORE_UNIT) { ++ coord->unit_pos--; ++ coord->between = AFTER_UNIT; ++ } else if (coord->item_pos == coord_num_items(coord) ++ && coord->unit_pos == 0 && coord->between == BEFORE_UNIT) { ++ coord_dec_item_pos(coord); ++ coord->unit_pos = 0; ++ coord->between = AFTER_ITEM; ++ } ++} ++ ++/* Copy a coordinate. */ ++void coord_dup(coord_t * coord, const coord_t * old_coord) ++{ ++ assert("jmacd-9800", coord_check(old_coord)); ++ coord_dup_nocheck(coord, old_coord); ++} ++ ++/* Copy a coordinate without check. Useful when old_coord->node is not ++ loaded. As in cbk_tree_lookup -> connect_znode -> connect_one_side */ ++void coord_dup_nocheck(coord_t * coord, const coord_t * old_coord) ++{ ++ coord->node = old_coord->node; ++ coord_set_item_pos(coord, old_coord->item_pos); ++ coord->unit_pos = old_coord->unit_pos; ++ coord->between = old_coord->between; ++ coord->iplugid = old_coord->iplugid; ++ ON_DEBUG(coord->plug_v = old_coord->plug_v); ++ ON_DEBUG(coord->body_v = old_coord->body_v); ++} ++ ++/* Initialize an invalid coordinate. */ ++void coord_init_invalid(coord_t * coord, const znode * node) ++{ ++ coord_init_values(coord, node, 0, 0, INVALID_COORD); ++} ++ ++void coord_init_first_unit_nocheck(coord_t * coord, const znode * node) ++{ ++ coord_init_values(coord, node, 0, 0, AT_UNIT); ++} ++ ++/* Initialize a coordinate to point at the first unit of the first item. If the node is ++ empty, it is positioned at the EMPTY_NODE. 
*/ ++void coord_init_first_unit(coord_t * coord, const znode * node) ++{ ++ int is_empty = node_is_empty(node); ++ ++ coord_init_values(coord, node, 0, 0, (is_empty ? EMPTY_NODE : AT_UNIT)); ++ ++ assert("jmacd-9801", coord_check(coord)); ++} ++ ++/* Initialize a coordinate to point at the last unit of the last item. If the node is ++ empty, it is positioned at the EMPTY_NODE. */ ++void coord_init_last_unit(coord_t * coord, const znode * node) ++{ ++ int is_empty = node_is_empty(node); ++ ++ coord_init_values(coord, node, ++ (is_empty ? 0 : node_num_items(node) - 1), 0, ++ (is_empty ? EMPTY_NODE : AT_UNIT)); ++ if (!is_empty) ++ coord->unit_pos = coord_last_unit_pos(coord); ++ assert("jmacd-9802", coord_check(coord)); ++} ++ ++/* Initialize a coordinate to before the first item. If the node is empty, it is ++ positioned at the EMPTY_NODE. */ ++void coord_init_before_first_item(coord_t * coord, const znode * node) ++{ ++ int is_empty = node_is_empty(node); ++ ++ coord_init_values(coord, node, 0, 0, ++ (is_empty ? EMPTY_NODE : BEFORE_UNIT)); ++ ++ assert("jmacd-9803", coord_check(coord)); ++} ++ ++/* Initialize a coordinate to after the last item. If the node is empty, it is positioned ++ at the EMPTY_NODE. */ ++void coord_init_after_last_item(coord_t * coord, const znode * node) ++{ ++ int is_empty = node_is_empty(node); ++ ++ coord_init_values(coord, node, ++ (is_empty ? 0 : node_num_items(node) - 1), 0, ++ (is_empty ? EMPTY_NODE : AFTER_ITEM)); ++ ++ assert("jmacd-9804", coord_check(coord)); ++} ++ ++/* Initialize a coordinate to after last unit in the item. Coord must be set ++ already to existing item */ ++void coord_init_after_item_end(coord_t * coord) ++{ ++ coord->between = AFTER_UNIT; ++ coord->unit_pos = coord_last_unit_pos(coord); ++} ++ ++/* Initialize a coordinate to before the item. 
Coord must be set already to existing item */ ++void coord_init_before_item(coord_t * coord) ++{ ++ coord->unit_pos = 0; ++ coord->between = BEFORE_ITEM; ++} ++ ++/* Initialize a coordinate to after the item. Coord must be set already to existing item */ ++void coord_init_after_item(coord_t * coord) ++{ ++ coord->unit_pos = 0; ++ coord->between = AFTER_ITEM; ++} ++ ++/* Initialize a coordinate by 0s. Used in places where init_coord was used and ++ it was not clear how actually */ ++void coord_init_zero(coord_t * coord) ++{ ++ memset(coord, 0, sizeof(*coord)); ++} ++ ++/* Return the number of units at the present item. Asserts coord_is_existing_item(). */ ++unsigned coord_num_units(const coord_t * coord) ++{ ++ assert("jmacd-9806", coord_is_existing_item(coord)); ++ ++ return item_plugin_by_coord(coord)->b.nr_units(coord); ++} ++ ++/* Returns true if the coord was initializewd by coord_init_invalid (). */ ++/* Audited by: green(2002.06.15) */ ++int coord_is_invalid(const coord_t * coord) ++{ ++ return coord->between == INVALID_COORD; ++} ++ ++/* Returns true if the coordinate is positioned at an existing item, not before or after ++ an item. It may be placed at, before, or after any unit within the item, whether ++ existing or not. */ ++int coord_is_existing_item(const coord_t * coord) ++{ ++ switch (coord->between) { ++ case EMPTY_NODE: ++ case BEFORE_ITEM: ++ case AFTER_ITEM: ++ case INVALID_COORD: ++ return 0; ++ ++ case BEFORE_UNIT: ++ case AT_UNIT: ++ case AFTER_UNIT: ++ return coord->item_pos < coord_num_items(coord); ++ } ++ ++ impossible("jmacd-9900", "unreachable coord: %p", coord); ++ return 0; ++} ++ ++/* Returns true if the coordinate is positioned at an existing unit, not before or after a ++ unit. 
*/ ++/* Audited by: green(2002.06.15) */ ++int coord_is_existing_unit(const coord_t * coord) ++{ ++ switch (coord->between) { ++ case EMPTY_NODE: ++ case BEFORE_UNIT: ++ case AFTER_UNIT: ++ case BEFORE_ITEM: ++ case AFTER_ITEM: ++ case INVALID_COORD: ++ return 0; ++ ++ case AT_UNIT: ++ return (coord->item_pos < coord_num_items(coord) ++ && coord->unit_pos < coord_num_units(coord)); ++ } ++ ++ impossible("jmacd-9902", "unreachable"); ++ return 0; ++} ++ ++/* Returns true if the coordinate is positioned at the first unit of the first item. Not ++ true for empty nodes nor coordinates positioned before the first item. */ ++/* Audited by: green(2002.06.15) */ ++int coord_is_leftmost_unit(const coord_t * coord) ++{ ++ return (coord->between == AT_UNIT && coord->item_pos == 0 ++ && coord->unit_pos == 0); ++} ++ ++#if REISER4_DEBUG ++/* For assertions only, checks for a valid coordinate. */ ++int coord_check(const coord_t * coord) ++{ ++ if (coord->node == NULL) { ++ return 0; ++ } ++ if (znode_above_root(coord->node)) ++ return 1; ++ ++ switch (coord->between) { ++ default: ++ case INVALID_COORD: ++ return 0; ++ case EMPTY_NODE: ++ if (!node_is_empty(coord->node)) { ++ return 0; ++ } ++ return coord->item_pos == 0 && coord->unit_pos == 0; ++ ++ case BEFORE_UNIT: ++ case AFTER_UNIT: ++ if (node_is_empty(coord->node) && (coord->item_pos == 0) ++ && (coord->unit_pos == 0)) ++ return 1; ++ case AT_UNIT: ++ break; ++ case AFTER_ITEM: ++ case BEFORE_ITEM: ++ /* before/after item should not set unit_pos. */ ++ if (coord->unit_pos != 0) { ++ return 0; ++ } ++ break; ++ } ++ ++ if (coord->item_pos >= node_num_items(coord->node)) { ++ return 0; ++ } ++ ++ /* FIXME-VS: we are going to check unit_pos. 
This makes no sense when ++ between is set either AFTER_ITEM or BEFORE_ITEM */ ++ if (coord->between == AFTER_ITEM || coord->between == BEFORE_ITEM) ++ return 1; ++ ++ if (coord_is_iplug_set(coord) && ++ coord->unit_pos > ++ item_plugin_by_coord(coord)->b.nr_units(coord) - 1) { ++ return 0; ++ } ++ return 1; ++} ++#endif ++ ++/* Adjust coordinate boundaries based on the number of items prior to coord_next/prev. ++ Returns 1 if the new position is does not exist. */ ++static int coord_adjust_items(coord_t * coord, unsigned items, int is_next) ++{ ++ /* If the node is invalid, leave it. */ ++ if (coord->between == INVALID_COORD) { ++ return 1; ++ } ++ ++ /* If the node is empty, set it appropriately. */ ++ if (items == 0) { ++ coord->between = EMPTY_NODE; ++ coord_set_item_pos(coord, 0); ++ coord->unit_pos = 0; ++ return 1; ++ } ++ ++ /* If it was empty and it no longer is, set to BEFORE/AFTER_ITEM. */ ++ if (coord->between == EMPTY_NODE) { ++ coord->between = (is_next ? BEFORE_ITEM : AFTER_ITEM); ++ coord_set_item_pos(coord, 0); ++ coord->unit_pos = 0; ++ return 0; ++ } ++ ++ /* If the item_pos is out-of-range, set it appropriatly. */ ++ if (coord->item_pos >= items) { ++ coord->between = AFTER_ITEM; ++ coord_set_item_pos(coord, items - 1); ++ coord->unit_pos = 0; ++ /* If is_next, return 1 (can't go any further). */ ++ return is_next; ++ } ++ ++ return 0; ++} ++ ++/* Advances the coordinate by one unit to the right. If empty, no change. If ++ coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new position is an ++ existing unit. */ ++int coord_next_unit(coord_t * coord) ++{ ++ unsigned items = coord_num_items(coord); ++ ++ if (coord_adjust_items(coord, items, 1) == 1) { ++ return 1; ++ } ++ ++ switch (coord->between) { ++ case BEFORE_UNIT: ++ /* Now it is positioned at the same unit. 
*/ ++ coord->between = AT_UNIT; ++ return 0; ++ ++ case AFTER_UNIT: ++ case AT_UNIT: ++ /* If it was at or after a unit and there are more units in this item, ++ advance to the next one. */ ++ if (coord->unit_pos < coord_last_unit_pos(coord)) { ++ coord->unit_pos += 1; ++ coord->between = AT_UNIT; ++ return 0; ++ } ++ ++ /* Otherwise, it is crossing an item boundary and treated as if it was ++ after the current item. */ ++ coord->between = AFTER_ITEM; ++ coord->unit_pos = 0; ++ /* FALLTHROUGH */ ++ ++ case AFTER_ITEM: ++ /* Check for end-of-node. */ ++ if (coord->item_pos == items - 1) { ++ return 1; ++ } ++ ++ coord_inc_item_pos(coord); ++ coord->unit_pos = 0; ++ coord->between = AT_UNIT; ++ return 0; ++ ++ case BEFORE_ITEM: ++ /* The adjust_items checks ensure that we are valid here. */ ++ coord->unit_pos = 0; ++ coord->between = AT_UNIT; ++ return 0; ++ ++ case INVALID_COORD: ++ case EMPTY_NODE: ++ /* Handled in coord_adjust_items(). */ ++ break; ++ } ++ ++ impossible("jmacd-9902", "unreachable"); ++ return 0; ++} ++ ++/* Advances the coordinate by one item to the right. If empty, no change. If ++ coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new position is ++ an existing item. */ ++int coord_next_item(coord_t * coord) ++{ ++ unsigned items = coord_num_items(coord); ++ ++ if (coord_adjust_items(coord, items, 1) == 1) { ++ return 1; ++ } ++ ++ switch (coord->between) { ++ case AFTER_UNIT: ++ case AT_UNIT: ++ case BEFORE_UNIT: ++ case AFTER_ITEM: ++ /* Check for end-of-node. */ ++ if (coord->item_pos == items - 1) { ++ coord->between = AFTER_ITEM; ++ coord->unit_pos = 0; ++ coord_clear_iplug(coord); ++ return 1; ++ } ++ ++ /* Anywhere in an item, go to the next one. */ ++ coord->between = AT_UNIT; ++ coord_inc_item_pos(coord); ++ coord->unit_pos = 0; ++ return 0; ++ ++ case BEFORE_ITEM: ++ /* The out-of-range check ensures that we are valid here. 
*/ ++ coord->unit_pos = 0; ++ coord->between = AT_UNIT; ++ return 0; ++ case INVALID_COORD: ++ case EMPTY_NODE: ++ /* Handled in coord_adjust_items(). */ ++ break; ++ } ++ ++ impossible("jmacd-9903", "unreachable"); ++ return 0; ++} ++ ++/* Advances the coordinate by one unit to the left. If empty, no change. If ++ coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new position ++ is an existing unit. */ ++int coord_prev_unit(coord_t * coord) ++{ ++ unsigned items = coord_num_items(coord); ++ ++ if (coord_adjust_items(coord, items, 0) == 1) { ++ return 1; ++ } ++ ++ switch (coord->between) { ++ case AT_UNIT: ++ case BEFORE_UNIT: ++ if (coord->unit_pos > 0) { ++ coord->unit_pos -= 1; ++ coord->between = AT_UNIT; ++ return 0; ++ } ++ ++ if (coord->item_pos == 0) { ++ coord->between = BEFORE_ITEM; ++ return 1; ++ } ++ ++ coord_dec_item_pos(coord); ++ coord->unit_pos = coord_last_unit_pos(coord); ++ coord->between = AT_UNIT; ++ return 0; ++ ++ case AFTER_UNIT: ++ /* What if unit_pos is out-of-range? */ ++ assert("jmacd-5442", ++ coord->unit_pos <= coord_last_unit_pos(coord)); ++ coord->between = AT_UNIT; ++ return 0; ++ ++ case BEFORE_ITEM: ++ if (coord->item_pos == 0) { ++ return 1; ++ } ++ ++ coord_dec_item_pos(coord); ++ /* FALLTHROUGH */ ++ ++ case AFTER_ITEM: ++ coord->between = AT_UNIT; ++ coord->unit_pos = coord_last_unit_pos(coord); ++ return 0; ++ ++ case INVALID_COORD: ++ case EMPTY_NODE: ++ break; ++ } ++ ++ impossible("jmacd-9904", "unreachable"); ++ return 0; ++} ++ ++/* Advances the coordinate by one item to the left. If empty, no change. If ++ coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new position ++ is an existing item. 
*/ ++int coord_prev_item(coord_t * coord) ++{ ++ unsigned items = coord_num_items(coord); ++ ++ if (coord_adjust_items(coord, items, 0) == 1) { ++ return 1; ++ } ++ ++ switch (coord->between) { ++ case AT_UNIT: ++ case AFTER_UNIT: ++ case BEFORE_UNIT: ++ case BEFORE_ITEM: ++ ++ if (coord->item_pos == 0) { ++ coord->between = BEFORE_ITEM; ++ coord->unit_pos = 0; ++ return 1; ++ } ++ ++ coord_dec_item_pos(coord); ++ coord->unit_pos = 0; ++ coord->between = AT_UNIT; ++ return 0; ++ ++ case AFTER_ITEM: ++ coord->between = AT_UNIT; ++ coord->unit_pos = 0; ++ return 0; ++ ++ case INVALID_COORD: ++ case EMPTY_NODE: ++ break; ++ } ++ ++ impossible("jmacd-9905", "unreachable"); ++ return 0; ++} ++ ++/* Calls either coord_init_first_unit or coord_init_last_unit depending on sideof argument. */ ++void coord_init_sideof_unit(coord_t * coord, const znode * node, sideof dir) ++{ ++ assert("jmacd-9821", dir == LEFT_SIDE || dir == RIGHT_SIDE); ++ if (dir == LEFT_SIDE) { ++ coord_init_first_unit(coord, node); ++ } else { ++ coord_init_last_unit(coord, node); ++ } ++} ++ ++/* Calls either coord_is_before_leftmost or coord_is_after_rightmost depending on sideof ++ argument. */ ++/* Audited by: green(2002.06.15) */ ++int coord_is_after_sideof_unit(coord_t * coord, sideof dir) ++{ ++ assert("jmacd-9822", dir == LEFT_SIDE || dir == RIGHT_SIDE); ++ if (dir == LEFT_SIDE) { ++ return coord_is_before_leftmost(coord); ++ } else { ++ return coord_is_after_rightmost(coord); ++ } ++} ++ ++/* Calls either coord_next_unit or coord_prev_unit depending on sideof argument. 
*/ ++/* Audited by: green(2002.06.15) */ ++int coord_sideof_unit(coord_t * coord, sideof dir) ++{ ++ assert("jmacd-9823", dir == LEFT_SIDE || dir == RIGHT_SIDE); ++ if (dir == LEFT_SIDE) { ++ return coord_prev_unit(coord); ++ } else { ++ return coord_next_unit(coord); ++ } ++} ++ ++#if REISER4_DEBUG ++int coords_equal(const coord_t * c1, const coord_t * c2) ++{ ++ assert("nikita-2840", c1 != NULL); ++ assert("nikita-2841", c2 != NULL); ++ ++ return ++ c1->node == c2->node && ++ c1->item_pos == c2->item_pos && ++ c1->unit_pos == c2->unit_pos && c1->between == c2->between; ++} ++#endif /* REISER4_DEBUG */ ++ ++/* If coord_is_after_rightmost return NCOORD_ON_THE_RIGHT, if coord_is_after_leftmost ++ return NCOORD_ON_THE_LEFT, otherwise return NCOORD_INSIDE. */ ++/* Audited by: green(2002.06.15) */ ++coord_wrt_node coord_wrt(const coord_t * coord) ++{ ++ if (coord_is_before_leftmost(coord)) { ++ return COORD_ON_THE_LEFT; ++ } ++ ++ if (coord_is_after_rightmost(coord)) { ++ return COORD_ON_THE_RIGHT; ++ } ++ ++ return COORD_INSIDE; ++} ++ ++/* Returns true if the coordinate is positioned after the last item or after the last unit ++ of the last item or it is an empty node. */ ++/* Audited by: green(2002.06.15) */ ++int coord_is_after_rightmost(const coord_t * coord) ++{ ++ assert("jmacd-7313", coord_check(coord)); ++ ++ switch (coord->between) { ++ case INVALID_COORD: ++ case AT_UNIT: ++ case BEFORE_UNIT: ++ case BEFORE_ITEM: ++ return 0; ++ ++ case EMPTY_NODE: ++ return 1; ++ ++ case AFTER_ITEM: ++ return (coord->item_pos == node_num_items(coord->node) - 1); ++ ++ case AFTER_UNIT: ++ return ((coord->item_pos == node_num_items(coord->node) - 1) && ++ coord->unit_pos == coord_last_unit_pos(coord)); ++ } ++ ++ impossible("jmacd-9908", "unreachable"); ++ return 0; ++} ++ ++/* Returns true if the coordinate is positioned before the first item or it is an empty ++ node. 
*/ ++int coord_is_before_leftmost(const coord_t * coord) ++{ ++ /* FIXME-VS: coord_check requires node to be loaded whereas it is not ++ necessary to check if coord is set before leftmost ++ assert ("jmacd-7313", coord_check (coord)); */ ++ switch (coord->between) { ++ case INVALID_COORD: ++ case AT_UNIT: ++ case AFTER_ITEM: ++ case AFTER_UNIT: ++ return 0; ++ ++ case EMPTY_NODE: ++ return 1; ++ ++ case BEFORE_ITEM: ++ case BEFORE_UNIT: ++ return (coord->item_pos == 0) && (coord->unit_pos == 0); ++ } ++ ++ impossible("jmacd-9908", "unreachable"); ++ return 0; ++} ++ ++/* Returns true if the coordinate is positioned after a item, before a item, after the ++ last unit of an item, before the first unit of an item, or at an empty node. */ ++/* Audited by: green(2002.06.15) */ ++int coord_is_between_items(const coord_t * coord) ++{ ++ assert("jmacd-7313", coord_check(coord)); ++ ++ switch (coord->between) { ++ case INVALID_COORD: ++ case AT_UNIT: ++ return 0; ++ ++ case AFTER_ITEM: ++ case BEFORE_ITEM: ++ case EMPTY_NODE: ++ return 1; ++ ++ case BEFORE_UNIT: ++ return coord->unit_pos == 0; ++ ++ case AFTER_UNIT: ++ return coord->unit_pos == coord_last_unit_pos(coord); ++ } ++ ++ impossible("jmacd-9908", "unreachable"); ++ return 0; ++} ++ ++#if REISER4_DEBUG ++/* Returns true if the coordinates are positioned at adjacent units, regardless of ++ before-after or item boundaries. 
*/ ++int coord_are_neighbors(coord_t * c1, coord_t * c2) ++{ ++ coord_t *left; ++ coord_t *right; ++ ++ assert("nikita-1241", c1 != NULL); ++ assert("nikita-1242", c2 != NULL); ++ assert("nikita-1243", c1->node == c2->node); ++ assert("nikita-1244", coord_is_existing_unit(c1)); ++ assert("nikita-1245", coord_is_existing_unit(c2)); ++ ++ left = right = NULL; ++ switch (coord_compare(c1, c2)) { ++ case COORD_CMP_ON_LEFT: ++ left = c1; ++ right = c2; ++ break; ++ case COORD_CMP_ON_RIGHT: ++ left = c2; ++ right = c1; ++ break; ++ case COORD_CMP_SAME: ++ return 0; ++ default: ++ wrong_return_value("nikita-1246", "compare_coords()"); ++ } ++ assert("vs-731", left && right); ++ if (left->item_pos == right->item_pos) { ++ return left->unit_pos + 1 == right->unit_pos; ++ } else if (left->item_pos + 1 == right->item_pos) { ++ return (left->unit_pos == coord_last_unit_pos(left)) ++ && (right->unit_pos == 0); ++ } else { ++ return 0; ++ } ++} ++#endif /* REISER4_DEBUG */ ++ ++/* Assuming two coordinates are positioned in the same node, return COORD_CMP_ON_RIGHT, ++ COORD_CMP_ON_LEFT, or COORD_CMP_SAME depending on c1's position relative to c2. */ ++/* Audited by: green(2002.06.15) */ ++coord_cmp coord_compare(coord_t * c1, coord_t * c2) ++{ ++ assert("vs-209", c1->node == c2->node); ++ assert("vs-194", coord_is_existing_unit(c1) ++ && coord_is_existing_unit(c2)); ++ ++ if (c1->item_pos > c2->item_pos) ++ return COORD_CMP_ON_RIGHT; ++ if (c1->item_pos < c2->item_pos) ++ return COORD_CMP_ON_LEFT; ++ if (c1->unit_pos > c2->unit_pos) ++ return COORD_CMP_ON_RIGHT; ++ if (c1->unit_pos < c2->unit_pos) ++ return COORD_CMP_ON_LEFT; ++ return COORD_CMP_SAME; ++} ++ ++/* If the coordinate is between items, shifts it to the right. Returns 0 on success and ++ non-zero if there is no position to the right. 
*/ ++int coord_set_to_right(coord_t * coord) ++{ ++ unsigned items = coord_num_items(coord); ++ ++ if (coord_adjust_items(coord, items, 1) == 1) { ++ return 1; ++ } ++ ++ switch (coord->between) { ++ case AT_UNIT: ++ return 0; ++ ++ case BEFORE_ITEM: ++ case BEFORE_UNIT: ++ coord->between = AT_UNIT; ++ return 0; ++ ++ case AFTER_UNIT: ++ if (coord->unit_pos < coord_last_unit_pos(coord)) { ++ coord->unit_pos += 1; ++ coord->between = AT_UNIT; ++ return 0; ++ } else { ++ ++ coord->unit_pos = 0; ++ ++ if (coord->item_pos == items - 1) { ++ coord->between = AFTER_ITEM; ++ return 1; ++ } ++ ++ coord_inc_item_pos(coord); ++ coord->between = AT_UNIT; ++ return 0; ++ } ++ ++ case AFTER_ITEM: ++ if (coord->item_pos == items - 1) { ++ return 1; ++ } ++ ++ coord_inc_item_pos(coord); ++ coord->unit_pos = 0; ++ coord->between = AT_UNIT; ++ return 0; ++ ++ case EMPTY_NODE: ++ return 1; ++ ++ case INVALID_COORD: ++ break; ++ } ++ ++ impossible("jmacd-9920", "unreachable"); ++ return 0; ++} ++ ++/* If the coordinate is between items, shifts it to the left. Returns 0 on success and ++ non-zero if there is no position to the left. 
*/ ++int coord_set_to_left(coord_t * coord) ++{ ++ unsigned items = coord_num_items(coord); ++ ++ if (coord_adjust_items(coord, items, 0) == 1) { ++ return 1; ++ } ++ ++ switch (coord->between) { ++ case AT_UNIT: ++ return 0; ++ ++ case AFTER_UNIT: ++ coord->between = AT_UNIT; ++ return 0; ++ ++ case AFTER_ITEM: ++ coord->between = AT_UNIT; ++ coord->unit_pos = coord_last_unit_pos(coord); ++ return 0; ++ ++ case BEFORE_UNIT: ++ if (coord->unit_pos > 0) { ++ coord->unit_pos -= 1; ++ coord->between = AT_UNIT; ++ return 0; ++ } else { ++ ++ if (coord->item_pos == 0) { ++ coord->between = BEFORE_ITEM; ++ return 1; ++ } ++ ++ coord->unit_pos = coord_last_unit_pos(coord); ++ coord_dec_item_pos(coord); ++ coord->between = AT_UNIT; ++ return 0; ++ } ++ ++ case BEFORE_ITEM: ++ if (coord->item_pos == 0) { ++ return 1; ++ } ++ ++ coord_dec_item_pos(coord); ++ coord->unit_pos = coord_last_unit_pos(coord); ++ coord->between = AT_UNIT; ++ return 0; ++ ++ case EMPTY_NODE: ++ return 1; ++ ++ case INVALID_COORD: ++ break; ++ } ++ ++ impossible("jmacd-9920", "unreachable"); ++ return 0; ++} ++ ++static const char *coord_tween_tostring(between_enum n) ++{ ++ switch (n) { ++ case BEFORE_UNIT: ++ return "before unit"; ++ case BEFORE_ITEM: ++ return "before item"; ++ case AT_UNIT: ++ return "at unit"; ++ case AFTER_UNIT: ++ return "after unit"; ++ case AFTER_ITEM: ++ return "after item"; ++ case EMPTY_NODE: ++ return "empty node"; ++ case INVALID_COORD: ++ return "invalid"; ++ default: ++ { ++ static char buf[30]; ++ ++ sprintf(buf, "unknown: %i", n); ++ return buf; ++ } ++ } ++} ++ ++void print_coord(const char *mes, const coord_t * coord, int node) ++{ ++ if (coord == NULL) { ++ printk("%s: null\n", mes); ++ return; ++ } ++ printk("%s: item_pos = %d, unit_pos %d, tween=%s, iplug=%d\n", ++ mes, coord->item_pos, coord->unit_pos, ++ coord_tween_tostring(coord->between), coord->iplugid); ++} ++ ++int ++item_utmost_child_real_block(const coord_t * coord, sideof side, ++ reiser4_block_nr * 
blk) ++{ ++ return item_plugin_by_coord(coord)->f.utmost_child_real_block(coord, ++ side, ++ blk); ++} ++ ++int item_utmost_child(const coord_t * coord, sideof side, jnode ** child) ++{ ++ return item_plugin_by_coord(coord)->f.utmost_child(coord, side, child); ++} ++ ++/* @count bytes of flow @f got written, update correspondingly f->length, ++ f->data and f->key */ ++void move_flow_forward(flow_t * f, unsigned count) ++{ ++ if (f->data) ++ f->data += count; ++ f->length -= count; ++ set_key_offset(&f->key, get_key_offset(&f->key) + count); ++} ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/coord.h b/fs/reiser4/coord.h +new file mode 100644 +index 0000000..313e615 +--- /dev/null ++++ b/fs/reiser4/coord.h +@@ -0,0 +1,389 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Coords */ ++ ++#if !defined( __REISER4_COORD_H__ ) ++#define __REISER4_COORD_H__ ++ ++#include "forward.h" ++#include "debug.h" ++#include "dformat.h" ++#include "key.h" ++ ++/* insertions happen between coords in the tree, so we need some means ++ of specifying the sense of betweenness. */ ++typedef enum { ++ BEFORE_UNIT, /* Note: we/init_coord depends on this value being zero. */ ++ AT_UNIT, ++ AFTER_UNIT, ++ BEFORE_ITEM, ++ AFTER_ITEM, ++ INVALID_COORD, ++ EMPTY_NODE, ++} between_enum; ++ ++/* location of coord w.r.t. 
its node */ ++typedef enum { ++ COORD_ON_THE_LEFT = -1, ++ COORD_ON_THE_RIGHT = +1, ++ COORD_INSIDE = 0 ++} coord_wrt_node; ++ ++typedef enum { ++ COORD_CMP_SAME = 0, COORD_CMP_ON_LEFT = -1, COORD_CMP_ON_RIGHT = +1 ++} coord_cmp; ++ ++struct coord { ++ /* node in a tree */ ++ /* 0 */ znode *node; ++ ++ /* position of item within node */ ++ /* 4 */ pos_in_node_t item_pos; ++ /* position of unit within item */ ++ /* 6 */ pos_in_node_t unit_pos; ++ /* optimization: plugin of item is stored in coord_t. Until this was ++ implemented, item_plugin_by_coord() was major CPU consumer. ->iplugid ++ is invalidated (set to 0xff) on each modification of ->item_pos, ++ and all such modifications are funneled through coord_*_item_pos() ++ functions below. ++ */ ++ /* 8 */ char iplugid; ++ /* position of coord w.r.t. to neighboring items and/or units. ++ Values are taken from &between_enum above. ++ */ ++ /* 9 */ char between; ++ /* padding. It will be added by the compiler anyway to conform to the ++ * C language alignment requirements. We keep it here to be on the ++ * safe side and to have a clear picture of the memory layout of this ++ * structure. 
*/ ++ /* 10 */ __u16 pad; ++ /* 12 */ int offset; ++#if REISER4_DEBUG ++ unsigned long plug_v; ++ unsigned long body_v; ++#endif ++}; ++ ++#define INVALID_PLUGID ((char)((1 << 8) - 1)) ++#define INVALID_OFFSET -1 ++ ++static inline void coord_clear_iplug(coord_t * coord) ++{ ++ assert("nikita-2835", coord != NULL); ++ coord->iplugid = INVALID_PLUGID; ++ coord->offset = INVALID_OFFSET; ++} ++ ++static inline int coord_is_iplug_set(const coord_t * coord) ++{ ++ assert("nikita-2836", coord != NULL); ++ return coord->iplugid != INVALID_PLUGID; ++} ++ ++static inline void coord_set_item_pos(coord_t * coord, pos_in_node_t pos) ++{ ++ assert("nikita-2478", coord != NULL); ++ coord->item_pos = pos; ++ coord_clear_iplug(coord); ++} ++ ++static inline void coord_dec_item_pos(coord_t * coord) ++{ ++ assert("nikita-2480", coord != NULL); ++ --coord->item_pos; ++ coord_clear_iplug(coord); ++} ++ ++static inline void coord_inc_item_pos(coord_t * coord) ++{ ++ assert("nikita-2481", coord != NULL); ++ ++coord->item_pos; ++ coord_clear_iplug(coord); ++} ++ ++static inline void coord_add_item_pos(coord_t * coord, int delta) ++{ ++ assert("nikita-2482", coord != NULL); ++ coord->item_pos += delta; ++ coord_clear_iplug(coord); ++} ++ ++static inline void coord_invalid_item_pos(coord_t * coord) ++{ ++ assert("nikita-2832", coord != NULL); ++ coord->item_pos = (unsigned short)~0; ++ coord_clear_iplug(coord); ++} ++ ++/* Reverse a direction. */ ++static inline sideof sideof_reverse(sideof side) ++{ ++ return side == LEFT_SIDE ? RIGHT_SIDE : LEFT_SIDE; ++} ++ ++/* NOTE: There is a somewhat odd mixture of the following opposed terms: ++ ++ "first" and "last" ++ "next" and "prev" ++ "before" and "after" ++ "leftmost" and "rightmost" ++ ++ But I think the chosen names are decent the way they are. ++*/ ++ ++/* COORD INITIALIZERS */ ++ ++/* Initialize an invalid coordinate. 
*/ ++extern void coord_init_invalid(coord_t * coord, const znode * node); ++ ++extern void coord_init_first_unit_nocheck(coord_t * coord, const znode * node); ++ ++/* Initialize a coordinate to point at the first unit of the first item. If the node is ++ empty, it is positioned at the EMPTY_NODE. */ ++extern void coord_init_first_unit(coord_t * coord, const znode * node); ++ ++/* Initialize a coordinate to point at the last unit of the last item. If the node is ++ empty, it is positioned at the EMPTY_NODE. */ ++extern void coord_init_last_unit(coord_t * coord, const znode * node); ++ ++/* Initialize a coordinate to before the first item. If the node is empty, it is ++ positioned at the EMPTY_NODE. */ ++extern void coord_init_before_first_item(coord_t * coord, const znode * node); ++ ++/* Initialize a coordinate to after the last item. If the node is empty, it is positioned ++ at the EMPTY_NODE. */ ++extern void coord_init_after_last_item(coord_t * coord, const znode * node); ++ ++/* Initialize a coordinate to after last unit in the item. Coord must be set ++ already to existing item */ ++void coord_init_after_item_end(coord_t * coord); ++ ++/* Initialize a coordinate to before the item. Coord must be set already to existing item */ ++void coord_init_before_item(coord_t *); ++/* Initialize a coordinate to after the item. Coord must be set already to existing item */ ++void coord_init_after_item(coord_t *); ++ ++/* Calls either coord_init_first_unit or coord_init_last_unit depending on sideof argument. */ ++extern void coord_init_sideof_unit(coord_t * coord, const znode * node, ++ sideof dir); ++ ++/* Initialize a coordinate by 0s. Used in places where init_coord was used and ++ it was not clear how actually ++ FIXME-VS: added by vs (2002, june, 8) */ ++extern void coord_init_zero(coord_t * coord); ++ ++/* COORD METHODS */ ++ ++/* after shifting of node content, coord previously set properly may become ++ invalid, try to "normalize" it. 
*/ ++void coord_normalize(coord_t * coord); ++ ++/* Copy a coordinate. */ ++extern void coord_dup(coord_t * coord, const coord_t * old_coord); ++ ++/* Copy a coordinate without check. */ ++void coord_dup_nocheck(coord_t * coord, const coord_t * old_coord); ++ ++unsigned coord_num_units(const coord_t * coord); ++ ++/* Return the last valid unit number at the present item (i.e., ++ coord_num_units() - 1). */ ++static inline unsigned coord_last_unit_pos(const coord_t * coord) ++{ ++ return coord_num_units(coord) - 1; ++} ++ ++#if REISER4_DEBUG ++/* For assertions only, checks for a valid coordinate. */ ++extern int coord_check(const coord_t * coord); ++ ++extern unsigned long znode_times_locked(const znode * z); ++ ++static inline void coord_update_v(coord_t * coord) ++{ ++ coord->plug_v = coord->body_v = znode_times_locked(coord->node); ++} ++#endif ++ ++extern int coords_equal(const coord_t * c1, const coord_t * c2); ++ ++extern void print_coord(const char *mes, const coord_t * coord, int print_node); ++ ++/* If coord_is_after_rightmost return NCOORD_ON_THE_RIGHT, if coord_is_after_leftmost ++ return NCOORD_ON_THE_LEFT, otherwise return NCOORD_INSIDE. */ ++extern coord_wrt_node coord_wrt(const coord_t * coord); ++ ++/* Returns true if the coordinates are positioned at adjacent units, regardless of ++ before-after or item boundaries. */ ++extern int coord_are_neighbors(coord_t * c1, coord_t * c2); ++ ++/* Assuming two coordinates are positioned in the same node, return NCOORD_CMP_ON_RIGHT, ++ NCOORD_CMP_ON_LEFT, or NCOORD_CMP_SAME depending on c1's position relative to c2. */ ++extern coord_cmp coord_compare(coord_t * c1, coord_t * c2); ++ ++/* COORD PREDICATES */ ++ ++/* Returns true if the coord was initializewd by coord_init_invalid (). */ ++extern int coord_is_invalid(const coord_t * coord); ++ ++/* Returns true if the coordinate is positioned at an existing item, not before or after ++ an item. 
It may be placed at, before, or after any unit within the item, whether ++ existing or not. If this is true you can call methods of the item plugin. */ ++extern int coord_is_existing_item(const coord_t * coord); ++ ++/* Returns true if the coordinate is positioned after a item, before a item, after the ++ last unit of an item, before the first unit of an item, or at an empty node. */ ++extern int coord_is_between_items(const coord_t * coord); ++ ++/* Returns true if the coordinate is positioned at an existing unit, not before or after a ++ unit. */ ++extern int coord_is_existing_unit(const coord_t * coord); ++ ++/* Returns true if the coordinate is positioned at an empty node. */ ++extern int coord_is_empty(const coord_t * coord); ++ ++/* Returns true if the coordinate is positioned at the first unit of the first item. Not ++ true for empty nodes nor coordinates positioned before the first item. */ ++extern int coord_is_leftmost_unit(const coord_t * coord); ++ ++/* Returns true if the coordinate is positioned after the last item or after the last unit ++ of the last item or it is an empty node. */ ++extern int coord_is_after_rightmost(const coord_t * coord); ++ ++/* Returns true if the coordinate is positioned before the first item or it is an empty ++ node. */ ++extern int coord_is_before_leftmost(const coord_t * coord); ++ ++/* Calls either coord_is_before_leftmost or coord_is_after_rightmost depending on sideof ++ argument. */ ++extern int coord_is_after_sideof_unit(coord_t * coord, sideof dir); ++ ++/* COORD MODIFIERS */ ++ ++/* Advances the coordinate by one unit to the right. If empty, no change. If ++ coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new position is ++ an existing unit. */ ++extern int coord_next_unit(coord_t * coord); ++ ++/* Advances the coordinate by one item to the right. If empty, no change. If ++ coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new position is ++ an existing item. 
*/ ++extern int coord_next_item(coord_t * coord); ++ ++/* Advances the coordinate by one unit to the left. If empty, no change. If ++ coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new position ++ is an existing unit. */ ++extern int coord_prev_unit(coord_t * coord); ++ ++/* Advances the coordinate by one item to the left. If empty, no change. If ++ coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new position ++ is an existing item. */ ++extern int coord_prev_item(coord_t * coord); ++ ++/* If the coordinate is between items, shifts it to the right. Returns 0 on success and ++ non-zero if there is no position to the right. */ ++extern int coord_set_to_right(coord_t * coord); ++ ++/* If the coordinate is between items, shifts it to the left. Returns 0 on success and ++ non-zero if there is no position to the left. */ ++extern int coord_set_to_left(coord_t * coord); ++ ++/* If the coordinate is at an existing unit, set to after that unit. Returns 0 on success ++ and non-zero if the unit did not exist. */ ++extern int coord_set_after_unit(coord_t * coord); ++ ++/* Calls either coord_next_unit or coord_prev_unit depending on sideof argument. */ ++extern int coord_sideof_unit(coord_t * coord, sideof dir); ++ ++/* iterate over all units in @node */ ++#define for_all_units( coord, node ) \ ++ for( coord_init_before_first_item( ( coord ), ( node ) ) ; \ ++ coord_next_unit( coord ) == 0 ; ) ++ ++/* iterate over all items in @node */ ++#define for_all_items( coord, node ) \ ++ for( coord_init_before_first_item( ( coord ), ( node ) ) ; \ ++ coord_next_item( coord ) == 0 ; ) ++ ++/* COORD/ITEM METHODS */ ++ ++extern int item_utmost_child_real_block(const coord_t * coord, sideof side, ++ reiser4_block_nr * blk); ++extern int item_utmost_child(const coord_t * coord, sideof side, ++ jnode ** child); ++ ++/* a flow is a sequence of bytes being written to or read from the tree. 
The ++ tree will slice the flow into items while storing it into nodes, but all of ++ that is hidden from anything outside the tree. */ ++ ++struct flow { ++ reiser4_key key; /* key of start of flow's sequence of bytes */ ++ loff_t length; /* length of flow's sequence of bytes */ ++ char *data; /* start of flow's sequence of bytes */ ++ int user; /* if 1 data is user space, 0 - kernel space */ ++ rw_op op; /* NIKITA-FIXME-HANS: comment is where? */ ++}; ++ ++void move_flow_forward(flow_t * f, unsigned count); ++ ++/* &reiser4_item_data - description of data to be inserted or pasted ++ ++ Q: articulate the reasons for the difference between this and flow. ++ ++ A: Becides flow we insert into tree other things: stat data, directory ++ entry, etc. To insert them into tree one has to provide this structure. If ++ one is going to insert flow - he can use insert_flow, where this structure ++ does not have to be created ++*/ ++struct reiser4_item_data { ++ /* actual data to be inserted. If NULL, ->create_item() will not ++ do xmemcpy itself, leaving this up to the caller. This can ++ save some amount of unnecessary memory copying, for example, ++ during insertion of stat data. ++ ++ */ ++ char *data; ++ /* 1 if 'char * data' contains pointer to user space and 0 if it is ++ kernel space */ ++ int user; ++ /* amount of data we are going to insert or paste */ ++ int length; ++ /* "Arg" is opaque data that is passed down to the ++ ->create_item() method of node layout, which in turn ++ hands it to the ->create_hook() of item being created. This ++ arg is currently used by: ++ ++ . ->create_hook() of internal item ++ (fs/reiser4/plugin/item/internal.c:internal_create_hook()), ++ . ->paste() method of directory item. ++ . ->create_hook() of extent item ++ ++ For internal item, this is left "brother" of new node being ++ inserted and it is used to add new node into sibling list ++ after parent to it was just inserted into parent. 
++ ++ While ->arg does look somewhat of unnecessary compication, ++ it actually saves a lot of headache in many places, because ++ all data necessary to insert or paste new data into tree are ++ collected in one place, and this eliminates a lot of extra ++ argument passing and storing everywhere. ++ ++ */ ++ void *arg; ++ /* plugin of item we are inserting */ ++ item_plugin *iplug; ++}; ++ ++/* __REISER4_COORD_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/debug.c b/fs/reiser4/debug.c +new file mode 100644 +index 0000000..3c55fe8 +--- /dev/null ++++ b/fs/reiser4/debug.c +@@ -0,0 +1,308 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Debugging facilities. */ ++ ++/* ++ * This file contains generic debugging functions used by reiser4. Roughly ++ * following: ++ * ++ * panicking: reiser4_do_panic(), reiser4_print_prefix(). ++ * ++ * locking: ++ * reiser4_schedulable(), reiser4_lock_counters(), print_lock_counters(), ++ * reiser4_no_counters_are_held(), reiser4_commit_check_locks() ++ * ++ * error code monitoring (see comment before RETERR macro): ++ * reiser4_return_err(), reiser4_report_err(). ++ * ++ * stack back-tracing: fill_backtrace() ++ * ++ * miscellaneous: reiser4_preempt_point(), call_on_each_assert(), ++ * reiser4_debugtrap(). ++ * ++ */ ++ ++#include "reiser4.h" ++#include "context.h" ++#include "super.h" ++#include "txnmgr.h" ++#include "znode.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if 0 ++#if REISER4_DEBUG ++static void reiser4_report_err(void); ++#else ++#define reiser4_report_err() noop ++#endif ++#endif /* 0 */ ++ ++/* ++ * global buffer where message given to reiser4_panic is formatted. 
++ */ ++static char panic_buf[REISER4_PANIC_MSG_BUFFER_SIZE]; ++ ++/* ++ * lock protecting consistency of panic_buf under concurrent panics ++ */ ++static DEFINE_SPINLOCK(panic_guard); ++ ++/* Your best friend. Call it on each occasion. This is called by ++ fs/reiser4/debug.h:reiser4_panic(). */ ++void reiser4_do_panic(const char *format /* format string */ , ... /* rest */ ) ++{ ++ static int in_panic = 0; ++ va_list args; ++ ++ /* ++ * check for recursive panic. ++ */ ++ if (in_panic == 0) { ++ in_panic = 1; ++ ++ spin_lock(&panic_guard); ++ va_start(args, format); ++ vsnprintf(panic_buf, sizeof(panic_buf), format, args); ++ va_end(args); ++ printk(KERN_EMERG "reiser4 panicked cowardly: %s", panic_buf); ++ spin_unlock(&panic_guard); ++ ++ /* ++ * if kernel debugger is configured---drop in. Early dropping ++ * into kgdb is not always convenient, because panic message ++ * is not yet printed most of the times. But: ++ * ++ * (1) message can be extracted from printk_buf[] ++ * (declared static inside of printk()), and ++ * ++ * (2) sometimes serial/kgdb combo dies while printing ++ * long panic message, so it's more prudent to break into ++ * debugger earlier. 
++ * ++ */ ++ DEBUGON(1); ++ } ++ /* to make gcc happy about noreturn attribute */ ++ panic("%s", panic_buf); ++} ++ ++#if 0 ++void ++reiser4_print_prefix(const char *level, int reperr, const char *mid, ++ const char *function, const char *file, int lineno) ++{ ++ const char *comm; ++ int pid; ++ ++ if (unlikely(in_interrupt() || in_irq())) { ++ comm = "interrupt"; ++ pid = 0; ++ } else { ++ comm = current->comm; ++ pid = current->pid; ++ } ++ printk("%sreiser4[%.16s(%i)]: %s (%s:%i)[%s]:\n", ++ level, comm, pid, function, file, lineno, mid); ++ if (reperr) ++ reiser4_report_err(); ++} ++#endif /* 0 */ ++ ++/* Preemption point: this should be called periodically during long running ++ operations (carry, allocate, and squeeze are best examples) */ ++int reiser4_preempt_point(void) ++{ ++ assert("nikita-3008", reiser4_schedulable()); ++ cond_resched(); ++ return signal_pending(current); ++} ++ ++#if REISER4_DEBUG ++/* Debugging aid: return struct where information about locks taken by current ++ thread is accumulated. This can be used to formulate lock ordering ++ constraints and various assertions. ++ ++*/ ++reiser4_lock_counters_info *reiser4_lock_counters(void) ++{ ++ reiser4_context *ctx = get_current_context(); ++ assert("jmacd-1123", ctx != NULL); ++ return &ctx->locks; ++} ++ ++/* ++ * print human readable information about locks held by the reiser4 context. 
++ */ ++static void print_lock_counters(const char *prefix, ++ const reiser4_lock_counters_info * info) ++{ ++ printk("%s: jnode: %i, tree: %i (r:%i,w:%i), dk: %i (r:%i,w:%i)\n" ++ "jload: %i, " ++ "txnh: %i, atom: %i, stack: %i, txnmgr: %i, " ++ "ktxnmgrd: %i, fq: %i\n" ++ "inode: %i, " ++ "cbk_cache: %i (r:%i,w%i), " ++ "eflush: %i, " ++ "zlock: %i,\n" ++ "spin: %i, long: %i inode_sem: (r:%i,w:%i)\n" ++ "d: %i, x: %i, t: %i\n", prefix, ++ info->spin_locked_jnode, ++ info->rw_locked_tree, info->read_locked_tree, ++ info->write_locked_tree, ++ info->rw_locked_dk, info->read_locked_dk, info->write_locked_dk, ++ info->spin_locked_jload, ++ info->spin_locked_txnh, ++ info->spin_locked_atom, info->spin_locked_stack, ++ info->spin_locked_txnmgr, info->spin_locked_ktxnmgrd, ++ info->spin_locked_fq, ++ info->spin_locked_inode, ++ info->rw_locked_cbk_cache, ++ info->read_locked_cbk_cache, ++ info->write_locked_cbk_cache, ++ info->spin_locked_super_eflush, ++ info->spin_locked_zlock, ++ info->spin_locked, ++ info->long_term_locked_znode, ++ info->inode_sem_r, info->inode_sem_w, ++ info->d_refs, info->x_refs, info->t_refs); ++} ++ ++/* check that no spinlocks are held */ ++int reiser4_schedulable(void) ++{ ++ if (get_current_context_check() != NULL) { ++ if (!LOCK_CNT_NIL(spin_locked)) { ++ print_lock_counters("in atomic", reiser4_lock_counters()); ++ return 0; ++ } ++ } ++ might_sleep(); ++ return 1; ++} ++/* ++ * return true, iff no locks are held. 
++ */ ++int reiser4_no_counters_are_held(void) ++{ ++ reiser4_lock_counters_info *counters; ++ ++ counters = reiser4_lock_counters(); ++ return ++ (counters->spin_locked_zlock == 0) && ++ (counters->spin_locked_jnode == 0) && ++ (counters->rw_locked_tree == 0) && ++ (counters->read_locked_tree == 0) && ++ (counters->write_locked_tree == 0) && ++ (counters->rw_locked_dk == 0) && ++ (counters->read_locked_dk == 0) && ++ (counters->write_locked_dk == 0) && ++ (counters->spin_locked_txnh == 0) && ++ (counters->spin_locked_atom == 0) && ++ (counters->spin_locked_stack == 0) && ++ (counters->spin_locked_txnmgr == 0) && ++ (counters->spin_locked_inode == 0) && ++ (counters->spin_locked == 0) && ++ (counters->long_term_locked_znode == 0) && ++ (counters->inode_sem_r == 0) && ++ (counters->inode_sem_w == 0) && (counters->d_refs == 0); ++} ++ ++/* ++ * return true, iff transaction commit can be done under locks held by the ++ * current thread. ++ */ ++int reiser4_commit_check_locks(void) ++{ ++ reiser4_lock_counters_info *counters; ++ int inode_sem_r; ++ int inode_sem_w; ++ int result; ++ ++ /* ++ * inode's read/write semaphore is the only reiser4 lock that can be ++ * held during commit. ++ */ ++ ++ counters = reiser4_lock_counters(); ++ inode_sem_r = counters->inode_sem_r; ++ inode_sem_w = counters->inode_sem_w; ++ ++ counters->inode_sem_r = counters->inode_sem_w = 0; ++ result = reiser4_no_counters_are_held(); ++ counters->inode_sem_r = inode_sem_r; ++ counters->inode_sem_w = inode_sem_w; ++ return result; ++} ++ ++/* ++ * fill "error site" in the current reiser4 context. See comment before RETERR ++ * macro for more details. 
++ */ ++void reiser4_return_err(int code, const char *file, int line) ++{ ++ if (code < 0 && is_in_reiser4_context()) { ++ reiser4_context *ctx = get_current_context(); ++ ++ if (ctx != NULL) { ++ ctx->err.code = code; ++ ctx->err.file = file; ++ ctx->err.line = line; ++ } ++ } ++} ++ ++#if 0 ++/* ++ * report error information recorder by reiser4_return_err(). ++ */ ++static void reiser4_report_err(void) ++{ ++ reiser4_context *ctx = get_current_context_check(); ++ ++ if (ctx != NULL) { ++ if (ctx->err.code != 0) { ++ printk("code: %i at %s:%i\n", ++ ctx->err.code, ctx->err.file, ctx->err.line); ++ } ++ } ++} ++#endif /* 0 */ ++ ++#endif /* REISER4_DEBUG */ ++ ++#if KERNEL_DEBUGGER ++ ++/* ++ * this functions just drops into kernel debugger. It is a convenient place to ++ * put breakpoint in. ++ */ ++void reiser4_debugtrap(void) ++{ ++ /* do nothing. Put break point here. */ ++#if defined(CONFIG_KGDB) && !defined(CONFIG_REISER4_FS_MODULE) ++ extern void breakpoint(void); ++ breakpoint(); ++#endif ++} ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/debug.h b/fs/reiser4/debug.h +new file mode 100644 +index 0000000..68e7f31 +--- /dev/null ++++ b/fs/reiser4/debug.h +@@ -0,0 +1,350 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Declarations of debug macros. */ ++ ++#if !defined( __FS_REISER4_DEBUG_H__ ) ++#define __FS_REISER4_DEBUG_H__ ++ ++#include "forward.h" ++#include "reiser4.h" ++ ++/* generic function to produce formatted output, decorating it with ++ whatever standard prefixes/postfixes we want. "Fun" is a function ++ that will be actually called, can be printk, panic etc. ++ This is for use by other debugging macros, not by users. */ ++#define DCALL(lev, fun, reperr, label, format, ...) 
\ ++({ \ ++ fun(lev "reiser4[%.16s(%i)]: %s (%s:%i)[%s]:\n" format "\n" , \ ++ current->comm, current->pid, __FUNCTION__, \ ++ __FILE__, __LINE__, label, ## __VA_ARGS__); \ ++}) ++ ++/* ++ * cause kernel to crash ++ */ ++#define reiser4_panic(mid, format, ...) \ ++ DCALL("", reiser4_do_panic, 1, mid, format , ## __VA_ARGS__) ++ ++/* print message with indication of current process, file, line and ++ function */ ++#define reiser4_log(label, format, ...) \ ++ DCALL(KERN_DEBUG, printk, 0, label, format , ## __VA_ARGS__) ++ ++/* Assertion checked during compilation. ++ If "cond" is false (0) we get duplicate case label in switch. ++ Use this to check something like famous ++ cassert (sizeof(struct reiserfs_journal_commit) == 4096) ; ++ in 3.x journal.c. If cassertion fails you get compiler error, ++ so no "maintainer-id". ++*/ ++#define cassert(cond) ({ switch(-1) { case (cond): case 0: break; } }) ++ ++#define noop do {;} while(0) ++ ++#if REISER4_DEBUG ++/* version of info that only actually prints anything when _d_ebugging ++ is on */ ++#define dinfo(format, ...) printk(format , ## __VA_ARGS__) ++/* macro to catch logical errors. Put it into `default' clause of ++ switch() statement. */ ++#define impossible(label, format, ...) \ ++ reiser4_panic(label, "impossible: " format , ## __VA_ARGS__) ++/* assert assures that @cond is true. If it is not, reiser4_panic() is ++ called. Use this for checking logical consistency and _never_ call ++ this to check correctness of external data: disk blocks and user-input . */ ++#define assert(label, cond) \ ++({ \ ++ /* call_on_each_assert(); */ \ ++ if (cond) { \ ++ /* put negated check to avoid using !(cond) that would lose \ ++ * warnings for things like assert(a = b); */ \ ++ ; \ ++ } else { \ ++ DEBUGON(1); \ ++ reiser4_panic(label, "assertion failed: %s", #cond); \ ++ } \ ++}) ++ ++/* like assertion, but @expr is evaluated even if REISER4_DEBUG is off. 
*/ ++#define check_me( label, expr ) assert( label, ( expr ) ) ++ ++#define ON_DEBUG( exp ) exp ++ ++extern int reiser4_schedulable(void); ++extern void call_on_each_assert(void); ++ ++#else ++ ++#define dinfo( format, args... ) noop ++#define impossible( label, format, args... ) noop ++#define assert( label, cond ) noop ++#define check_me( label, expr ) ( ( void ) ( expr ) ) ++#define ON_DEBUG( exp ) ++#define reiser4_schedulable() might_sleep() ++ ++/* REISER4_DEBUG */ ++#endif ++ ++#if REISER4_DEBUG ++/* per-thread information about lock acquired by this thread. Used by lock ++ * ordering checking in spin_macros.h */ ++typedef struct reiser4_lock_counters_info { ++ int rw_locked_tree; ++ int read_locked_tree; ++ int write_locked_tree; ++ ++ int rw_locked_dk; ++ int read_locked_dk; ++ int write_locked_dk; ++ ++ int rw_locked_cbk_cache; ++ int read_locked_cbk_cache; ++ int write_locked_cbk_cache; ++ ++ int spin_locked_zlock; ++ int spin_locked_jnode; ++ int spin_locked_jload; ++ int spin_locked_txnh; ++ int spin_locked_atom; ++ int spin_locked_stack; ++ int spin_locked_txnmgr; ++ int spin_locked_ktxnmgrd; ++ int spin_locked_fq; ++ int spin_locked_inode; ++ int spin_locked_super_eflush; ++ int spin_locked; ++ int long_term_locked_znode; ++ ++ int inode_sem_r; ++ int inode_sem_w; ++ ++ int d_refs; ++ int x_refs; ++ int t_refs; ++} reiser4_lock_counters_info; ++ ++extern reiser4_lock_counters_info *reiser4_lock_counters(void); ++#define IN_CONTEXT(a, b) (is_in_reiser4_context() ? (a) : (b)) ++ ++/* increment lock-counter @counter, if present */ ++#define LOCK_CNT_INC(counter) \ ++ IN_CONTEXT(++(reiser4_lock_counters()->counter), 0) ++ ++/* decrement lock-counter @counter, if present */ ++#define LOCK_CNT_DEC(counter) \ ++ IN_CONTEXT(--(reiser4_lock_counters()->counter), 0) ++ ++/* check that lock-counter is zero. 
This is for use in assertions */ ++#define LOCK_CNT_NIL(counter) \ ++ IN_CONTEXT(reiser4_lock_counters()->counter == 0, 1) ++ ++/* check that lock-counter is greater than zero. This is for use in ++ * assertions */ ++#define LOCK_CNT_GTZ(counter) \ ++ IN_CONTEXT(reiser4_lock_counters()->counter > 0, 1) ++#define LOCK_CNT_LT(counter,n) \ ++ IN_CONTEXT(reiser4_lock_counters()->counter < n, 1) ++ ++#else /* REISER4_DEBUG */ ++ ++/* no-op versions on the above */ ++ ++typedef struct reiser4_lock_counters_info { ++} reiser4_lock_counters_info; ++ ++#define reiser4_lock_counters() ((reiser4_lock_counters_info *)NULL) ++#define LOCK_CNT_INC(counter) noop ++#define LOCK_CNT_DEC(counter) noop ++#define LOCK_CNT_NIL(counter) (1) ++#define LOCK_CNT_GTZ(counter) (1) ++#define LOCK_CNT_LT(counter,n) (1) ++ ++#endif /* REISER4_DEBUG */ ++ ++#define assert_spin_not_locked(lock) BUG_ON(0) ++#define assert_rw_write_locked(lock) BUG_ON(0) ++#define assert_rw_read_locked(lock) BUG_ON(0) ++#define assert_rw_locked(lock) BUG_ON(0) ++#define assert_rw_not_write_locked(lock) BUG_ON(0) ++#define assert_rw_not_read_locked(lock) BUG_ON(0) ++#define assert_rw_not_locked(lock) BUG_ON(0) ++ ++/* flags controlling debugging behavior. Are set through debug_flags=N mount ++ option. */ ++typedef enum { ++ /* print a lot of information during panic. When this is on all jnodes ++ * are listed. This can be *very* large output. Usually you don't want ++ * this. Especially over serial line. 
*/ ++ REISER4_VERBOSE_PANIC = 0x00000001, ++ /* print a lot of information during umount */ ++ REISER4_VERBOSE_UMOUNT = 0x00000002, ++ /* print gathered statistics on umount */ ++ REISER4_STATS_ON_UMOUNT = 0x00000004, ++ /* check node consistency */ ++ REISER4_CHECK_NODE = 0x00000008 ++} reiser4_debug_flags; ++ ++extern int is_in_reiser4_context(void); ++ ++/* ++ * evaluate expression @e only if with reiser4 context ++ */ ++#define ON_CONTEXT(e) do { \ ++ if(is_in_reiser4_context()) { \ ++ e; \ ++ } } while(0) ++ ++/* ++ * evaluate expression @e only when within reiser4_context and debugging is ++ * on. ++ */ ++#define ON_DEBUG_CONTEXT( e ) ON_DEBUG( ON_CONTEXT( e ) ) ++ ++/* ++ * complain about unexpected function result and crash. Used in "default" ++ * branches of switch statements and alike to assert that invalid results are ++ * not silently ignored. ++ */ ++#define wrong_return_value( label, function ) \ ++ impossible( label, "wrong return value from " function ) ++ ++/* Issue different types of reiser4 messages to the console */ ++#define warning( label, format, ... ) \ ++ DCALL( KERN_WARNING, \ ++ printk, 1, label, "WARNING: " format , ## __VA_ARGS__ ) ++#define notice( label, format, ... ) \ ++ DCALL( KERN_NOTICE, \ ++ printk, 1, label, "NOTICE: " format , ## __VA_ARGS__ ) ++ ++/* mark not yet implemented functionality */ ++#define not_yet( label, format, ... ) \ ++ reiser4_panic( label, "NOT YET IMPLEMENTED: " format , ## __VA_ARGS__ ) ++ ++extern void reiser4_do_panic(const char *format, ...) ++ __attribute__ ((noreturn, format(printf, 1, 2))); ++ ++extern int reiser4_preempt_point(void); ++extern void reiser4_print_stats(void); ++ ++#if REISER4_DEBUG ++extern int reiser4_no_counters_are_held(void); ++extern int reiser4_commit_check_locks(void); ++#else ++#define reiser4_no_counters_are_held() (1) ++#define reiser4_commit_check_locks() (1) ++#endif ++ ++/* true if @i is power-of-two. Useful for rate-limited warnings, etc. 
*/ ++#define IS_POW(i) \ ++({ \ ++ typeof(i) __i; \ ++ \ ++ __i = (i); \ ++ !(__i & (__i - 1)); \ ++}) ++ ++#define KERNEL_DEBUGGER (1) ++ ++#if KERNEL_DEBUGGER ++ ++extern void reiser4_debugtrap(void); ++ ++/* ++ * Check condition @cond and drop into kernel debugger (kgdb) if it's true. If ++ * kgdb is not compiled in, do nothing. ++ */ ++#define DEBUGON(cond) \ ++({ \ ++ if (unlikely(cond)) \ ++ reiser4_debugtrap(); \ ++}) ++#else ++#define DEBUGON(cond) noop ++#endif ++ ++/* ++ * Error code tracing facility. (Idea is borrowed from XFS code.) ++ * ++ * Suppose some strange and/or unexpected code is returned from some function ++ * (for example, write(2) returns -EEXIST). It is possible to place a ++ * breakpoint in the reiser4_write(), but it is too late here. How to find out ++ * in what particular place -EEXIST was generated first? ++ * ++ * In reiser4 all places where actual error codes are produced (that is, ++ * statements of the form ++ * ++ * return -EFOO; // (1), or ++ * ++ * result = -EFOO; // (2) ++ * ++ * are replaced with ++ * ++ * return RETERR(-EFOO); // (1a), and ++ * ++ * result = RETERR(-EFOO); // (2a) respectively ++ * ++ * RETERR() macro fills a backtrace in reiser4_context. This back-trace is ++ * printed in error and warning messages. Moreover, it's possible to put a ++ * conditional breakpoint in reiser4_return_err (low-level function called ++ * by RETERR() to do the actual work) to break into debugger immediately ++ * when particular error happens. ++ * ++ */ ++ ++#if REISER4_DEBUG ++ ++/* ++ * data-type to store information about where error happened ("error site"). ++ */ ++typedef struct err_site { ++ int code; /* error code */ ++ const char *file; /* source file, filled by __FILE__ */ ++ int line; /* source file line, filled by __LINE__ */ ++} err_site; ++ ++extern void reiser4_return_err(int code, const char *file, int line); ++ ++/* ++ * fill &get_current_context()->err_site with error information. 
++ */ ++#define RETERR(code) \ ++({ \ ++ typeof(code) __code; \ ++ \ ++ __code = (code); \ ++ reiser4_return_err(__code, __FILE__, __LINE__); \ ++ __code; \ ++}) ++ ++#else ++ ++/* ++ * no-op versions of the above ++ */ ++ ++typedef struct err_site { ++} err_site; ++#define RETERR(code) code ++#endif ++ ++#if REISER4_LARGE_KEY ++/* ++ * conditionally compile arguments only if REISER4_LARGE_KEY is on. ++ */ ++#define ON_LARGE_KEY(...) __VA_ARGS__ ++#else ++#define ON_LARGE_KEY(...) ++#endif ++ ++/* __FS_REISER4_DEBUG_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/dformat.h b/fs/reiser4/dformat.h +new file mode 100644 +index 0000000..8bca29e +--- /dev/null ++++ b/fs/reiser4/dformat.h +@@ -0,0 +1,70 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Formats of on-disk data and conversion functions. */ ++ ++/* put all item formats in the files describing the particular items, ++ our model is, everything you need to do to add an item to reiser4, ++ (excepting the changes to the plugin that uses the item which go ++ into the file defining that plugin), you put into one file. */ ++/* Data on disk are stored in little-endian format. ++ To declare fields of on-disk structures, use d8, d16, d32 and d64. ++ d??tocpu() and cputod??() to convert. 
*/ ++ ++#if !defined( __FS_REISER4_DFORMAT_H__ ) ++#define __FS_REISER4_DFORMAT_H__ ++ ++#include ++#include ++#include ++ ++typedef __u8 d8; ++typedef __le16 d16; ++typedef __le32 d32; ++typedef __le64 d64; ++ ++#define PACKED __attribute__((packed)) ++ ++/* data-type for block number */ ++typedef __u64 reiser4_block_nr; ++ ++/* data-type for block number on disk, disk format */ ++typedef __le64 reiser4_dblock_nr; ++ ++/** ++ * disk_addr_eq - compare disk addresses ++ * @b1: pointer to block number ot compare ++ * @b2: pointer to block number ot compare ++ * ++ * Returns true if if disk addresses are the same ++ */ ++static inline int disk_addr_eq(const reiser4_block_nr *b1, ++ const reiser4_block_nr * b2) ++{ ++ assert("nikita-1033", b1 != NULL); ++ assert("nikita-1266", b2 != NULL); ++ ++ return !memcmp(b1, b2, sizeof *b1); ++} ++ ++/* structure of master reiser4 super block */ ++typedef struct reiser4_master_sb { ++ char magic[16]; /* "ReIsEr4" */ ++ __le16 disk_plugin_id; /* id of disk layout plugin */ ++ __le16 blocksize; ++ char uuid[16]; /* unique id */ ++ char label[16]; /* filesystem label */ ++ __le64 diskmap; /* location of the diskmap. 0 if not present */ ++} reiser4_master_sb; ++ ++/* __FS_REISER4_DFORMAT_H__ */ ++#endif ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * End: ++ */ +diff --git a/fs/reiser4/dscale.c b/fs/reiser4/dscale.c +new file mode 100644 +index 0000000..a9bc224 +--- /dev/null ++++ b/fs/reiser4/dscale.c +@@ -0,0 +1,174 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Scalable on-disk integers */ ++ ++/* ++ * Various on-disk structures contain integer-like structures. Stat-data ++ * contain [yes, "data" is plural, check the dictionary] file size, link ++ * count; extent unit contains extent width etc. 
To accommodate for general ++ * case enough space is reserved to keep largest possible value. 64 bits in ++ * all cases above. But in overwhelming majority of cases numbers actually ++ * stored in these fields will be comparatively small and reserving 8 bytes is ++ * a waste of precious disk bandwidth. ++ * ++ * Scalable integers are one way to solve this problem. dscale_write() ++ * function stores __u64 value in the given area consuming from 1 to 9 bytes, ++ * depending on the magnitude of the value supplied. dscale_read() reads value ++ * previously stored by dscale_write(). ++ * ++ * dscale_write() produces format not completely unlike of UTF: two highest ++ * bits of the first byte are used to store "tag". One of 4 possible tag ++ * values is chosen depending on the number being encoded: ++ * ++ * 0 ... 0x3f => 0 [table 1] ++ * 0x40 ... 0x3fff => 1 ++ * 0x4000 ... 0x3fffffff => 2 ++ * 0x40000000 ... 0xffffffffffffffff => 3 ++ * ++ * (see dscale_range() function) ++ * ++ * Values in the range 0x40000000 ... 0xffffffffffffffff require 8 full bytes ++ * to be stored, so in this case there is no place in the first byte to store ++ * tag. For such values tag is stored in an extra 9th byte. ++ * ++ * As _highest_ bits are used for the test (which is natural) scaled integers ++ * are stored in BIG-ENDIAN format in contrast with the rest of reiser4 which ++ * uses LITTLE-ENDIAN. ++ * ++ */ ++ ++#include "debug.h" ++#include "dscale.h" ++ ++/* return tag of scaled integer stored at @address */ ++static int gettag(const unsigned char *address) ++{ ++ /* tag is stored in two highest bits */ ++ return (*address) >> 6; ++} ++ ++/* clear tag from value. Clear tag embedded into @value. */ ++static void cleartag(__u64 * value, int tag) ++{ ++ /* ++ * W-w-what ?! ++ * ++ * Actually, this is rather simple: @value passed here was read by ++ * dscale_read(), converted from BIG-ENDIAN, and padded to __u64 by ++ * zeroes. 
Tag is still stored in the highest (arithmetically) ++ * non-zero bits of @value, but relative position of tag within __u64 ++ * depends on @tag. ++ * ++ * For example if @tag is 0, it's stored 2 highest bits of lowest ++ * byte, and its offset (counting from lowest bit) is 8 - 2 == 6 bits. ++ * ++ * If tag is 1, it's stored in two highest bits of 2nd lowest byte, ++ * and it's offset if (2 * 8) - 2 == 14 bits. ++ * ++ * See table 1 above for details. ++ * ++ * All these cases are captured by the formula: ++ */ ++ *value &= ~(3 << (((1 << tag) << 3) - 2)); ++ /* ++ * That is, clear two (3 == 0t11) bits at the offset ++ * ++ * 8 * (2 ^ tag) - 2, ++ * ++ * that is, two highest bits of (2 ^ tag)-th byte of @value. ++ */ ++} ++ ++/* return tag for @value. See table 1 above for details. */ ++static int dscale_range(__u64 value) ++{ ++ if (value > 0x3fffffff) ++ return 3; ++ if (value > 0x3fff) ++ return 2; ++ if (value > 0x3f) ++ return 1; ++ return 0; ++} ++ ++/* restore value stored at @adderss by dscale_write() and return number of ++ * bytes consumed */ ++int dscale_read(unsigned char *address, __u64 * value) ++{ ++ int tag; ++ ++ /* read tag */ ++ tag = gettag(address); ++ switch (tag) { ++ case 3: ++ /* In this case tag is stored in an extra byte, skip this byte ++ * and decode value stored in the next 8 bytes.*/ ++ *value = __be64_to_cpu(get_unaligned((__be64 *)(address + 1))); ++ /* worst case: 8 bytes for value itself plus one byte for ++ * tag. */ ++ return 9; ++ case 0: ++ *value = get_unaligned(address); ++ break; ++ case 1: ++ *value = __be16_to_cpu(get_unaligned((__be16 *)address)); ++ break; ++ case 2: ++ *value = __be32_to_cpu(get_unaligned((__be32 *)address)); ++ break; ++ default: ++ return RETERR(-EIO); ++ } ++ /* clear tag embedded into @value */ ++ cleartag(value, tag); ++ /* number of bytes consumed is (2 ^ tag)---see table 1. 
*/ ++ return 1 << tag; ++} ++ ++/* store @value at @address and return number of bytes consumed */ ++int dscale_write(unsigned char *address, __u64 value) ++{ ++ int tag; ++ int shift; ++ __be64 v; ++ unsigned char *valarr; ++ ++ tag = dscale_range(value); ++ v = __cpu_to_be64(value); ++ valarr = (unsigned char *)&v; ++ shift = (tag == 3) ? 1 : 0; ++ memcpy(address + shift, valarr + sizeof v - (1 << tag), 1 << tag); ++ *address |= (tag << 6); ++ return shift + (1 << tag); ++} ++ ++/* number of bytes required to store @value */ ++int dscale_bytes(__u64 value) ++{ ++ int bytes; ++ ++ bytes = 1 << dscale_range(value); ++ if (bytes == 8) ++ ++bytes; ++ return bytes; ++} ++ ++/* returns true if @value and @other require the same number of bytes to be ++ * stored. Used by detect when data structure (like stat-data) has to be ++ * expanded or contracted. */ ++int dscale_fit(__u64 value, __u64 other) ++{ ++ return dscale_range(value) == dscale_range(other); ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/dscale.h b/fs/reiser4/dscale.h +new file mode 100644 +index 0000000..545e111 +--- /dev/null ++++ b/fs/reiser4/dscale.h +@@ -0,0 +1,27 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Scalable on-disk integers. See dscale.h for details. */ ++ ++#if !defined( __FS_REISER4_DSCALE_H__ ) ++#define __FS_REISER4_DSCALE_H__ ++ ++#include "dformat.h" ++ ++extern int dscale_read(unsigned char *address, __u64 * value); ++extern int dscale_write(unsigned char *address, __u64 value); ++extern int dscale_bytes(__u64 value); ++extern int dscale_fit(__u64 value, __u64 other); ++ ++/* __FS_REISER4_DSCALE_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/entd.c b/fs/reiser4/entd.c +new file mode 100644 +index 0000000..1be9fff +--- /dev/null ++++ b/fs/reiser4/entd.c +@@ -0,0 +1,335 @@ ++/* Copyright 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Ent daemon. */ ++ ++#include "debug.h" ++#include "txnmgr.h" ++#include "tree.h" ++#include "entd.h" ++#include "super.h" ++#include "context.h" ++#include "reiser4.h" ++#include "vfs_ops.h" ++#include "page_cache.h" ++#include "inode.h" ++ ++#include /* struct task_struct */ ++#include ++#include ++#include ++#include /* INITIAL_JIFFIES */ ++#include /* bdi_write_congested */ ++#include ++#include ++#include ++ ++#define DEF_PRIORITY 12 ++#define MAX_ENTD_ITERS 10 ++ ++static void entd_flush(struct super_block *, struct wbq *); ++static int entd(void *arg); ++ ++/* ++ * set ->comm field of end thread to make its state visible to the user level ++ */ ++#define entd_set_comm(state) \ ++ snprintf(current->comm, sizeof(current->comm), \ ++ "ent:%s%s", super->s_id, (state)) ++ ++/** ++ * reiser4_init_entd - initialize entd context and start kernel daemon ++ * @super: super block to start ent thread for ++ * ++ * Creates entd contexts, starts kernel thread and waits until it ++ * initializes. 
++ */ ++int reiser4_init_entd(struct super_block *super) ++{ ++ entd_context *ctx; ++ ++ assert("nikita-3104", super != NULL); ++ ++ ctx = get_entd_context(super); ++ ++ memset(ctx, 0, sizeof *ctx); ++ spin_lock_init(&ctx->guard); ++ init_waitqueue_head(&ctx->wait); ++#if REISER4_DEBUG ++ INIT_LIST_HEAD(&ctx->flushers_list); ++#endif ++ /* lists of writepage requests */ ++ INIT_LIST_HEAD(&ctx->todo_list); ++ INIT_LIST_HEAD(&ctx->done_list); ++ /* start entd */ ++ ctx->tsk = kthread_run(entd, super, "ent:%s", super->s_id); ++ if (IS_ERR(ctx->tsk)) ++ return PTR_ERR(ctx->tsk); ++ return 0; ++} ++ ++static void put_wbq(struct wbq *rq) ++{ ++ iput(rq->mapping->host); ++ complete(&rq->completion); ++} ++ ++/* ent should be locked */ ++static struct wbq *__get_wbq(entd_context * ent) ++{ ++ struct wbq *wbq; ++ ++ if (list_empty(&ent->todo_list)) ++ return NULL; ++ ++ ent->nr_todo_reqs --; ++ wbq = list_entry(ent->todo_list.next, struct wbq, link); ++ list_del_init(&wbq->link); ++ return wbq; ++} ++ ++/* ent thread function */ ++static int entd(void *arg) ++{ ++ struct super_block *super; ++ entd_context *ent; ++ int done = 0; ++ ++ super = arg; ++ /* do_fork() just copies task_struct into the new ++ thread. ->fs_context shouldn't be copied of course. This shouldn't ++ be a problem for the rest of the code though. 
++ */ ++ current->journal_info = NULL; ++ ++ ent = get_entd_context(super); ++ ++ while (!done) { ++ try_to_freeze(); ++ ++ spin_lock(&ent->guard); ++ while (ent->nr_todo_reqs != 0) { ++ struct wbq *rq; ++ ++ assert("", list_empty(&ent->done_list)); ++ ++ /* take request from the queue head */ ++ rq = __get_wbq(ent); ++ assert("", rq != NULL); ++ ent->cur_request = rq; ++ spin_unlock(&ent->guard); ++ ++ entd_set_comm("!"); ++ entd_flush(super, rq); ++ ++ put_wbq(rq); ++ ++ /* ++ * wakeup all requestors and iput their inodes ++ */ ++ spin_lock(&ent->guard); ++ while (!list_empty(&ent->done_list)) { ++ rq = list_entry(ent->done_list.next, struct wbq, link); ++ list_del_init(&rq->link); ++ ent->nr_done_reqs --; ++ spin_unlock(&ent->guard); ++ assert("", rq->written == 1); ++ put_wbq(rq); ++ spin_lock(&ent->guard); ++ } ++ } ++ spin_unlock(&ent->guard); ++ ++ entd_set_comm("."); ++ ++ { ++ DEFINE_WAIT(__wait); ++ ++ do { ++ prepare_to_wait(&ent->wait, &__wait, TASK_INTERRUPTIBLE); ++ if (kthread_should_stop()) { ++ done = 1; ++ break; ++ } ++ if (ent->nr_todo_reqs != 0) ++ break; ++ schedule(); ++ } while (0); ++ finish_wait(&ent->wait, &__wait); ++ } ++ } ++ BUG_ON(ent->nr_todo_reqs != 0); ++ return 0; ++} ++ ++/** ++ * reiser4_done_entd - stop entd kernel thread ++ * @super: super block to stop ent thread for ++ * ++ * It is called on umount. Sends stop signal to entd and wait until it handles ++ * it. 
++ */ ++void reiser4_done_entd(struct super_block *super) ++{ ++ entd_context *ent; ++ ++ assert("nikita-3103", super != NULL); ++ ++ ent = get_entd_context(super); ++ assert("zam-1055", ent->tsk != NULL); ++ kthread_stop(ent->tsk); ++} ++ ++/* called at the beginning of jnode_flush to register flusher thread with ent ++ * daemon */ ++void reiser4_enter_flush(struct super_block *super) ++{ ++ entd_context *ent; ++ ++ assert("zam-1029", super != NULL); ++ ent = get_entd_context(super); ++ ++ assert("zam-1030", ent != NULL); ++ ++ spin_lock(&ent->guard); ++ ent->flushers++; ++#if REISER4_DEBUG ++ list_add(&get_current_context()->flushers_link, &ent->flushers_list); ++#endif ++ spin_unlock(&ent->guard); ++} ++ ++/* called at the end of jnode_flush */ ++void reiser4_leave_flush(struct super_block *super) ++{ ++ entd_context *ent; ++ int wake_up_ent; ++ ++ assert("zam-1027", super != NULL); ++ ent = get_entd_context(super); ++ ++ assert("zam-1028", ent != NULL); ++ ++ spin_lock(&ent->guard); ++ ent->flushers--; ++ wake_up_ent = (ent->flushers == 0 && ent->nr_todo_reqs != 0); ++#if REISER4_DEBUG ++ list_del_init(&get_current_context()->flushers_link); ++#endif ++ spin_unlock(&ent->guard); ++ if (wake_up_ent) ++ wake_up(&ent->wait); ++} ++ ++#define ENTD_CAPTURE_APAGE_BURST SWAP_CLUSTER_MAX ++ ++static void entd_flush(struct super_block *super, struct wbq *rq) ++{ ++ reiser4_context ctx; ++ int tmp; ++ ++ init_stack_context(&ctx, super); ++ ctx.entd = 1; ++ ctx.gfp_mask = GFP_NOFS; ++ ++ rq->wbc->range_start = page_offset(rq->page); ++ rq->wbc->range_end = rq->wbc->range_start + ++ (ENTD_CAPTURE_APAGE_BURST << PAGE_CACHE_SHIFT); ++ tmp = rq->wbc->nr_to_write; ++ rq->mapping->a_ops->writepages(rq->mapping, rq->wbc); ++ ++ if (rq->wbc->nr_to_write > 0) { ++ rq->wbc->range_start = 0; ++ rq->wbc->range_end = 0; ++ generic_sync_sb_inodes(super, rq->wbc); ++ } ++ rq->wbc->nr_to_write = ENTD_CAPTURE_APAGE_BURST; ++ reiser4_writeout(super, rq->wbc); ++ ++ 
context_set_commit_async(&ctx); ++ reiser4_exit_context(&ctx); ++} ++ ++/** ++ * write_page_by_ent - ask entd thread to flush this page as part of slum ++ * @page: page to be written ++ * @wbc: writeback control passed to reiser4_writepage ++ * ++ * Creates a request, puts it on entd list of requests, wakeups entd if ++ * necessary, waits until entd completes with the request. ++ */ ++int write_page_by_ent(struct page *page, struct writeback_control *wbc) ++{ ++ struct super_block *sb; ++ struct inode *inode; ++ entd_context *ent; ++ struct wbq rq; ++ ++ assert("", PageLocked(page)); ++ assert("", page->mapping != NULL); ++ ++ sb = page->mapping->host->i_sb; ++ ent = get_entd_context(sb); ++ assert("", ent && ent->done == 0); ++ ++ /* ++ * we are going to unlock page and ask ent thread to write the ++ * page. Re-dirty page before unlocking so that if ent thread fails to ++ * write it - it will remain dirty ++ */ ++ reiser4_set_page_dirty_internal(page); ++ ++ /* ++ * pin inode in memory, unlock page, entd_flush will iput. We can not ++ * iput here becasue we can not allow delete_inode to be called here ++ */ ++ inode = igrab(page->mapping->host); ++ unlock_page(page); ++ if (inode == NULL) ++ /* inode is getting freed */ ++ return 0; ++ ++ /* init wbq */ ++ INIT_LIST_HEAD(&rq.link); ++ rq.magic = WBQ_MAGIC; ++ rq.wbc = wbc; ++ rq.page = page; ++ rq.mapping = inode->i_mapping; ++ rq.node = NULL; ++ rq.written = 0; ++ init_completion(&rq.completion); ++ ++ /* add request to entd's list of writepage requests */ ++ spin_lock(&ent->guard); ++ ent->nr_todo_reqs++; ++ list_add_tail(&rq.link, &ent->todo_list); ++ if (ent->nr_todo_reqs == 1) ++ wake_up(&ent->wait); ++ ++ spin_unlock(&ent->guard); ++ ++ /* wait until entd finishes */ ++ wait_for_completion(&rq.completion); ++ ++ if (rq.written) ++ /* Eventually ENTD has written the page to disk. 
*/ ++ return 0; ++ return 0; ++} ++ ++int wbq_available(void) ++{ ++ struct super_block *sb = reiser4_get_current_sb(); ++ entd_context *ent = get_entd_context(sb); ++ return ent->nr_todo_reqs; ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * End: ++ */ +diff --git a/fs/reiser4/entd.h b/fs/reiser4/entd.h +new file mode 100644 +index 0000000..4f79a57 +--- /dev/null ++++ b/fs/reiser4/entd.h +@@ -0,0 +1,90 @@ ++/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Ent daemon. */ ++ ++#ifndef __ENTD_H__ ++#define __ENTD_H__ ++ ++#include "context.h" ++ ++#include ++#include ++#include ++#include ++#include /* for struct task_struct */ ++ ++#define WBQ_MAGIC 0x7876dc76 ++ ++/* write-back request. */ ++struct wbq { ++ int magic; ++ struct list_head link; /* list head of this list is in entd context */ ++ struct writeback_control *wbc; ++ struct page *page; ++ struct address_space *mapping; ++ struct completion completion; ++ jnode *node; /* set if ent thread captured requested page */ ++ int written; /* set if ent thread wrote requested page */ ++}; ++ ++/* ent-thread context. This is used to synchronize starting/stopping ent ++ * threads. */ ++typedef struct entd_context { ++ /* wait queue that ent thread waits on for more work. It's ++ * signaled by write_page_by_ent(). */ ++ wait_queue_head_t wait; ++ /* spinlock protecting other fields */ ++ spinlock_t guard; ++ /* ent thread */ ++ struct task_struct *tsk; ++ /* set to indicate that ent thread should leave. 
*/ ++ int done; ++ /* counter of active flushers */ ++ int flushers; ++ /* ++ * when reiser4_writepage asks entd to write a page - it adds struct ++ * wbq to this list ++ */ ++ struct list_head todo_list; ++ /* number of elements on the above list */ ++ int nr_todo_reqs; ++ ++ struct wbq *cur_request; ++ /* ++ * when entd writes a page it moves write-back request from todo_list ++ * to done_list. This list is used at the end of entd iteration to ++ * wakeup requestors and iput inodes. ++ */ ++ struct list_head done_list; ++ /* number of elements on the above list */ ++ int nr_done_reqs; ++ ++#if REISER4_DEBUG ++ /* list of all active flushers */ ++ struct list_head flushers_list; ++#endif ++} entd_context; ++ ++extern int reiser4_init_entd(struct super_block *); ++extern void reiser4_done_entd(struct super_block *); ++ ++extern void reiser4_enter_flush(struct super_block *); ++extern void reiser4_leave_flush(struct super_block *); ++ ++extern int write_page_by_ent(struct page *, struct writeback_control *); ++extern int wbq_available(void); ++extern void ent_writes_page(struct super_block *, struct page *); ++ ++extern jnode *get_jnode_by_wbq(struct super_block *, struct wbq *); ++/* __ENTD_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/eottl.c b/fs/reiser4/eottl.c +new file mode 100644 +index 0000000..f921b19 +--- /dev/null ++++ b/fs/reiser4/eottl.c +@@ -0,0 +1,509 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "forward.h" ++#include "debug.h" ++#include "key.h" ++#include "coord.h" ++#include "plugin/item/item.h" ++#include "plugin/node/node.h" ++#include "znode.h" ++#include "block_alloc.h" ++#include "tree_walk.h" ++#include "tree_mod.h" ++#include "carry.h" ++#include "tree.h" ++#include "super.h" ++ ++#include /* for __u?? 
*/ ++ ++/* ++ * Extents on the twig level (EOTTL) handling. ++ * ++ * EOTTL poses some problems to the tree traversal, that are better explained ++ * by example. ++ * ++ * Suppose we have block B1 on the twig level with the following items: ++ * ++ * 0. internal item I0 with key (0:0:0:0) (locality, key-type, object-id, ++ * offset) ++ * 1. extent item E1 with key (1:4:100:0), having 10 blocks of 4k each ++ * 2. internal item I2 with key (10:0:0:0) ++ * ++ * We are trying to insert item with key (5:0:0:0). Lookup finds node B1, and ++ * then intra-node lookup is done. This lookup finished on the E1, because the ++ * key we are looking for is larger than the key of E1 and is smaller than key ++ * the of I2. ++ * ++ * Here search is stuck. ++ * ++ * After some thought it is clear what is wrong here: extents on the twig level ++ * break some basic property of the *search* tree (on the pretext, that they ++ * restore property of balanced tree). ++ * ++ * Said property is the following: if in the internal node of the search tree ++ * we have [ ... Key1 Pointer Key2 ... ] then, all data that are or will be ++ * keyed in the tree with the Key such that Key1 <= Key < Key2 are accessible ++ * through the Pointer. ++ * ++ * This is not true, when Pointer is Extent-Pointer, simply because extent ++ * cannot expand indefinitely to the right to include any item with ++ * ++ * Key1 <= Key <= Key2. ++ * ++ * For example, our E1 extent is only responsible for the data with keys ++ * ++ * (1:4:100:0) <= key <= (1:4:100:0xffffffffffffffff), and ++ * ++ * so, key range ++ * ++ * ( (1:4:100:0xffffffffffffffff), (10:0:0:0) ) ++ * ++ * is orphaned: there is no way to get there from the tree root. ++ * ++ * In other words, extent pointers are different than normal child pointers as ++ * far as search tree is concerned, and this creates such problems. ++ * ++ * Possible solution for this problem is to insert our item into node pointed ++ * to by I2. 
There are some problems through: ++ * ++ * (1) I2 can be in a different node. ++ * (2) E1 can be immediately followed by another extent E2. ++ * ++ * (1) is solved by calling reiser4_get_right_neighbor() and accounting ++ * for locks/coords as necessary. ++ * ++ * (2) is more complex. Solution here is to insert new empty leaf node and ++ * insert internal item between E1 and E2 pointing to said leaf node. This is ++ * further complicated by possibility that E2 is in a different node, etc. ++ * ++ * Problems: ++ * ++ * (1) if there was internal item I2 immediately on the right of an extent E1 ++ * we and we decided to insert new item S1 into node N2 pointed to by I2, then ++ * key of S1 will be less than smallest key in the N2. Normally, search key ++ * checks that key we are looking for is in the range of keys covered by the ++ * node key is being looked in. To work around of this situation, while ++ * preserving useful consistency check new flag CBK_TRUST_DK was added to the ++ * cbk falgs bitmask. This flag is automatically set on entrance to the ++ * coord_by_key() and is only cleared when we are about to enter situation ++ * described above. ++ * ++ * (2) If extent E1 is immediately followed by another extent E2 and we are ++ * searching for the key that is between E1 and E2 we only have to insert new ++ * empty leaf node when coord_by_key was called for insertion, rather than just ++ * for lookup. To distinguish these cases, new flag CBK_FOR_INSERT was added to ++ * the cbk falgs bitmask. This flag is automatically set by coord_by_key calls ++ * performed by insert_by_key() and friends. ++ * ++ * (3) Insertion of new empty leaf node (possibly) requires balancing. In any ++ * case it requires modification of node content which is only possible under ++ * write lock. It may well happen that we only have read lock on the node where ++ * new internal pointer is to be inserted (common case: lookup of non-existent ++ * stat-data that fells between two extents). 
If only read lock is held, tree ++ * traversal is restarted with lock_level modified so that next time we hit ++ * this problem, write lock will be held. Once we have write lock, balancing ++ * will be performed. ++ */ ++ ++/** ++ * is_next_item_internal - check whether next item is internal ++ * @coord: coordinate of extent item in twig node ++ * @key: search key ++ * @lh: twig node lock handle ++ * ++ * Looks at the unit next to @coord. If it is an internal one - 1 is returned, ++ * @coord is set to that unit. If that unit is in right neighbor, @lh is moved ++ * to that node, @coord is set to its first unit. If next item is not internal ++ * or does not exist then 0 is returned, @coord and @lh are left unchanged. 2 ++ * is returned if search restart has to be done. ++ */ ++static int ++is_next_item_internal(coord_t *coord, const reiser4_key *key, ++ lock_handle *lh) ++{ ++ coord_t next; ++ lock_handle rn; ++ int result; ++ ++ coord_dup(&next, coord); ++ if (coord_next_unit(&next) == 0) { ++ /* next unit is in this node */ ++ if (item_is_internal(&next)) { ++ coord_dup(coord, &next); ++ return 1; ++ } ++ assert("vs-3", item_is_extent(&next)); ++ return 0; ++ } ++ ++ /* ++ * next unit either does not exist or is in right neighbor. If it is in ++ * right neighbor we have to check right delimiting key because ++ * concurrent thread could get their first and insert item with a key ++ * smaller than @key ++ */ ++ read_lock_dk(current_tree); ++ result = keycmp(key, znode_get_rd_key(coord->node)); ++ read_unlock_dk(current_tree); ++ assert("vs-6", result != EQUAL_TO); ++ if (result == GREATER_THAN) ++ return 2; ++ ++ /* lock right neighbor */ ++ init_lh(&rn); ++ result = reiser4_get_right_neighbor(&rn, coord->node, ++ znode_is_wlocked(coord->node) ? 
++ ZNODE_WRITE_LOCK : ZNODE_READ_LOCK, ++ GN_CAN_USE_UPPER_LEVELS); ++ if (result == -E_NO_NEIGHBOR) { ++ /* we are on the rightmost edge of the tree */ ++ done_lh(&rn); ++ return 0; ++ } ++ ++ if (result) { ++ assert("vs-4", result < 0); ++ done_lh(&rn); ++ return result; ++ } ++ ++ /* ++ * check whether concurrent thread managed to insert item with a key ++ * smaller than @key ++ */ ++ read_lock_dk(current_tree); ++ result = keycmp(key, znode_get_ld_key(rn.node)); ++ read_unlock_dk(current_tree); ++ assert("vs-6", result != EQUAL_TO); ++ if (result == GREATER_THAN) { ++ done_lh(&rn); ++ return 2; ++ } ++ ++ result = zload(rn.node); ++ if (result) { ++ assert("vs-5", result < 0); ++ done_lh(&rn); ++ return result; ++ } ++ ++ coord_init_first_unit(&next, rn.node); ++ if (item_is_internal(&next)) { ++ /* ++ * next unit is in right neighbor and it is an unit of internal ++ * item. Unlock coord->node. Move @lh to right neighbor. @coord ++ * is set to the first unit of right neighbor. ++ */ ++ coord_dup(coord, &next); ++ zrelse(rn.node); ++ done_lh(lh); ++ move_lh(lh, &rn); ++ return 1; ++ } ++ ++ /* ++ * next unit is unit of extent item. Return without chaning @lh and ++ * @coord. ++ */ ++ assert("vs-6", item_is_extent(&next)); ++ zrelse(rn.node); ++ done_lh(&rn); ++ return 0; ++} ++ ++/** ++ * rd_key - calculate key of an item next to the given one ++ * @coord: position in a node ++ * @key: storage for result key ++ * ++ * @coord is set between items or after the last item in a node. Calculate key ++ * of item to the right of @coord. ++ */ ++static reiser4_key *rd_key(const coord_t *coord, reiser4_key *key) ++{ ++ coord_t dup; ++ ++ assert("nikita-2281", coord_is_between_items(coord)); ++ coord_dup(&dup, coord); ++ ++ if (coord_set_to_right(&dup) == 0) ++ /* next item is in this node. Return its key. */ ++ unit_key_by_coord(&dup, key); ++ else { ++ /* ++ * next item either does not exist or is in right ++ * neighbor. Return znode's right delimiting key. 
++ */ ++ read_lock_dk(current_tree); ++ *key = *znode_get_rd_key(coord->node); ++ read_unlock_dk(current_tree); ++ } ++ return key; ++} ++ ++/** ++ * add_empty_leaf - insert empty leaf between two extents ++ * @insert_coord: position in twig node between two extents ++ * @lh: twig node lock handle ++ * @key: left delimiting key of new node ++ * @rdkey: right delimiting key of new node ++ * ++ * Inserts empty leaf node between two extent items. It is necessary when we ++ * have to insert an item on leaf level between two extents (items on the twig ++ * level). ++ */ ++static int ++add_empty_leaf(coord_t *insert_coord, lock_handle *lh, ++ const reiser4_key *key, const reiser4_key *rdkey) ++{ ++ int result; ++ carry_pool *pool; ++ carry_level *todo; ++ reiser4_item_data *item; ++ carry_insert_data *cdata; ++ carry_op *op; ++ znode *node; ++ reiser4_tree *tree; ++ ++ assert("vs-49827", znode_contains_key_lock(insert_coord->node, key)); ++ tree = znode_get_tree(insert_coord->node); ++ node = reiser4_new_node(insert_coord->node, LEAF_LEVEL); ++ if (IS_ERR(node)) ++ return PTR_ERR(node); ++ ++ /* setup delimiting keys for node being inserted */ ++ write_lock_dk(tree); ++ znode_set_ld_key(node, key); ++ znode_set_rd_key(node, rdkey); ++ ON_DEBUG(node->creator = current); ++ ON_DEBUG(node->first_key = *key); ++ write_unlock_dk(tree); ++ ++ ZF_SET(node, JNODE_ORPHAN); ++ ++ /* ++ * allocate carry_pool, 3 carry_level-s, reiser4_item_data and ++ * carry_insert_data ++ */ ++ pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo) + ++ sizeof(*item) + sizeof(*cdata)); ++ if (IS_ERR(pool)) ++ return PTR_ERR(pool); ++ todo = (carry_level *) (pool + 1); ++ init_carry_level(todo, pool); ++ ++ item = (reiser4_item_data *) (todo + 3); ++ cdata = (carry_insert_data *) (item + 1); ++ ++ op = reiser4_post_carry(todo, COP_INSERT, insert_coord->node, 0); ++ if (!IS_ERR(op)) { ++ cdata->coord = insert_coord; ++ cdata->key = key; ++ cdata->data = item; ++ op->u.insert.d = cdata; ++ 
op->u.insert.type = COPT_ITEM_DATA; ++ build_child_ptr_data(node, item); ++ item->arg = NULL; ++ /* have @insert_coord to be set at inserted item after ++ insertion is done */ ++ todo->track_type = CARRY_TRACK_CHANGE; ++ todo->tracked = lh; ++ ++ result = reiser4_carry(todo, NULL); ++ if (result == 0) { ++ /* ++ * pin node in memory. This is necessary for ++ * znode_make_dirty() below. ++ */ ++ result = zload(node); ++ if (result == 0) { ++ lock_handle local_lh; ++ ++ /* ++ * if we inserted new child into tree we have ++ * to mark it dirty so that flush will be able ++ * to process it. ++ */ ++ init_lh(&local_lh); ++ result = longterm_lock_znode(&local_lh, node, ++ ZNODE_WRITE_LOCK, ++ ZNODE_LOCK_LOPRI); ++ if (result == 0) { ++ znode_make_dirty(node); ++ ++ /* ++ * when internal item pointing to @node ++ * was inserted into twig node ++ * create_hook_internal did not connect ++ * it properly because its right ++ * neighbor was not known. Do it ++ * here ++ */ ++ write_lock_tree(tree); ++ assert("nikita-3312", ++ znode_is_right_connected(node)); ++ assert("nikita-2984", ++ node->right == NULL); ++ ZF_CLR(node, JNODE_RIGHT_CONNECTED); ++ write_unlock_tree(tree); ++ result = ++ connect_znode(insert_coord, node); ++ ON_DEBUG(if (result == 0) check_dkeys(node);); ++ ++ done_lh(lh); ++ move_lh(lh, &local_lh); ++ assert("vs-1676", node_is_empty(node)); ++ coord_init_first_unit(insert_coord, ++ node); ++ } else { ++ warning("nikita-3136", ++ "Cannot lock child"); ++ } ++ done_lh(&local_lh); ++ zrelse(node); ++ } ++ } ++ } else ++ result = PTR_ERR(op); ++ zput(node); ++ done_carry_pool(pool); ++ return result; ++} ++ ++/** ++ * handle_eottl - handle extent-on-the-twig-level cases in tree traversal ++ * @h: search handle ++ * @outcome: flag saying whether search has to restart or is done ++ * ++ * Handles search on twig level. If this function completes search itself then ++ * it returns 1. If search has to go one level down then 0 is returned. 
If ++ * error happens then LOOKUP_DONE is returned via @outcome and error code is saved ++ * in @h->result. ++ */ ++int handle_eottl(cbk_handle *h, int *outcome) ++{ ++ int result; ++ reiser4_key key; ++ coord_t *coord; ++ ++ coord = h->coord; ++ ++ if (h->level != TWIG_LEVEL || ++ (coord_is_existing_item(coord) && item_is_internal(coord))) { ++ /* Continue to traverse tree downward. */ ++ return 0; ++ } ++ ++ /* ++ * make sure that @h->coord is set to twig node and that it is either ++ * set to extent item or after extent item ++ */ ++ assert("vs-356", h->level == TWIG_LEVEL); ++ assert("vs-357", ( { ++ coord_t lcoord; ++ coord_dup(&lcoord, coord); ++ check_me("vs-733", coord_set_to_left(&lcoord) == 0); ++ item_is_extent(&lcoord); ++ } ++ )); ++ ++ if (*outcome == NS_FOUND) { ++ /* we have found desired key on twig level in extent item */ ++ h->result = CBK_COORD_FOUND; ++ *outcome = LOOKUP_DONE; ++ return 1; ++ } ++ ++ if (!(h->flags & CBK_FOR_INSERT)) { ++ /* tree traversal is not for insertion. Just return ++ CBK_COORD_NOTFOUND. */ ++ h->result = CBK_COORD_NOTFOUND; ++ *outcome = LOOKUP_DONE; ++ return 1; ++ } ++ ++ /* take a look at the item to the right of h -> coord */ ++ result = is_next_item_internal(coord, h->key, h->active_lh); ++ if (unlikely(result < 0)) { ++ h->error = "get_right_neighbor failed"; ++ h->result = result; ++ *outcome = LOOKUP_DONE; ++ return 1; ++ } ++ if (result == 0) { ++ /* ++ * item to the right is also an extent one. Allocate a new node ++ * and insert pointer to it after item h -> coord. ++ * ++ * This is a result of extents being located at the twig ++ * level. For explanation, see comment just above ++ * is_next_item_internal(). 
++ */ ++ znode *loaded; ++ ++ if (cbk_lock_mode(h->level, h) != ZNODE_WRITE_LOCK) { ++ /* ++ * we got node read locked, restart coord_by_key to ++ * have write lock on twig level ++ */ ++ h->lock_level = TWIG_LEVEL; ++ h->lock_mode = ZNODE_WRITE_LOCK; ++ *outcome = LOOKUP_REST; ++ return 1; ++ } ++ ++ loaded = coord->node; ++ result = ++ add_empty_leaf(coord, h->active_lh, h->key, ++ rd_key(coord, &key)); ++ if (result) { ++ h->error = "could not add empty leaf"; ++ h->result = result; ++ *outcome = LOOKUP_DONE; ++ return 1; ++ } ++ /* added empty leaf is locked (h->active_lh), its parent node ++ is unlocked, h->coord is set as EMPTY */ ++ assert("vs-13", coord->between == EMPTY_NODE); ++ assert("vs-14", znode_is_write_locked(coord->node)); ++ assert("vs-15", ++ WITH_DATA(coord->node, node_is_empty(coord->node))); ++ assert("vs-16", jnode_is_leaf(ZJNODE(coord->node))); ++ assert("vs-17", coord->node == h->active_lh->node); ++ *outcome = LOOKUP_DONE; ++ h->result = CBK_COORD_NOTFOUND; ++ return 1; ++ } else if (result == 1) { ++ /* ++ * this is special case mentioned in the comment on ++ * tree.h:cbk_flags. We have found internal item immediately on ++ * the right of extent, and we are going to insert new item ++ * there. Key of item we are going to insert is smaller than ++ * leftmost key in the node pointed to by said internal item ++ * (otherwise search wouldn't come to the extent in the first ++ * place). ++ * ++ * This is a result of extents being located at the twig ++ * level. For explanation, see comment just above ++ * is_next_item_internal(). 
*/ ++	h->flags &= ~CBK_TRUST_DK; ++	} else { ++		assert("vs-8", result == 2); ++		*outcome = LOOKUP_REST; ++		return 1; ++	} ++	assert("vs-362", WITH_DATA(coord->node, item_is_internal(coord))); ++	return 0; ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 120 ++ * scroll-step: 1 ++ * End: ++ */ +diff --git a/fs/reiser4/estimate.c b/fs/reiser4/estimate.c +new file mode 100644 +index 0000000..656c20b +--- /dev/null ++++ b/fs/reiser4/estimate.c +@@ -0,0 +1,120 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "debug.h" ++#include "dformat.h" ++#include "tree.h" ++#include "carry.h" ++#include "inode.h" ++#include "plugin/cluster.h" ++#include "plugin/item/ctail.h" ++ ++/* this returns how many nodes might get dirty and added nodes if @children nodes are dirtied ++ ++ Amount of internals which will get dirty or get allocated we estimate as 5% of the children + 1 balancing. 1 balancing ++ is 2 neighbours, 2 new blocks and the current block on the leaf level, 2 neighbour nodes + the current (or 1 ++ neighbour and 1 new and the current) on twig level, 2 neighbour nodes on upper levels and 1 for a new root. So 5 for ++ leaf level, 3 for twig level, 2 on upper + 1 for root. ++ ++ Do not calculate the current node of the lowest level here - this is overhead only. ++ ++ children is almost always 1 here. Exception is flow insertion ++*/ ++static reiser4_block_nr ++max_balance_overhead(reiser4_block_nr childen, tree_level tree_height) ++{ ++	reiser4_block_nr ten_percent; ++ ++	ten_percent = ((103 * childen) >> 10); ++ ++	/* If we have too many balancings at the time, tree height can raise on more ++	   than 1. Assume that if tree_height is 5, it can raise on 1 only. */ ++	return ((tree_height < 5 ?
5 : tree_height) * 2 + (4 + ten_percent)); ++} ++ ++/* this returns maximal possible number of nodes which can be modified plus number of new nodes which can be required to ++   perform insertion of one item into the tree */ ++/* it is only called when tree height changes, or gets initialized */ ++reiser4_block_nr calc_estimate_one_insert(tree_level height) ++{ ++	return 1 + max_balance_overhead(1, height); ++} ++ ++reiser4_block_nr estimate_one_insert_item(reiser4_tree * tree) ++{ ++	return tree->estimate_one_insert; ++} ++ ++/* this returns maximal possible number of nodes which can be modified plus number of new nodes which can be required to ++   perform insertion of one unit into an item in the tree */ ++reiser4_block_nr estimate_one_insert_into_item(reiser4_tree * tree) ++{ ++	/* estimate insert into item just like item insertion */ ++	return tree->estimate_one_insert; ++} ++ ++reiser4_block_nr estimate_one_item_removal(reiser4_tree * tree) ++{ ++	/* on item removal reiser4 does not try to pack nodes more compact, so, only one node may be dirtied on leaf ++	   level */ ++	return tree->estimate_one_insert; ++} ++ ++/* on leaf level insert_flow may add CARRY_FLOW_NEW_NODES_LIMIT new nodes and dirty 3 existing nodes (insert point and ++   both its neighbors). Max_balance_overhead should estimate number of blocks which may change/get added on internal ++   levels */ ++reiser4_block_nr estimate_insert_flow(tree_level height) ++{ ++	return 3 + CARRY_FLOW_NEW_NODES_LIMIT + max_balance_overhead(3 + ++							             CARRY_FLOW_NEW_NODES_LIMIT, ++							             height); ++} ++ ++/* returns max number of nodes that can be occupied by disk cluster */ ++static reiser4_block_nr estimate_cluster(struct inode * inode, int unprepped) ++{ ++	int per_cluster; ++	per_cluster = (unprepped ?
1 : cluster_nrpages(inode)); ++	return 3 + per_cluster + ++		max_balance_overhead(3 + per_cluster, ++				     REISER4_MAX_ZTREE_HEIGHT); ++} ++ ++/* how many nodes might get dirty and added ++   during insertion of a disk cluster */ ++reiser4_block_nr estimate_insert_cluster(struct inode * inode) ++{ ++	return estimate_cluster(inode, 1); /* 24 */ ++} ++ ++/* how many nodes might get dirty and added ++   during update of a (prepped or unprepped) disk cluster */ ++reiser4_block_nr estimate_update_cluster(struct inode * inode) ++{ ++	return estimate_cluster(inode, 0); /* 44, for 64K-cluster */ ++} ++ ++/* How many nodes occupied by a disk cluster might get dirty. ++   Note that this estimation is not precise (i.e. disk cluster ++   can occupy more nodes). ++   Q: Why don't we use precise estimation? ++   A: 1.Because precise estimation is fairly bad: 65536 nodes ++	for 64K logical cluster, it means 256M of dead space on ++	a partition ++      2.It is a very rare case when disk cluster occupies more ++	nodes than this estimation returns. ++*/ ++reiser4_block_nr estimate_dirty_cluster(struct inode * inode) ++{ ++	return cluster_nrpages(inode) + 4; ++} ++ ++/* Make Linus happy.
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/export_ops.c b/fs/reiser4/export_ops.c +new file mode 100644 +index 0000000..b75afe7 +--- /dev/null ++++ b/fs/reiser4/export_ops.c +@@ -0,0 +1,295 @@ ++/* Copyright 2005 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++#include "inode.h" ++#include "plugin/plugin.h" ++ ++/* ++ * Supported file-handle types ++ */ ++typedef enum { ++ FH_WITH_PARENT = 0x10, /* file handle with parent */ ++ FH_WITHOUT_PARENT = 0x11 /* file handle without parent */ ++} reiser4_fhtype; ++ ++#define NFSERROR (255) ++ ++/* initialize place-holder for object */ ++static void object_on_wire_init(reiser4_object_on_wire *o) ++{ ++ o->plugin = NULL; ++} ++ ++/* finish with @o */ ++static void object_on_wire_done(reiser4_object_on_wire *o) ++{ ++ if (o->plugin != NULL) ++ o->plugin->wire.done(o); ++} ++ ++/* ++ * read serialized object identity from @addr and store information about ++ * object in @obj. This is dual to encode_inode(). ++ */ ++static char *decode_inode(struct super_block *s, char *addr, ++ reiser4_object_on_wire * obj) ++{ ++ file_plugin *fplug; ++ ++ /* identifier of object plugin is stored in the first two bytes, ++ * followed by... */ ++ fplug = file_plugin_by_disk_id(reiser4_get_tree(s), (d16 *) addr); ++ if (fplug != NULL) { ++ addr += sizeof(d16); ++ obj->plugin = fplug; ++ assert("nikita-3520", fplug->wire.read != NULL); ++ /* plugin specific encoding of object identity. 
*/ ++ addr = fplug->wire.read(addr, obj); ++ } else ++ addr = ERR_PTR(RETERR(-EINVAL)); ++ return addr; ++} ++ ++/** ++ * reiser4_decode_fh - decode_fh of export operations ++ * @super: super block ++ * @fh: nfsd file handle ++ * @len: length of file handle ++ * @fhtype: type of file handle ++ * @acceptable: acceptability testing function ++ * @context: argument for @acceptable ++ * ++ * Returns dentry referring to the same file as @fh. ++ */ ++static struct dentry *reiser4_decode_fh(struct super_block *super, __u32 *fh, ++ int len, int fhtype, ++ int (*acceptable) (void *context, ++ struct dentry *de), ++ void *context) ++{ ++ reiser4_context *ctx; ++ reiser4_object_on_wire object; ++ reiser4_object_on_wire parent; ++ char *addr; ++ int with_parent; ++ ++ ctx = reiser4_init_context(super); ++ if (IS_ERR(ctx)) ++ return (struct dentry *)ctx; ++ ++ assert("vs-1482", ++ fhtype == FH_WITH_PARENT || fhtype == FH_WITHOUT_PARENT); ++ ++ with_parent = (fhtype == FH_WITH_PARENT); ++ ++ addr = (char *)fh; ++ ++ object_on_wire_init(&object); ++ object_on_wire_init(&parent); ++ ++ addr = decode_inode(super, addr, &object); ++ if (!IS_ERR(addr)) { ++ if (with_parent) ++ addr = decode_inode(super, addr, &parent); ++ if (!IS_ERR(addr)) { ++ struct dentry *d; ++ typeof(super->s_export_op->find_exported_dentry) fn; ++ ++ fn = super->s_export_op->find_exported_dentry; ++ assert("nikita-3521", fn != NULL); ++ d = fn(super, &object, with_parent ? &parent : NULL, ++ acceptable, context); ++ if (d != NULL && !IS_ERR(d)) ++ /* FIXME check for -ENOMEM */ ++ reiser4_get_dentry_fsdata(d)->stateless = 1; ++ addr = (char *)d; ++ } ++ } ++ ++ object_on_wire_done(&object); ++ object_on_wire_done(&parent); ++ ++ reiser4_exit_context(ctx); ++ return (void *)addr; ++} ++ ++/* ++ * Object serialization support. ++ * ++ * To support knfsd file system provides export_operations that are used to ++ * construct and interpret NFS file handles. 
As a generalization of this, ++ * reiser4 object plugins have serialization support: it provides methods to ++ * create on-wire representation of identity of reiser4 object, and ++ * re-create/locate object given its on-wire identity. ++ * ++ */ ++ ++/* ++ * return number of bytes that on-wire representation of @inode's identity ++ * consumes. ++ */ ++static int encode_inode_size(struct inode *inode) ++{ ++ assert("nikita-3514", inode != NULL); ++ assert("nikita-3515", inode_file_plugin(inode) != NULL); ++ assert("nikita-3516", inode_file_plugin(inode)->wire.size != NULL); ++ ++ return inode_file_plugin(inode)->wire.size(inode) + sizeof(d16); ++} ++ ++/* ++ * store on-wire representation of @inode's identity at the area beginning at ++ * @start. ++ */ ++static char *encode_inode(struct inode *inode, char *start) ++{ ++ assert("nikita-3517", inode != NULL); ++ assert("nikita-3518", inode_file_plugin(inode) != NULL); ++ assert("nikita-3519", inode_file_plugin(inode)->wire.write != NULL); ++ ++ /* ++ * first, store two-byte identifier of object plugin, then ++ */ ++ save_plugin_id(file_plugin_to_plugin(inode_file_plugin(inode)), ++ (d16 *) start); ++ start += sizeof(d16); ++ /* ++ * call plugin to serialize object's identity ++ */ ++ return inode_file_plugin(inode)->wire.write(inode, start); ++} ++ ++/* this returns number of 32 bit long numbers encoded in @lenp. 255 is ++ * returned if file handle can not be stored */ ++/** ++ * reiser4_encode_fh - encode_fh of export operations ++ * @dentry: ++ * @fh: ++ * @lenp: ++ * @need_parent: ++ * ++ */ ++static int ++reiser4_encode_fh(struct dentry *dentry, __u32 *fh, int *lenp, ++ int need_parent) ++{ ++ struct inode *inode; ++ struct inode *parent; ++ char *addr; ++ int need; ++ int delta; ++ int result; ++ reiser4_context *ctx; ++ ++ /* ++ * knfsd asks as to serialize object in @dentry, and, optionally its ++ * parent (if need_parent != 0). 
++ * ++ * encode_inode() and encode_inode_size() is used to build ++ * representation of object and its parent. All hard work is done by ++ * object plugins. ++ */ ++ inode = dentry->d_inode; ++ parent = dentry->d_parent->d_inode; ++ ++ addr = (char *)fh; ++ ++ need = encode_inode_size(inode); ++ if (need < 0) ++ return NFSERROR; ++ if (need_parent) { ++ delta = encode_inode_size(parent); ++ if (delta < 0) ++ return NFSERROR; ++ need += delta; ++ } ++ ++ ctx = reiser4_init_context(dentry->d_inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ if (need <= sizeof(__u32) * (*lenp)) { ++ addr = encode_inode(inode, addr); ++ if (need_parent) ++ addr = encode_inode(parent, addr); ++ ++ /* store in lenp number of 32bit words required for file ++ * handle. */ ++ *lenp = (need + sizeof(__u32) - 1) >> 2; ++ result = need_parent ? FH_WITH_PARENT : FH_WITHOUT_PARENT; ++ } else ++ /* no enough space in file handle */ ++ result = NFSERROR; ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++/** ++ * reiser4_get_dentry_parent - get_parent of export operations ++ * @child: ++ * ++ */ ++static struct dentry *reiser4_get_dentry_parent(struct dentry *child) ++{ ++ struct inode *dir; ++ dir_plugin *dplug; ++ ++ assert("nikita-3527", child != NULL); ++ /* see comment in reiser4_get_dentry() about following assertion */ ++ assert("nikita-3528", is_in_reiser4_context()); ++ ++ dir = child->d_inode; ++ assert("nikita-3529", dir != NULL); ++ dplug = inode_dir_plugin(dir); ++ assert("nikita-3531", ergo(dplug != NULL, dplug->get_parent != NULL)); ++ if (dplug != NULL) ++ return dplug->get_parent(dir); ++ else ++ return ERR_PTR(RETERR(-ENOTDIR)); ++} ++ ++/** ++ * reiser4_get_dentry - get_dentry of export operations ++ * @super: ++ * @data: ++ * ++ * ++ */ ++static struct dentry *reiser4_get_dentry(struct super_block *super, void *data) ++{ ++ reiser4_object_on_wire *o; ++ ++ assert("nikita-3522", super != NULL); ++ assert("nikita-3523", data != NULL); ++ /* ++ * this is only 
supposed to be called by ++ * ++ * reiser4_decode_fh->find_exported_dentry ++ * ++ * so, reiser4_context should be here already. ++ */ ++ assert("nikita-3526", is_in_reiser4_context()); ++ ++ o = (reiser4_object_on_wire *)data; ++ assert("nikita-3524", o->plugin != NULL); ++ assert("nikita-3525", o->plugin->wire.get != NULL); ++ ++ return o->plugin->wire.get(super, o); ++} ++ ++struct export_operations reiser4_export_operations = { ++ .encode_fh = reiser4_encode_fh, ++ .decode_fh = reiser4_decode_fh, ++ .get_parent = reiser4_get_dentry_parent, ++ .get_dentry = reiser4_get_dentry ++}; ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * End: ++ */ +diff --git a/fs/reiser4/flush.c b/fs/reiser4/flush.c +new file mode 100644 +index 0000000..49b6ca5 +--- /dev/null ++++ b/fs/reiser4/flush.c +@@ -0,0 +1,3622 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* The design document for this file is at http://www.namesys.com/v4/v4.html. 
*/ ++ ++#include "forward.h" ++#include "debug.h" ++#include "dformat.h" ++#include "key.h" ++#include "coord.h" ++#include "plugin/item/item.h" ++#include "plugin/plugin.h" ++#include "plugin/object.h" ++#include "txnmgr.h" ++#include "jnode.h" ++#include "znode.h" ++#include "block_alloc.h" ++#include "tree_walk.h" ++#include "carry.h" ++#include "tree.h" ++#include "vfs_ops.h" ++#include "inode.h" ++#include "page_cache.h" ++#include "wander.h" ++#include "super.h" ++#include "entd.h" ++#include "reiser4.h" ++#include "flush.h" ++#include "writeout.h" ++ ++#include ++#include /* for struct super_block */ ++#include /* for struct page */ ++#include /* for struct bio */ ++#include ++#include ++ ++/* IMPLEMENTATION NOTES */ ++ ++/* PARENT-FIRST: Some terminology: A parent-first traversal is a way of assigning a total ++ order to the nodes of the tree in which the parent is placed before its children, which ++ are ordered (recursively) in left-to-right order. When we speak of a "parent-first preceder", it ++ describes the node that "came before in forward parent-first order". When we speak of a ++ "parent-first follower", it describes the node that "comes next in parent-first ++ order" (alternatively the node that "came before in reverse parent-first order"). ++ ++ The following pseudo-code prints the nodes of a tree in forward parent-first order: ++ ++ void parent_first (node) ++ { ++ print_node (node); ++ if (node->level > leaf) { ++ for (i = 0; i < num_children; i += 1) { ++ parent_first (node->child[i]); ++ } ++ } ++ } ++*/ ++ ++/* JUST WHAT ARE WE TRYING TO OPTIMIZE, HERE? The idea is to optimize block allocation so ++ that a left-to-right scan of the tree's data (i.e., the leaves in left-to-right order) ++ can be accomplished with sequential reads, which results in reading nodes in their ++ parent-first order. 
This is a read-optimization aspect of the flush algorithm, and ++ there is also a write-optimization aspect, which is that we wish to make large ++ sequential writes to the disk by allocating or reallocating blocks so that they can be ++ written in sequence. Sometimes the read-optimization and write-optimization goals ++ conflict with each other, as we discuss in more detail below. ++*/ ++ ++/* STATE BITS: The flush code revolves around the state of the jnodes it covers. Here are ++ the relevant jnode->state bits and their relevence to flush: ++ ++ JNODE_DIRTY: If a node is dirty, it must be flushed. But in order to be written it ++ must be allocated first. In order to be considered allocated, the jnode must have ++ exactly one of { JNODE_OVRWR, JNODE_RELOC } set. These two bits are exclusive, and ++ all dirtied jnodes eventually have one of these bits set during each transaction. ++ ++ JNODE_CREATED: The node was freshly created in its transaction and has no previous ++ block address, so it is unconditionally assigned to be relocated, although this is ++ mainly for code-convenience. It is not being 'relocated' from anything, but in ++ almost every regard it is treated as part of the relocate set. The JNODE_CREATED bit ++ remains set even after JNODE_RELOC is set, so the actual relocate can be ++ distinguished from the created-and-allocated set easily: relocate-set members ++ (belonging to the preserve-set) have (JNODE_RELOC) set and created-set members which ++ have no previous location to preserve have (JNODE_RELOC | JNODE_CREATED) set. ++ ++ JNODE_OVRWR: The node belongs to atom's overwrite set. The flush algorithm made the ++ decision to maintain the pre-existing location for this node and it will be written ++ to the wandered-log. ++ ++ JNODE_RELOC: The flush algorithm made the decision to relocate this block (if it was ++ not created, see note above). 
A block with JNODE_RELOC set is eligible for ++ early-flushing and may be submitted during flush_empty_queues. When the JNODE_RELOC ++ bit is set on a znode, the parent node's internal item is modified and the znode is ++ rehashed. ++ ++ JNODE_SQUEEZABLE: Before shifting everything left, the flush algorithm scans the node ++ and calls plugin->f.squeeze() method for its items. By this technology we update disk ++ clusters of cryptcompress objects. Also if leftmost point that was found by flush scan ++ has this flag (races with write(), rare case) the flush algorythm makes the decision ++ to pass it to squalloc() in spite of its flushprepped status for squeezing, not for ++ repeated allocation. ++ ++ JNODE_FLUSH_QUEUED: This bit is set when a call to flush enters the jnode into its ++ flush queue. This means the jnode is not on any clean or dirty list, instead it is ++ moved to one of the flush queue (see flush_queue.h) object private list. This ++ prevents multiple concurrent flushes from attempting to start flushing from the ++ same node. ++ ++ (DEAD STATE BIT) JNODE_FLUSH_BUSY: This bit was set during the bottom-up ++ squeeze-and-allocate on a node while its children are actively being squeezed and ++ allocated. This flag was created to avoid submitting a write request for a node ++ while its children are still being allocated and squeezed. Then flush queue was ++ re-implemented to allow unlimited number of nodes be queued. This flag support was ++ commented out in source code because we decided that there was no reason to submit ++ queued nodes before jnode_flush() finishes. However, current code calls fq_write() ++ during a slum traversal and may submit "busy nodes" to disk. Probably we can ++ re-enable the JNODE_FLUSH_BUSY bit support in future. ++ ++ With these state bits, we describe a test used frequently in the code below, ++ jnode_is_flushprepped() (and the spin-lock-taking jnode_check_flushprepped()). 
The ++ test for "flushprepped" returns true if any of the following are true: ++ ++ - The node is not dirty ++ - The node has JNODE_RELOC set ++ - The node has JNODE_OVRWR set ++ ++ If either the node is not dirty or it has already been processed by flush (and assigned ++ JNODE_OVRWR or JNODE_RELOC), then it is prepped. If jnode_is_flushprepped() returns ++ false then flush has work to do on that node. ++*/ ++ ++/* FLUSH_PREP_ONCE_PER_TRANSACTION: Within a single transaction a node is never ++ flushprepped twice (unless an explicit call to flush_unprep is made as described in ++ detail below). For example a node is dirtied, allocated, and then early-flushed to ++ disk and set clean. Before the transaction commits, the page is dirtied again and, due ++ to memory pressure, the node is flushed again. The flush algorithm will not relocate ++ the node to a new disk location, it will simply write it to the same, previously ++ relocated position again. ++*/ ++ ++/* THE BOTTOM-UP VS. TOP-DOWN ISSUE: This code implements a bottom-up algorithm where we ++ start at a leaf node and allocate in parent-first order by iterating to the right. At ++ each step of the iteration, we check for the right neighbor. Before advancing to the ++ right neighbor, we check if the current position and the right neighbor share the same ++ parent. If they do not share the same parent, the parent is allocated before the right ++ neighbor. ++ ++ This process goes recursively up the tree and squeezes nodes level by level as long as ++ the right neighbor and the current position have different parents, then it allocates ++ the right-neighbors-with-different-parents on the way back down. This process is ++ described in more detail in flush_squalloc_changed_ancestor and the recursive function ++ squalloc_one_changed_ancestor. But the purpose here is not to discuss the ++ specifics of the bottom-up approach as it is to contrast the bottom-up and top-down ++ approaches. 
++ ++ The top-down algorithm was implemented earlier (April-May 2002). In the top-down ++ approach, we find a starting point by scanning left along each level past dirty nodes, ++ then going up and repeating the process until the left node and the parent node are ++ clean. We then perform a parent-first traversal from the starting point, which makes ++ allocating in parent-first order trivial. After one subtree has been allocated in this ++ manner, we move to the right, try moving upward, then repeat the parent-first ++ traversal. ++ ++ Both approaches have problems that need to be addressed. Both are approximately the ++ same amount of code, but the bottom-up approach has advantages in the order it acquires ++ locks which, at the very least, make it the better approach. At first glance each one ++ makes the other one look simpler, so it is important to remember a few of the problems ++ with each one. ++ ++ Main problem with the top-down approach: When you encounter a clean child during the ++ parent-first traversal, what do you do? You would like to avoid searching through a ++ large tree of nodes just to find a few dirty leaves at the bottom, and there is not an ++ obvious solution. One of the advantages of the top-down approach is that during the ++ parent-first traversal you check every child of a parent to see if it is dirty. In ++ this way, the top-down approach easily handles the main problem of the bottom-up ++ approach: unallocated children. ++ ++ The unallocated children problem is that before writing a node to disk we must make ++ sure that all of its children are allocated. Otherwise, the writing the node means ++ extra I/O because the node will have to be written again when the child is finally ++ allocated. ++ ++ WE HAVE NOT YET ELIMINATED THE UNALLOCATED CHILDREN PROBLEM. 
Except for bugs, this ++ should not cause any file system corruption, it only degrades I/O performance because a ++ node may be written when it is sure to be written at least one more time in the same ++ transaction when the remaining children are allocated. What follows is a description ++ of how we will solve the problem. ++*/ ++ ++/* HANDLING UNALLOCATED CHILDREN: During flush we may allocate a parent node then, ++ proceeding in parent first order, allocate some of its left-children, then encounter a ++ clean child in the middle of the parent. We do not allocate the clean child, but there ++ may remain unallocated (dirty) children to the right of the clean child. If we were to ++ stop flushing at this moment and write everything to disk, the parent might still ++ contain unallocated children. ++ ++ We could try to allocate all the descendents of every node that we allocate, but this ++ is not necessary. Doing so could result in allocating the entire tree: if the root ++ node is allocated then every unallocated node would have to be allocated before ++ flushing. Actually, we do not have to write a node just because we allocate it. It is ++ possible to allocate but not write a node during flush, when it still has unallocated ++ children. However, this approach is probably not optimal for the following reason. ++ ++ The flush algorithm is designed to allocate nodes in parent-first order in an attempt ++ to optimize reads that occur in the same order. Thus we are read-optimizing for a ++ left-to-right scan through all the leaves in the system, and we are hoping to ++ write-optimize at the same time because those nodes will be written together in batch. ++ What happens, however, if we assign a block number to a node in its read-optimized ++ order but then avoid writing it because it has unallocated children? 
In that ++ situation, we lose out on the write-optimization aspect because a node will have to be ++ written again to its location on the device, later, which likely means seeking back ++ to that location. ++ ++ So there are tradeoffs. We can choose either: ++ ++ A. Allocate all unallocated children to preserve both write-optimization and ++ read-optimization, but this is not always desirable because it may mean having to ++ allocate and flush very many nodes at once. ++ ++ B. Defer writing nodes with unallocated children, keep their read-optimized locations, ++ but sacrifice write-optimization because those nodes will be written again. ++ ++ C. Defer writing nodes with unallocated children, but do not keep their read-optimized ++ locations. Instead, choose to write-optimize them later, when they are written. To ++ facilitate this, we "undo" the read-optimized allocation that was given to the node so ++ that later it can be write-optimized, thus "unpreparing" the flush decision. This is a ++ case where we disturb the FLUSH_PREP_ONCE_PER_TRANSACTION rule described above. By a ++ call to flush_unprep() we will: if the node was wandered, unset the JNODE_OVRWR bit; ++ if the node was relocated, unset the JNODE_RELOC bit, non-deferred-deallocate its block ++ location, and set the JNODE_CREATED bit, effectively setting the node back to an ++ unallocated state. ++ ++ We will take the following approach in v4.0: for twig nodes we will always finish ++ allocating unallocated children (A). For nodes with (level > TWIG) we will defer ++ writing and choose write-optimization (C). ++ ++ To summarize, there are several parts to a solution that avoids the problem with ++ unallocated children: ++ ++ FIXME-ZAM: Still no one approach is implemented to eliminate the "UNALLOCATED CHILDREN" ++ problem because there was an experiment which showed that we have 1-2 nodes ++ with unallocated children for thousands of written nodes. 
The experiment was simple ++ like coping / deletion of linux kernel sources. However the problem can arise in more ++ complex tests. I think we have jnode_io_hook to insert a check for unallocated ++ children and see what kind of problem we have. ++ ++ 1. When flush reaches a stopping point (e.g., a clean node), it should continue calling ++ squeeze-and-allocate on any remaining unallocated children. FIXME: Difficulty to ++ implement: should be simple -- amounts to adding a while loop to jnode_flush, see ++ comments in that function. ++ ++ 2. When flush reaches flush_empty_queue(), some of the (level > TWIG) nodes may still ++ have unallocated children. If the twig level has unallocated children it is an ++ assertion failure. If a higher-level node has unallocated children, then it should be ++ explicitly de-allocated by a call to flush_unprep(). FIXME: Difficulty to implement: ++ should be simple. ++ ++ 3. (CPU-Optimization) Checking whether a node has unallocated children may consume more ++ CPU cycles than we would like, and it is possible (but medium complexity) to optimize ++ this somewhat in the case where large sub-trees are flushed. The following observation ++ helps: if both the left- and right-neighbor of a node are processed by the flush ++ algorithm then the node itself is guaranteed to have all of its children allocated. ++ However, the cost of this check may not be so expensive after all: it is not needed for ++ leaves and flush can guarantee this property for twigs. That leaves only (level > ++ TWIG) nodes that have to be checked, so this optimization only helps if at least three ++ (level > TWIG) nodes are flushed in one pass, and the savings will be very small unless ++ there are many more (level > TWIG) nodes. But if there are many (level > TWIG) nodes ++ then the number of blocks being written will be very large, so the savings may be ++ insignificant. 
That said, the idea is to maintain both the left and right edges of ++ nodes that are processed in flush. When flush_empty_queue() is called, a relatively ++ simple test will tell whether the (level > TWIG) node is on the edge. If it is on the ++ edge, the slow check is necessary, but if it is in the interior then it can be assumed ++ to have all of its children allocated. FIXME: medium complexity to implement, but ++ simple to verify given that we must have a slow check anyway. ++ ++ 4. (Optional) This part is optional, not for v4.0--flush should work independently of ++ whether this option is used or not. Called RAPID_SCAN, the idea is to amend the ++ left-scan operation to take unallocated children into account. Normally, the left-scan ++ operation goes left as long as adjacent nodes are dirty up until some large maximum ++ value (FLUSH_SCAN_MAXNODES) at which point it stops and begins flushing. But scan-left ++ may stop at a position where there are unallocated children to the left with the same ++ parent. When RAPID_SCAN is enabled, the ordinary scan-left operation stops after ++ FLUSH_RELOCATE_THRESHOLD, which is much smaller than FLUSH_SCAN_MAXNODES, then proceeds ++ with a rapid scan. The rapid scan skips all the interior children of a node--if the ++ leftmost child of a twig is dirty, check its left neighbor (the rightmost child of the ++ twig to the left). If the left neighbor of the leftmost child is also dirty, then ++ continue the scan at the left twig and repeat. This option will cause flush to ++ allocate more twigs in a single pass, but it also has the potential to write many more ++ nodes than would otherwise be written without the RAPID_SCAN option. RAPID_SCAN ++ was partially implemented, code removed August 12, 2002 by JMACD. ++*/ ++ ++/* FLUSH CALLED ON NON-LEAF LEVEL. Most of our design considerations assume that the ++ starting point for flush is a leaf node, but actually the flush code cares very little ++ about whether or not this is true. 
It is possible that all the leaf nodes are flushed ++ and dirty parent nodes still remain, in which case jnode_flush() is called on a ++ non-leaf argument. Flush doesn't care--it treats the argument node as if it were a ++ leaf, even when it is not. This is a simple approach, and there may be a more optimal ++ policy but until a problem with this approach is discovered, simplest is probably best. ++ ++ NOTE: In this case, the ordering produced by flush is parent-first only if you ignore ++ the leaves. This is done as a matter of simplicity and there is only one (shaky) ++ justification. When an atom commits, it flushes all leaf level nodes first, followed ++ by twigs, and so on. With flushing done in this order, if flush is eventually called ++ on a non-leaf node it means that (somehow) we reached a point where all leaves are ++ clean and only internal nodes need to be flushed. If that is the case, then it means ++ there were no leaves that were the parent-first preceder/follower of the parent. This ++ is expected to be a rare case, which is why we do nothing special about it. However, ++ memory pressure may pass an internal node to flush when there are still dirty leaf ++ nodes that need to be flushed, which could prove our original assumptions ++ "inoperative". If this needs to be fixed, then scan_left/right should have ++ special checks for the non-leaf levels. For example, instead of passing from a node to ++ the left neighbor, it should pass from the node to the left neighbor's rightmost ++ descendant (if dirty). ++ ++*/ ++ ++/* UNIMPLEMENTED AS YET: REPACKING AND RESIZING. We walk the tree in 4MB-16MB chunks, dirtying everything and putting ++ it into a transaction. We tell the allocator to allocate the blocks as far as possible towards one end of the ++ logical device--the left (starting) end of the device if we are walking from left to right, the right end of the ++ device if we are walking from right to left. 
We then make passes in alternating directions, and as we do this the ++ device becomes sorted such that tree order and block number order fully correlate. ++ ++ Resizing is done by shifting everything either all the way to the left or all the way ++ to the right, and then reporting the last block. ++*/ ++ ++/* RELOCATE DECISIONS: The code makes a decision to relocate in several places. This ++ describes the policy from the highest level: ++ ++ The FLUSH_RELOCATE_THRESHOLD parameter: If we count this many consecutive nodes on the ++ leaf level during flush-scan (right, left), then we unconditionally decide to relocate ++ leaf nodes. ++ ++ Otherwise, there are two contexts in which we make a decision to relocate: ++ ++ 1. The REVERSE PARENT-FIRST context: Implemented in reverse_relocate_test(). ++ During the initial stages of flush, after scan-right completes, we want to ask the ++ question: should we relocate this leaf node and thus dirty the parent node. Then if ++ the node is a leftmost child its parent is its own parent-first preceder, thus we repeat ++ the question at the next level up, and so on. In these cases we are moving in the ++ reverse-parent first direction. ++ ++ There is another case which is considered the reverse direction, which comes at the end ++ of a twig in reverse_relocate_end_of_twig(). As we finish processing a twig we may ++ reach a point where there is a clean twig to the right with a dirty leftmost child. In ++ this case, we may wish to relocate the child by testing if it should be relocated ++ relative to its parent. ++ ++ 2. The FORWARD PARENT-FIRST context: Testing for forward relocation is done in ++ allocate_znode. What distinguishes the forward parent-first case from the ++ reverse-parent first case is that the preceder has already been allocated in the ++ forward case, whereas in the reverse case we don't know what the preceder is until we ++ finish "going in reverse". 
That simplifies the forward case considerably, and there we ++ actually use the block allocator to determine whether, e.g., a block closer to the ++ preceder is available. ++*/ ++ ++/* SQUEEZE_LEFT_EDGE: Unimplemented idea for future consideration. The idea is, once we ++ finish scan-left and find a starting point, if the parent's left neighbor is dirty then ++ squeeze the parent's left neighbor and the parent. This may change the ++ flush-starting-node's parent. Repeat until the child's parent is stable. If the child ++ is a leftmost child, repeat this left-edge squeezing operation at the next level up. ++ Note that we cannot allocate extents during this or they will be out of parent-first ++ order. There is also some difficult coordinate maintenence issues. We can't do a tree ++ search to find coordinates again (because we hold locks), we have to determine them ++ from the two nodes being squeezed. Looks difficult, but has potential to increase ++ space utilization. */ ++ ++/* Flush-scan helper functions. */ ++static void scan_init(flush_scan * scan); ++static void scan_done(flush_scan * scan); ++ ++/* Flush-scan algorithm. */ ++static int scan_left(flush_scan * scan, flush_scan * right, jnode * node, ++ unsigned limit); ++static int scan_right(flush_scan * scan, jnode * node, unsigned limit); ++static int scan_common(flush_scan * scan, flush_scan * other); ++static int scan_formatted(flush_scan * scan); ++static int scan_unformatted(flush_scan * scan, flush_scan * other); ++static int scan_by_coord(flush_scan * scan); ++ ++/* Initial flush-point ancestor allocation. */ ++static int alloc_pos_and_ancestors(flush_pos_t * pos); ++static int alloc_one_ancestor(const coord_t * coord, flush_pos_t * pos); ++static int set_preceder(const coord_t * coord_in, flush_pos_t * pos); ++ ++/* Main flush algorithm. Note on abbreviation: "squeeze and allocate" == "squalloc". */ ++static int squalloc(flush_pos_t * pos); ++ ++/* Flush squeeze implementation. 
*/ ++static int squeeze_right_non_twig(znode * left, znode * right); ++static int shift_one_internal_unit(znode * left, znode * right); ++ ++/* Flush reverse parent-first relocation routines. */ ++static int reverse_relocate_if_close_enough(const reiser4_block_nr * pblk, ++ const reiser4_block_nr * nblk); ++static int reverse_relocate_test(jnode * node, const coord_t * parent_coord, ++ flush_pos_t * pos); ++static int reverse_relocate_check_dirty_parent(jnode * node, ++ const coord_t * parent_coord, ++ flush_pos_t * pos); ++ ++/* Flush allocate write-queueing functions: */ ++static int allocate_znode(znode * node, const coord_t * parent_coord, ++ flush_pos_t * pos); ++static int allocate_znode_update(znode * node, const coord_t * parent_coord, ++ flush_pos_t * pos); ++static int lock_parent_and_allocate_znode(znode *, flush_pos_t *); ++ ++/* Flush helper functions: */ ++static int jnode_lock_parent_coord(jnode * node, ++ coord_t * coord, ++ lock_handle * parent_lh, ++ load_count * parent_zh, ++ znode_lock_mode mode, int try); ++static int neighbor_in_slum(znode * node, lock_handle * right_lock, sideof side, ++ znode_lock_mode mode, int check_dirty); ++static int znode_same_parents(znode * a, znode * b); ++ ++static int znode_check_flushprepped(znode * node) ++{ ++ return jnode_check_flushprepped(ZJNODE(node)); ++} ++ ++/* Flush position functions */ ++static void pos_init(flush_pos_t * pos); ++static int pos_valid(flush_pos_t * pos); ++static void pos_done(flush_pos_t * pos); ++static int pos_stop(flush_pos_t * pos); ++ ++/* check that @org is first jnode extent unit, if extent is unallocated, ++ * because all jnodes of unallocated extent are dirty and of the same atom. 
*/ ++#define checkchild(scan) \ ++assert("nikita-3435", \ ++ ergo(scan->direction == LEFT_SIDE && \ ++ (scan->parent_coord.node->level == TWIG_LEVEL) && \ ++ jnode_is_unformatted(scan->node) && \ ++ extent_is_unallocated(&scan->parent_coord), \ ++ extent_unit_index(&scan->parent_coord) == index_jnode(scan->node))) ++ ++/* This flush_cnt variable is used to track the number of concurrent flush operations, ++ useful for debugging. It is initialized in txnmgr.c out of laziness (because flush has ++ no static initializer function...) */ ++ON_DEBUG(atomic_t flush_cnt; ++ ) ++ ++/* check fs backing device for write congestion */ ++static int check_write_congestion(void) ++{ ++ struct super_block *sb; ++ struct backing_dev_info *bdi; ++ ++ sb = reiser4_get_current_sb(); ++ bdi = reiser4_get_super_fake(sb)->i_mapping->backing_dev_info; ++ return bdi_write_congested(bdi); ++} ++ ++/* conditionally write flush queue */ ++static int write_prepped_nodes(flush_pos_t * pos) ++{ ++ int ret; ++ ++ assert("zam-831", pos); ++ assert("zam-832", pos->fq); ++ ++ if (!(pos->flags & JNODE_FLUSH_WRITE_BLOCKS)) ++ return 0; ++ ++ if (check_write_congestion()) ++ return 0; ++ ++ ret = reiser4_write_fq(pos->fq, pos->nr_written, ++ WRITEOUT_SINGLE_STREAM | WRITEOUT_FOR_PAGE_RECLAIM); ++ return ret; ++} ++ ++/* Proper release all flush pos. 
resources then move flush position to new ++ locked node */ ++static void move_flush_pos(flush_pos_t * pos, lock_handle * new_lock, ++ load_count * new_load, const coord_t * new_coord) ++{ ++ assert("zam-857", new_lock->node == new_load->node); ++ ++ if (new_coord) { ++ assert("zam-858", new_coord->node == new_lock->node); ++ coord_dup(&pos->coord, new_coord); ++ } else { ++ coord_init_first_unit(&pos->coord, new_lock->node); ++ } ++ ++ if (pos->child) { ++ jput(pos->child); ++ pos->child = NULL; ++ } ++ ++ move_load_count(&pos->load, new_load); ++ done_lh(&pos->lock); ++ move_lh(&pos->lock, new_lock); ++} ++ ++/* delete empty node which link from the parent still exists. */ ++static int delete_empty_node(znode * node) ++{ ++ reiser4_key smallest_removed; ++ ++ assert("zam-1019", node != NULL); ++ assert("zam-1020", node_is_empty(node)); ++ assert("zam-1023", znode_is_wlocked(node)); ++ ++ return reiser4_delete_node(node, &smallest_removed, NULL, 1); ++} ++ ++/* Prepare flush position for alloc_pos_and_ancestors() and squalloc() */ ++static int prepare_flush_pos(flush_pos_t * pos, jnode * org) ++{ ++ int ret; ++ load_count load; ++ lock_handle lock; ++ ++ init_lh(&lock); ++ init_load_count(&load); ++ ++ if (jnode_is_znode(org)) { ++ ret = longterm_lock_znode(&lock, JZNODE(org), ++ ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI); ++ if (ret) ++ return ret; ++ ++ ret = incr_load_count_znode(&load, JZNODE(org)); ++ if (ret) ++ return ret; ++ ++ pos->state = ++ (jnode_get_level(org) == ++ LEAF_LEVEL) ? 
POS_ON_LEAF : POS_ON_INTERNAL; ++ move_flush_pos(pos, &lock, &load, NULL); ++ } else { ++ coord_t parent_coord; ++ ret = jnode_lock_parent_coord(org, &parent_coord, &lock, ++ &load, ZNODE_WRITE_LOCK, 0); ++ if (ret) ++ goto done; ++ if (!item_is_extent(&parent_coord)) { ++ /* file was converted to tail, org became HB, we found internal ++ item */ ++ ret = -EAGAIN; ++ goto done; ++ } ++ ++ pos->state = POS_ON_EPOINT; ++ move_flush_pos(pos, &lock, &load, &parent_coord); ++ pos->child = jref(org); ++ if (extent_is_unallocated(&parent_coord) ++ && extent_unit_index(&parent_coord) != index_jnode(org)) { ++ /* @org is not first child of its parent unit. This may happen ++ because longerm lock of its parent node was released between ++ scan_left and scan_right. For now work around this having flush to repeat */ ++ ret = -EAGAIN; ++ } ++ } ++ ++ done: ++ done_load_count(&load); ++ done_lh(&lock); ++ return ret; ++} ++ ++/* TODO LIST (no particular order): */ ++/* I have labelled most of the legitimate FIXME comments in this file with letters to ++ indicate which issue they relate to. There are a few miscellaneous FIXMEs with ++ specific names mentioned instead that need to be inspected/resolved. */ ++/* B. There is an issue described in reverse_relocate_test having to do with an ++ imprecise is_preceder? check having to do with partially-dirty extents. The code that ++ sets preceder hints and computes the preceder is basically untested. Careful testing ++ needs to be done that preceder calculations are done correctly, since if it doesn't ++ affect correctness we will not catch this stuff during regular testing. */ ++/* C. EINVAL, E_DEADLOCK, E_NO_NEIGHBOR, ENOENT handling. It is unclear which of these are ++ considered expected but unlikely conditions. Flush currently returns 0 (i.e., success ++ but no progress, i.e., restart) whenever it receives any of these in jnode_flush(). 
++ Many of the calls that may produce one of these return values (i.e., ++ longterm_lock_znode, reiser4_get_parent, reiser4_get_neighbor, ...) check some of these ++ values themselves and, for instance, stop flushing instead of resulting in a restart. ++ If any of these results are true error conditions then flush will go into a busy-loop, ++ as we noticed during testing when a corrupt tree caused find_child_ptr to return ++ ENOENT. It needs careful thought and testing of corner conditions. ++*/ ++/* D. Atomicity of flush_prep against deletion and flush concurrency. Suppose a created ++ block is assigned a block number then early-flushed to disk. It is dirtied again and ++ flush is called again. Concurrently, that block is deleted, and the de-allocation of ++ its block number does not need to be deferred, since it is not part of the preserve set ++ (i.e., it didn't exist before the transaction). I think there may be a race condition ++ where flush writes the dirty, created block after the non-deferred deallocated block ++ number is re-allocated, making it possible to write deleted data on top of non-deleted ++ data. Its just a theory, but it needs to be thought out. */ ++/* F. bio_alloc() failure is not handled gracefully. */ ++/* G. Unallocated children. */ ++/* H. Add a WANDERED_LIST to the atom to clarify the placement of wandered blocks. */ ++/* I. Rename flush-scan to scan-point, (flush-pos to flush-point?) */ ++ ++/* JNODE_FLUSH: MAIN ENTRY POINT */ ++/* This is the main entry point for flushing a jnode and its dirty neighborhood (dirty ++ neighborhood is named "slum"). Jnode_flush() is called if reiser4 has to write dirty ++ blocks to disk, it happens when Linux VM decides to reduce number of dirty pages or as ++ a part of transaction commit. ++ ++ Our objective here is to prep and flush the slum the jnode belongs to. 
We want to ++ squish the slum together, and allocate the nodes in it as we squish because allocation ++ of children affects squishing of parents. ++ ++ The "argument" @node tells flush where to start. From there, flush finds the left edge ++ of the slum, and calls squalloc (in which nodes are squeezed and allocated). To find a ++ "better place" to start squalloc first we perform a flush_scan. ++ ++ Flush-scanning may be performed in both left and right directions, but for different ++ purposes. When scanning to the left, we are searching for a node that precedes a ++ sequence of parent-first-ordered nodes which we will then flush in parent-first order. ++ During flush-scanning, we also take the opportunity to count the number of consecutive ++ leaf nodes. If this number is past some threshold (FLUSH_RELOCATE_THRESHOLD), then we ++ make a decision to reallocate leaf nodes (thus favoring write-optimization). ++ ++ Since the flush argument node can be anywhere in a sequence of dirty leaves, there may ++ also be dirty nodes to the right of the argument. If the scan-left operation does not ++ count at least FLUSH_RELOCATE_THRESHOLD nodes then we follow it with a right-scan ++ operation to see whether there is, in fact, enough nodes to meet the relocate ++ threshold. Each right- and left-scan operation uses a single flush_scan object. ++ ++ After left-scan and possibly right-scan, we prepare a flush_position object with the ++ starting flush point or parent coordinate, which was determined using scan-left. ++ ++ Next we call the main flush routine, squalloc, which iterates along the ++ leaf level, squeezing and allocating nodes (and placing them into the flush queue). ++ ++ After squalloc returns we take extra steps to ensure that all the children ++ of the final twig node are allocated--this involves repeating squalloc ++ until we finish at a twig with no unallocated children. ++ ++ Finally, we call flush_empty_queue to submit write-requests to disk. 
If we encounter ++ any above-twig nodes during flush_empty_queue that still have unallocated children, we ++ flush_unprep them. ++ ++ Flush treats several "failure" cases as non-failures, essentially causing them to start ++ over. E_DEADLOCK is one example. FIXME:(C) EINVAL, E_NO_NEIGHBOR, ENOENT: these should ++ probably be handled properly rather than restarting, but there are a bunch of cases to ++ audit. ++*/ ++ ++static int ++jnode_flush(jnode * node, long nr_to_write, long *nr_written, ++ flush_queue_t * fq, int flags) ++{ ++ long ret = 0; ++ flush_scan *right_scan; ++ flush_scan *left_scan; ++ flush_pos_t *flush_pos; ++ int todo; ++ struct super_block *sb; ++ reiser4_super_info_data *sbinfo; ++ jnode *leftmost_in_slum = NULL; ++ ++ assert("jmacd-76619", lock_stack_isclean(get_current_lock_stack())); ++ assert("nikita-3022", reiser4_schedulable()); ++ ++ assert("nikita-3185", ++ get_current_super_private()->delete_mutex_owner != current); ++ ++ /* allocate right_scan, left_scan and flush_pos */ ++ right_scan = ++ kmalloc(2 * sizeof(*right_scan) + sizeof(*flush_pos), ++ reiser4_ctx_gfp_mask_get()); ++ if (right_scan == NULL) ++ return RETERR(-ENOMEM); ++ left_scan = right_scan + 1; ++ flush_pos = (flush_pos_t *) (left_scan + 1); ++ ++ sb = reiser4_get_current_sb(); ++ sbinfo = get_super_private(sb); ++ ++ /* Flush-concurrency debug code */ ++#if REISER4_DEBUG ++ atomic_inc(&flush_cnt); ++#endif ++ ++ reiser4_enter_flush(sb); ++ ++ /* Initialize a flush position. */ ++ pos_init(flush_pos); ++ ++ flush_pos->nr_written = nr_written; ++ flush_pos->fq = fq; ++ flush_pos->flags = flags; ++ flush_pos->nr_to_write = nr_to_write; ++ ++ scan_init(right_scan); ++ scan_init(left_scan); ++ ++ /* First scan left and remember the leftmost scan position. If the leftmost ++ position is unformatted we remember its parent_coord. We scan until counting ++ FLUSH_SCAN_MAXNODES. 
++ ++ If starting @node is unformatted, at the beginning of left scan its ++ parent (twig level node, containing extent item) will be long term ++ locked and lock handle will be stored in the ++ @right_scan->parent_lock. This lock is used to start the rightward ++ scan without redoing the tree traversal (necessary to find parent) ++ and, hence, is kept during leftward scan. As a result, we have to ++ use try-lock when taking long term locks during the leftward scan. ++ */ ++ ret = scan_left(left_scan, right_scan, ++ node, sbinfo->flush.scan_maxnodes); ++ if (ret != 0) ++ goto failed; ++ ++ leftmost_in_slum = jref(left_scan->node); ++ scan_done(left_scan); ++ ++ /* Then possibly go right to decide if we will use a policy of relocating leaves. ++ This is only done if we did not scan past (and count) enough nodes during the ++ leftward scan. If we do scan right, we only care to go far enough to establish ++ that at least FLUSH_RELOCATE_THRESHOLD number of nodes are being flushed. The ++ scan limit is the difference between left_scan.count and the threshold. */ ++ ++ todo = sbinfo->flush.relocate_threshold - left_scan->count; ++ /* scan right is inherently deadlock prone, because we are ++ * (potentially) holding a lock on the twig node at this moment. ++ * FIXME: this is incorrect comment: lock is not held */ ++ if (todo > 0) { ++ ret = scan_right(right_scan, node, (unsigned)todo); ++ if (ret != 0) ++ goto failed; ++ } ++ ++ /* Only the right-scan count is needed, release any rightward locks right away. */ ++ scan_done(right_scan); ++ ++ /* ... and the answer is: we should relocate leaf nodes if at least ++ FLUSH_RELOCATE_THRESHOLD nodes were found. */ ++ flush_pos->leaf_relocate = JF_ISSET(node, JNODE_REPACK) || ++ (left_scan->count + right_scan->count >= ++ sbinfo->flush.relocate_threshold); ++ ++ /* Funny business here. We set the 'point' in the flush_position at prior to ++ starting squalloc regardless of whether the first point is ++ formatted or unformatted. 
Without this there would be an invariant, in the ++ rest of the code, that if the flush_position is unformatted then ++ flush_position->point is NULL and flush_position->parent_{lock,coord} is set, ++ and if the flush_position is formatted then flush_position->point is non-NULL ++ and no parent info is set. ++ ++ This seems lazy, but it makes the initial calls to reverse_relocate_test ++ (which ask "is it the pos->point the leftmost child of its parent") much easier ++ because we know the first child already. Nothing is broken by this, but the ++ reasoning is subtle. Holding an extra reference on a jnode during flush can ++ cause us to see nodes with HEARD_BANSHEE during squalloc, because nodes are not ++ removed from sibling lists until they have zero reference count. Flush would ++ never observe a HEARD_BANSHEE node on the left-edge of flush, nodes are only ++ deleted to the right. So if nothing is broken, why fix it? ++ ++ NOTE-NIKITA actually, flush can meet HEARD_BANSHEE node at any ++ point and in any moment, because of the concurrent file system ++ activity (for example, truncate). */ ++ ++ /* Check jnode state after flush_scan completed. Having a lock on this ++ node or its parent (in case of unformatted) helps us in case of ++ concurrent flushing. */ ++ if (jnode_check_flushprepped(leftmost_in_slum) ++ && !jnode_convertible(leftmost_in_slum)) { ++ ret = 0; ++ goto failed; ++ } ++ ++ /* Now setup flush_pos using scan_left's endpoint. 
*/ ++ ret = prepare_flush_pos(flush_pos, leftmost_in_slum); ++ if (ret) ++ goto failed; ++ ++ if (znode_get_level(flush_pos->coord.node) == LEAF_LEVEL ++ && node_is_empty(flush_pos->coord.node)) { ++ znode *empty = flush_pos->coord.node; ++ ++ assert("zam-1022", !ZF_ISSET(empty, JNODE_HEARD_BANSHEE)); ++ ret = delete_empty_node(empty); ++ goto failed; ++ } ++ ++ if (jnode_check_flushprepped(leftmost_in_slum) ++ && !jnode_convertible(leftmost_in_slum)) { ++ ret = 0; ++ goto failed; ++ } ++ ++ /* Set pos->preceder and (re)allocate pos and its ancestors if it is needed */ ++ ret = alloc_pos_and_ancestors(flush_pos); ++ if (ret) ++ goto failed; ++ ++ /* Do the main rightward-bottom-up squeeze and allocate loop. */ ++ ret = squalloc(flush_pos); ++ pos_stop(flush_pos); ++ if (ret) ++ goto failed; ++ ++ /* FIXME_NFQUCMPD: Here, handle the twig-special case for unallocated children. ++ First, the pos_stop() and pos_valid() routines should be modified ++ so that pos_stop() sets a flush_position->stop flag to 1 without ++ releasing the current position immediately--instead release it in ++ pos_done(). This is a better implementation than the current one anyway. ++ ++ It is not clear that all fields of the flush_position should not be released, ++ but at the very least the parent_lock, parent_coord, and parent_load should ++ remain held because they are hold the last twig when pos_stop() is ++ called. ++ ++ When we reach this point in the code, if the parent_coord is set to after the ++ last item then we know that flush reached the end of a twig (and according to ++ the new flush queueing design, we will return now). If parent_coord is not ++ past the last item, we should check if the current twig has any unallocated ++ children to the right (we are not concerned with unallocated children to the ++ left--in that case the twig itself should not have been allocated). 
If the ++ twig has unallocated children to the right, set the parent_coord to that ++ position and then repeat the call to squalloc. ++ ++ Testing for unallocated children may be defined in two ways: if any internal ++ item has a fake block number, it is unallocated; if any extent item is ++ unallocated then all of its children are unallocated. But there is a more ++ aggressive approach: if there are any dirty children of the twig to the right ++ of the current position, we may wish to relocate those nodes now. Checking for ++ potential relocation is more expensive as it requires knowing whether there are ++ any dirty children that are not unallocated. The extent_needs_allocation ++ should be used after setting the correct preceder. ++ ++ When we reach the end of a twig at this point in the code, if the flush can ++ continue (when the queue is ready) it will need some information on the future ++ starting point. That should be stored away in the flush_handle using a seal, I ++ believe. Holding a jref() on the future starting point may break other code ++ that deletes that node. ++ */ ++ ++ /* FIXME_NFQUCMPD: Also, we don't want to do any flushing when flush is called ++ above the twig level. If the VM calls flush above the twig level, do nothing ++ and return (but figure out why this happens). The txnmgr should be modified to ++ only flush its leaf-level dirty list. This will do all the necessary squeeze ++ and allocate steps but leave unallocated branches and possibly unallocated ++ twigs (when the twig's leftmost child is not dirty). After flushing the leaf ++ level, the remaining unallocated nodes should be given write-optimized ++ locations. (Possibly, the remaining unallocated twigs should be allocated just ++ before their leftmost child.) ++ */ ++ ++ /* Any failure reaches this point. 
*/ ++ failed: ++ ++ switch (ret) { ++ case -E_REPEAT: ++ case -EINVAL: ++ case -E_DEADLOCK: ++ case -E_NO_NEIGHBOR: ++ case -ENOENT: ++ /* FIXME(C): Except for E_DEADLOCK, these should probably be handled properly ++ in each case. They already are handled in many cases. */ ++ /* Something bad happened, but difficult to avoid... Try again! */ ++ ret = 0; ++ } ++ ++ if (leftmost_in_slum) ++ jput(leftmost_in_slum); ++ ++ pos_done(flush_pos); ++ scan_done(left_scan); ++ scan_done(right_scan); ++ kfree(right_scan); ++ ++ ON_DEBUG(atomic_dec(&flush_cnt)); ++ ++ reiser4_leave_flush(sb); ++ ++ return ret; ++} ++ ++/* The reiser4 flush subsystem can be turned into "rapid flush mode" means that ++ * flusher should submit all prepped nodes immediately without keeping them in ++ * flush queues for long time. The reason for rapid flush mode is to free ++ * memory as fast as possible. */ ++ ++#if REISER4_USE_RAPID_FLUSH ++ ++/** ++ * submit all prepped nodes if rapid flush mode is set, ++ * turn rapid flush mode off. ++ */ ++ ++static int rapid_flush(flush_pos_t * pos) ++{ ++ if (!wbq_available()) ++ return 0; ++ ++ return write_prepped_nodes(pos); ++} ++ ++#else ++ ++#define rapid_flush(pos) (0) ++ ++#endif /* REISER4_USE_RAPID_FLUSH */ ++ ++static jnode *find_flush_start_jnode(jnode *start, txn_atom *atom, ++ flush_queue_t *fq, int *nr_queued, ++ int flags) ++{ ++ jnode * node; ++ ++ if (start != NULL) { ++ spin_lock_jnode(start); ++ if (!jnode_is_flushprepped(start)) { ++ assert("zam-1056", start->atom == atom); ++ node = start; ++ goto enter; ++ } ++ spin_unlock_jnode(start); ++ } ++ /* ++ * In this loop we process all already prepped (RELOC or OVRWR) and dirtied again ++ * nodes. The atom spin lock is not released until all dirty nodes processed or ++ * not prepped node found in the atom dirty lists. 
++ */ ++ while ((node = find_first_dirty_jnode(atom, flags))) { ++ spin_lock_jnode(node); ++ enter: ++ assert("zam-881", JF_ISSET(node, JNODE_DIRTY)); ++ assert("zam-898", !JF_ISSET(node, JNODE_OVRWR)); ++ ++ if (JF_ISSET(node, JNODE_WRITEBACK)) { ++ /* move node to the end of atom's writeback list */ ++ list_move_tail(&node->capture_link, ATOM_WB_LIST(atom)); ++ ++ /* ++ * jnode is not necessarily on dirty list: if it was dirtied when ++ * it was on flush queue - it does not get moved to dirty list ++ */ ++ ON_DEBUG(count_jnode(atom, node, NODE_LIST(node), ++ WB_LIST, 1)); ++ ++ } else if (jnode_is_znode(node) ++ && znode_above_root(JZNODE(node))) { ++ /* ++ * A special case for znode-above-root. The above-root (fake) ++ * znode is captured and dirtied when the tree height changes or ++ * when the root node is relocated. This causes atoms to fuse so ++ * that changes at the root are serialized. However, this node is ++ * never flushed. This special case used to be in lock.c to ++ * prevent the above-root node from ever being captured, but now ++ * that it is captured we simply prevent it from flushing. The ++ * log-writer code relies on this to properly log superblock ++ * modifications of the tree height. ++ */ ++ jnode_make_wander_nolock(node); ++ } else if (JF_ISSET(node, JNODE_RELOC)) { ++ queue_jnode(fq, node); ++ ++(*nr_queued); ++ } else ++ break; ++ ++ spin_unlock_jnode(node); ++ } ++ return node; ++} ++ ++/* Flush some nodes of current atom, usually slum, return -E_REPEAT if there are more nodes ++ * to flush, return 0 if atom's dirty lists empty and keep current atom locked, return ++ * other errors as they are. 
*/ ++int ++flush_current_atom(int flags, long nr_to_write, long *nr_submitted, ++ txn_atom ** atom, jnode *start) ++{ ++ reiser4_super_info_data *sinfo = get_current_super_private(); ++ flush_queue_t *fq = NULL; ++ jnode *node; ++ int nr_queued; ++ int ret; ++ ++ assert("zam-889", atom != NULL && *atom != NULL); ++ assert_spin_locked(&((*atom)->alock)); ++ assert("zam-892", get_current_context()->trans->atom == *atom); ++ ++ nr_to_write = LONG_MAX; ++ while (1) { ++ ret = reiser4_fq_by_atom(*atom, &fq); ++ if (ret != -E_REPEAT) ++ break; ++ *atom = get_current_atom_locked(); ++ } ++ if (ret) ++ return ret; ++ ++ assert_spin_locked(&((*atom)->alock)); ++ ++ /* parallel flushers limit */ ++ if (sinfo->tmgr.atom_max_flushers != 0) { ++ while ((*atom)->nr_flushers >= sinfo->tmgr.atom_max_flushers) { ++ /* An reiser4_atom_send_event() call is inside ++ reiser4_fq_put_nolock() which is called when flush is ++ finished and nr_flushers is decremented. */ ++ reiser4_atom_wait_event(*atom); ++ *atom = get_current_atom_locked(); ++ } ++ } ++ ++ /* count ourself as a flusher */ ++ (*atom)->nr_flushers++; ++ ++ writeout_mode_enable(); ++ ++ nr_queued = 0; ++ node = find_flush_start_jnode(start, *atom, fq, &nr_queued, flags); ++ ++ if (node == NULL) { ++ if (nr_queued == 0) { ++ (*atom)->nr_flushers--; ++ reiser4_fq_put_nolock(fq); ++ reiser4_atom_send_event(*atom); ++ /* current atom remains locked */ ++ writeout_mode_disable(); ++ return 0; ++ } ++ spin_unlock_atom(*atom); ++ } else { ++ jref(node); ++ BUG_ON((*atom)->super != node->tree->super); ++ spin_unlock_atom(*atom); ++ spin_unlock_jnode(node); ++ BUG_ON(nr_to_write == 0); ++ ret = jnode_flush(node, nr_to_write, nr_submitted, fq, flags); ++ jput(node); ++ } ++ ++ ret = ++ reiser4_write_fq(fq, nr_submitted, ++ WRITEOUT_SINGLE_STREAM | WRITEOUT_FOR_PAGE_RECLAIM); ++ ++ *atom = get_current_atom_locked(); ++ (*atom)->nr_flushers--; ++ reiser4_fq_put_nolock(fq); ++ reiser4_atom_send_event(*atom); ++ spin_unlock_atom(*atom); 
++ ++ writeout_mode_disable(); ++ ++ if (ret == 0) ++ ret = -E_REPEAT; ++ ++ return ret; ++} ++ ++/* REVERSE PARENT-FIRST RELOCATION POLICIES */ ++ ++/* This implements the is-it-close-enough-to-its-preceder? test for relocation in the ++ reverse parent-first relocate context. Here all we know is the preceder and the block ++ number. Since we are going in reverse, the preceder may still be relocated as well, so ++ we can't ask the block allocator "is there a closer block available to relocate?" here. ++ In the _forward_ parent-first relocate context (not here) we actually call the block ++ allocator to try and find a closer location. */ ++static int ++reverse_relocate_if_close_enough(const reiser4_block_nr * pblk, ++ const reiser4_block_nr * nblk) ++{ ++ reiser4_block_nr dist; ++ ++ assert("jmacd-7710", *pblk != 0 && *nblk != 0); ++ assert("jmacd-7711", !reiser4_blocknr_is_fake(pblk)); ++ assert("jmacd-7712", !reiser4_blocknr_is_fake(nblk)); ++ ++ /* Distance is the absolute value. */ ++ dist = (*pblk > *nblk) ? (*pblk - *nblk) : (*nblk - *pblk); ++ ++ /* If the block is less than FLUSH_RELOCATE_DISTANCE blocks away from its preceder ++ block, do not relocate. */ ++ if (dist <= get_current_super_private()->flush.relocate_distance) { ++ return 0; ++ } ++ ++ return 1; ++} ++ ++/* This function is a predicate that tests for relocation. Always called in the ++ reverse-parent-first context, when we are asking whether the current node should be ++ relocated in order to expand the flush by dirtying the parent level (and thus ++ proceeding to flush that level). When traversing in the forward parent-first direction ++ (not here), relocation decisions are handled in two places: allocate_znode() and ++ extent_needs_allocation(). 
*/ ++static int ++reverse_relocate_test(jnode * node, const coord_t * parent_coord, ++ flush_pos_t * pos) ++{ ++ reiser4_block_nr pblk = 0; ++ reiser4_block_nr nblk = 0; ++ ++ assert("jmacd-8989", !jnode_is_root(node)); ++ ++ /* ++ * This function is called only from the ++ * reverse_relocate_check_dirty_parent() and only if the parent ++ * node is clean. This implies that the parent has the real (i.e., not ++ * fake) block number, and, so does the child, because otherwise the ++ * parent would be dirty. ++ */ ++ ++ /* New nodes are treated as if they are being relocated. */ ++ if (JF_ISSET (node, JNODE_CREATED) || ++ (pos->leaf_relocate && jnode_get_level(node) == LEAF_LEVEL)) { ++ return 1; ++ } ++ ++ /* Find the preceder. FIXME(B): When the child is an unformatted, previously ++ existing node, the coord may be leftmost even though the child is not the ++ parent-first preceder of the parent. If the first dirty node appears somewhere ++ in the middle of the first extent unit, this preceder calculation is wrong. ++ Needs more logic in here. */ ++ if (coord_is_leftmost_unit(parent_coord)) { ++ pblk = *znode_get_block(parent_coord->node); ++ } else { ++ pblk = pos->preceder.blk; ++ } ++ check_preceder(pblk); ++ ++ /* If (pblk == 0) then the preceder isn't allocated or isn't known: relocate. */ ++ if (pblk == 0) { ++ return 1; ++ } ++ ++ nblk = *jnode_get_block(node); ++ ++ if (reiser4_blocknr_is_fake(&nblk)) ++ /* child is unallocated, mark parent dirty */ ++ return 1; ++ ++ return reverse_relocate_if_close_enough(&pblk, &nblk); ++} ++ ++/* This function calls reverse_relocate_test to make a reverse-parent-first ++ relocation decision and then, if yes, it marks the parent dirty. 
*/ ++static int ++reverse_relocate_check_dirty_parent(jnode * node, const coord_t * parent_coord, ++ flush_pos_t * pos) ++{ ++ int ret; ++ ++ if (!JF_ISSET(ZJNODE(parent_coord->node), JNODE_DIRTY)) { ++ ++ ret = reverse_relocate_test(node, parent_coord, pos); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ /* FIXME-ZAM ++ if parent is already relocated - we do not want to grab space, right? */ ++ if (ret == 1) { ++ int grabbed; ++ ++ grabbed = get_current_context()->grabbed_blocks; ++ if (reiser4_grab_space_force((__u64) 1, BA_RESERVED) != ++ 0) ++ reiser4_panic("umka-1250", ++ "No space left during flush."); ++ ++ assert("jmacd-18923", ++ znode_is_write_locked(parent_coord->node)); ++ znode_make_dirty(parent_coord->node); ++ grabbed2free_mark(grabbed); ++ } ++ } ++ ++ return 0; ++} ++ ++/* INITIAL ALLOCATE ANCESTORS STEP (REVERSE PARENT-FIRST ALLOCATION BEFORE FORWARD ++ PARENT-FIRST LOOP BEGINS) */ ++ ++/* Get the leftmost child for given coord. */ ++static int get_leftmost_child_of_unit(const coord_t * coord, jnode ** child) ++{ ++ int ret; ++ ++ ret = item_utmost_child(coord, LEFT_SIDE, child); ++ ++ if (ret) ++ return ret; ++ ++ if (IS_ERR(*child)) ++ return PTR_ERR(*child); ++ ++ return 0; ++} ++ ++/* This step occurs after the left- and right-scans are completed, before starting the ++ forward parent-first traversal. Here we attempt to allocate ancestors of the starting ++ flush point, which means continuing in the reverse parent-first direction to the ++ parent, grandparent, and so on (as long as the child is a leftmost child). This ++ routine calls a recursive process, alloc_one_ancestor, which does the real work, ++ except there is special-case handling here for the first ancestor, which may be a twig. ++ At each level (here and alloc_one_ancestor), we check for relocation and then, if ++ the child is a leftmost child, repeat at the next level. On the way back down (the ++ recursion), we allocate the ancestors in parent-first order. 
*/ ++static int alloc_pos_and_ancestors(flush_pos_t * pos) ++{ ++ int ret = 0; ++ lock_handle plock; ++ load_count pload; ++ coord_t pcoord; ++ ++ if (znode_check_flushprepped(pos->lock.node)) ++ return 0; ++ ++ coord_init_invalid(&pcoord, NULL); ++ init_lh(&plock); ++ init_load_count(&pload); ++ ++ if (pos->state == POS_ON_EPOINT) { ++ /* a special case for pos on twig level, where we already have ++ a lock on parent node. */ ++ /* The parent may not be dirty, in which case we should decide ++ whether to relocate the child now. If decision is made to ++ relocate the child, the parent is marked dirty. */ ++ ret = ++ reverse_relocate_check_dirty_parent(pos->child, &pos->coord, ++ pos); ++ if (ret) ++ goto exit; ++ ++ /* FIXME_NFQUCMPD: We only need to allocate the twig (if child ++ is leftmost) and the leaf/child, so recursion is not needed. ++ Levels above the twig will be allocated for ++ write-optimization before the transaction commits. */ ++ ++ /* Do the recursive step, allocating zero or more of our ++ * ancestors. */ ++ ret = alloc_one_ancestor(&pos->coord, pos); ++ ++ } else { ++ if (!znode_is_root(pos->lock.node)) { ++ /* all formatted nodes except tree root */ ++ ret = ++ reiser4_get_parent(&plock, pos->lock.node, ++ ZNODE_WRITE_LOCK); ++ if (ret) ++ goto exit; ++ ++ ret = incr_load_count_znode(&pload, plock.node); ++ if (ret) ++ goto exit; ++ ++ ret = ++ find_child_ptr(plock.node, pos->lock.node, &pcoord); ++ if (ret) ++ goto exit; ++ ++ ret = ++ reverse_relocate_check_dirty_parent(ZJNODE ++ (pos->lock. ++ node), &pcoord, ++ pos); ++ if (ret) ++ goto exit; ++ ++ ret = alloc_one_ancestor(&pcoord, pos); ++ if (ret) ++ goto exit; ++ } ++ ++ ret = allocate_znode(pos->lock.node, &pcoord, pos); ++ } ++ exit: ++ done_load_count(&pload); ++ done_lh(&plock); ++ return ret; ++} ++ ++/* This is the recursive step described in alloc_pos_and_ancestors, above. 
Ignoring the ++ call to set_preceder, which is the next function described, this checks if the ++ child is a leftmost child and returns if it is not. If the child is a leftmost child ++ it checks for relocation, possibly dirtying the parent. Then it performs the recursive ++ step. */ ++static int alloc_one_ancestor(const coord_t * coord, flush_pos_t * pos) ++{ ++ int ret = 0; ++ lock_handle alock; ++ load_count aload; ++ coord_t acoord; ++ ++ /* As we ascend at the left-edge of the region to flush, take this opportunity at ++ the twig level to find our parent-first preceder unless we have already set ++ it. */ ++ if (pos->preceder.blk == 0) { ++ ret = set_preceder(coord, pos); ++ if (ret != 0) ++ return ret; ++ } ++ ++ /* If the ancestor is clean or already allocated, or if the child is not a ++ leftmost child, stop going up, even leaving coord->node not flushprepped. */ ++ if (znode_check_flushprepped(coord->node) ++ || !coord_is_leftmost_unit(coord)) ++ return 0; ++ ++ init_lh(&alock); ++ init_load_count(&aload); ++ coord_init_invalid(&acoord, NULL); ++ ++ /* Only ascend to the next level if it is a leftmost child, but write-lock the ++ parent in case we will relocate the child. */ ++ if (!znode_is_root(coord->node)) { ++ ++ ret = ++ jnode_lock_parent_coord(ZJNODE(coord->node), &acoord, ++ &alock, &aload, ZNODE_WRITE_LOCK, ++ 0); ++ if (ret != 0) { ++ /* FIXME(C): check EINVAL, E_DEADLOCK */ ++ goto exit; ++ } ++ ++ ret = ++ reverse_relocate_check_dirty_parent(ZJNODE(coord->node), ++ &acoord, pos); ++ if (ret != 0) { ++ goto exit; ++ } ++ ++ /* Recursive call. */ ++ if (!znode_check_flushprepped(acoord.node)) { ++ ret = alloc_one_ancestor(&acoord, pos); ++ if (ret) ++ goto exit; ++ } ++ } ++ ++ /* Note: we call allocate with the parent write-locked (except at the root) in ++ case we relocate the child, in which case it will modify the parent during this ++ call. 
*/ ++ ret = allocate_znode(coord->node, &acoord, pos); ++ ++ exit: ++ done_load_count(&aload); ++ done_lh(&alock); ++ return ret; ++} ++ ++/* During the reverse parent-first alloc_pos_and_ancestors process described above there is ++ a call to this function at the twig level. During alloc_pos_and_ancestors we may ask: ++ should this node be relocated (in reverse parent-first context)? We repeat this ++ process as long as the child is the leftmost child, eventually reaching an ancestor of ++ the flush point that is not a leftmost child. The preceder of that ancestors, which is ++ not a leftmost child, is actually on the leaf level. The preceder of that block is the ++ left-neighbor of the flush point. The preceder of that block is the rightmost child of ++ the twig on the left. So, when alloc_pos_and_ancestors passes upward through the twig ++ level, it stops momentarily to remember the block of the rightmost child of the twig on ++ the left and sets it to the flush_position's preceder_hint. ++ ++ There is one other place where we may set the flush_position's preceder hint, which is ++ during scan-left. ++*/ ++static int set_preceder(const coord_t * coord_in, flush_pos_t * pos) ++{ ++ int ret; ++ coord_t coord; ++ lock_handle left_lock; ++ load_count left_load; ++ ++ coord_dup(&coord, coord_in); ++ ++ init_lh(&left_lock); ++ init_load_count(&left_load); ++ ++ /* FIXME(B): Same FIXME as in "Find the preceder" in reverse_relocate_test. ++ coord_is_leftmost_unit is not the right test if the unformatted child is in the ++ middle of the first extent unit. */ ++ if (!coord_is_leftmost_unit(&coord)) { ++ coord_prev_unit(&coord); ++ } else { ++ ret = ++ reiser4_get_left_neighbor(&left_lock, coord.node, ++ ZNODE_READ_LOCK, GN_SAME_ATOM); ++ if (ret) { ++ /* If we fail for any reason it doesn't matter because the ++ preceder is only a hint. We are low-priority at this point, so ++ this must be the case. 
*/ ++ if (ret == -E_REPEAT || ret == -E_NO_NEIGHBOR || ++ ret == -ENOENT || ret == -EINVAL ++ || ret == -E_DEADLOCK) { ++ ret = 0; ++ } ++ goto exit; ++ } ++ ++ ret = incr_load_count_znode(&left_load, left_lock.node); ++ if (ret) ++ goto exit; ++ ++ coord_init_last_unit(&coord, left_lock.node); ++ } ++ ++ ret = ++ item_utmost_child_real_block(&coord, RIGHT_SIDE, ++ &pos->preceder.blk); ++ exit: ++ check_preceder(pos->preceder.blk); ++ done_load_count(&left_load); ++ done_lh(&left_lock); ++ return ret; ++} ++ ++/* MAIN SQUEEZE AND ALLOCATE LOOP (THREE BIG FUNCTIONS) */ ++ ++/* This procedure implements the outer loop of the flush algorithm. To put this in ++ context, here is the general list of steps taken by the flush routine as a whole: ++ ++ 1. Scan-left ++ 2. Scan-right (maybe) ++ 3. Allocate initial flush position and its ancestors ++ 4. ++ 5. ++ 6. ++ ++ This procedure implements the loop in steps 4 through 6 in the above listing. ++ ++ Step 4: if the current flush position is an extent item (position on the twig level), ++ it allocates the extent (allocate_extent_item_in_place) then shifts to the next ++ coordinate. If the next coordinate's leftmost child needs flushprep, we will continue. ++ If the next coordinate is an internal item, we descend back to the leaf level, ++ otherwise we repeat a step #4 (labeled ALLOC_EXTENTS below). If the "next coordinate" ++ brings us past the end of the twig level, then we call ++ reverse_relocate_end_of_twig to possibly dirty the next (right) twig, prior to ++ step #5 which moves to the right. ++ ++ Step 5: calls squalloc_changed_ancestors, which initiates a recursive call up the ++ tree to allocate any ancestors of the next-right flush position that are not also ++ ancestors of the current position. Those ancestors (in top-down order) are the next in ++ parent-first order. We squeeze adjacent nodes on the way up until the right node and ++ current node share the same parent, then allocate on the way back down. 
Finally, this ++ step sets the flush position to the next-right node. Then repeat steps 4 and 5. ++*/ ++ ++/* SQUEEZE CODE */ ++ ++/* squalloc_right_twig helper function, cut a range of extent items from ++ cut node to->node from the beginning up to coord @to. */ ++static int squalloc_right_twig_cut(coord_t * to, reiser4_key * to_key, ++ znode * left) ++{ ++ coord_t from; ++ reiser4_key from_key; ++ ++ coord_init_first_unit(&from, to->node); ++ item_key_by_coord(&from, &from_key); ++ ++ return cut_node_content(&from, to, &from_key, to_key, NULL); ++} ++ ++/* Copy as much of the leading extents from @right to @left, allocating ++ unallocated extents as they are copied. Returns SQUEEZE_TARGET_FULL or ++ SQUEEZE_SOURCE_EMPTY when no more can be shifted. If the next item is an ++ internal item it calls shift_one_internal_unit and may then return ++ SUBTREE_MOVED. */ ++static int squeeze_right_twig(znode * left, znode * right, flush_pos_t * pos) ++{ ++ int ret = SUBTREE_MOVED; ++ coord_t coord; /* used to iterate over items */ ++ reiser4_key stop_key; ++ ++ assert("jmacd-2008", !node_is_empty(right)); ++ coord_init_first_unit(&coord, right); ++ ++ /* FIXME: can be optimized to cut once */ ++ while (!node_is_empty(coord.node) && item_is_extent(&coord)) { ++ ON_DEBUG(void *vp); ++ ++ assert("vs-1468", coord_is_leftmost_unit(&coord)); ++ ON_DEBUG(vp = shift_check_prepare(left, coord.node)); ++ ++ /* stop_key is used to find what was copied and what to cut */ ++ stop_key = *reiser4_min_key(); ++ ret = squalloc_extent(left, &coord, pos, &stop_key); ++ if (ret != SQUEEZE_CONTINUE) { ++ ON_DEBUG(kfree(vp)); ++ break; ++ } ++ assert("vs-1465", !keyeq(&stop_key, reiser4_min_key())); ++ ++ /* Helper function to do the cutting. 
*/ ++ set_key_offset(&stop_key, get_key_offset(&stop_key) - 1); ++ check_me("vs-1466", ++ squalloc_right_twig_cut(&coord, &stop_key, left) == 0); ++ ++ ON_DEBUG(shift_check(vp, left, coord.node)); ++ } ++ ++ if (node_is_empty(coord.node)) ++ ret = SQUEEZE_SOURCE_EMPTY; ++ ++ if (ret == SQUEEZE_TARGET_FULL) { ++ goto out; ++ } ++ ++ if (node_is_empty(right)) { ++ /* The whole right node was copied into @left. */ ++ assert("vs-464", ret == SQUEEZE_SOURCE_EMPTY); ++ goto out; ++ } ++ ++ coord_init_first_unit(&coord, right); ++ ++ if (!item_is_internal(&coord)) { ++ /* we do not want to squeeze anything else to left neighbor because "slum" ++ is over */ ++ ret = SQUEEZE_TARGET_FULL; ++ goto out; ++ } ++ assert("jmacd-433", item_is_internal(&coord)); ++ ++ /* Shift an internal unit. The child must be allocated before shifting any more ++ extents, so we stop here. */ ++ ret = shift_one_internal_unit(left, right); ++ ++ out: ++ assert("jmacd-8612", ret < 0 || ret == SQUEEZE_TARGET_FULL ++ || ret == SUBTREE_MOVED || ret == SQUEEZE_SOURCE_EMPTY); ++ ++ if (ret == SQUEEZE_TARGET_FULL) { ++ /* We submit prepped nodes here and expect that this @left twig ++ * will not be modified again during this jnode_flush() call. */ ++ int ret1; ++ ++ /* NOTE: seems like io is done under long term locks. 
*/ ++ ret1 = write_prepped_nodes(pos); ++ if (ret1 < 0) ++ return ret1; ++ } ++ ++ return ret; ++} ++ ++#if REISER4_DEBUG ++static void item_convert_invariant(flush_pos_t * pos) ++{ ++ assert("edward-1225", coord_is_existing_item(&pos->coord)); ++ if (chaining_data_present(pos)) { ++ item_plugin *iplug = item_convert_plug(pos); ++ ++ assert("edward-1000", ++ iplug == item_plugin_by_coord(&pos->coord)); ++ assert("edward-1001", iplug->f.convert != NULL); ++ } else ++ assert("edward-1226", pos->child == NULL); ++} ++#else ++ ++#define item_convert_invariant(pos) noop ++ ++#endif ++ ++/* Scan node items starting from the first one and apply for each ++ item its flush ->convert() method (if any). This method may ++ resize/kill the item so the tree will be changed. ++*/ ++static int convert_node(flush_pos_t * pos, znode * node) ++{ ++ int ret = 0; ++ item_plugin *iplug; ++ ++ assert("edward-304", pos != NULL); ++ assert("edward-305", pos->child == NULL); ++ assert("edward-475", znode_convertible(node)); ++ assert("edward-669", znode_is_wlocked(node)); ++ assert("edward-1210", !node_is_empty(node)); ++ ++ if (znode_get_level(node) != LEAF_LEVEL) ++ /* unsupported */ ++ goto exit; ++ ++ coord_init_first_unit(&pos->coord, node); ++ ++ while (1) { ++ ret = 0; ++ coord_set_to_left(&pos->coord); ++ item_convert_invariant(pos); ++ ++ iplug = item_plugin_by_coord(&pos->coord); ++ assert("edward-844", iplug != NULL); ++ ++ if (iplug->f.convert) { ++ ret = iplug->f.convert(pos); ++ if (ret) ++ goto exit; ++ } ++ assert("edward-307", pos->child == NULL); ++ ++ if (coord_next_item(&pos->coord)) { ++ /* node is over */ ++ ++ if (!chaining_data_present(pos)) ++ /* finished this node */ ++ break; ++ if (should_chain_next_node(pos)) { ++ /* go to next node */ ++ move_chaining_data(pos, 0 /* to next node */ ); ++ break; ++ } ++ /* repeat this node */ ++ move_chaining_data(pos, 1 /* this node */ ); ++ continue; ++ } ++ /* Node is not over. ++ Check if there is attached convert data. 
++ If so roll one item position back and repeat ++ on this node ++ */ ++ if (chaining_data_present(pos)) { ++ ++ if (iplug != item_plugin_by_coord(&pos->coord)) ++ set_item_convert_count(pos, 0); ++ ++ ret = coord_prev_item(&pos->coord); ++ assert("edward-1003", !ret); ++ ++ move_chaining_data(pos, 1 /* this node */ ); ++ } ++ } ++ JF_CLR(ZJNODE(node), JNODE_CONVERTIBLE); ++ znode_make_dirty(node); ++ exit: ++ assert("edward-1004", !ret); ++ return ret; ++} ++ ++/* Squeeze and allocate the right neighbor. This is called after @left and ++ its current children have been squeezed and allocated already. This ++ procedure's job is to squeeze and items from @right to @left. ++ ++ If at the leaf level, use the shift_everything_left memcpy-optimized ++ version of shifting (squeeze_right_leaf). ++ ++ If at the twig level, extents are allocated as they are shifted from @right ++ to @left (squalloc_right_twig). ++ ++ At any other level, shift one internal item and return to the caller ++ (squalloc_parent_first) so that the shifted-subtree can be processed in ++ parent-first order. ++ ++ When unit of internal item is moved, squeezing stops and SUBTREE_MOVED is ++ returned. When all content of @right is squeezed, SQUEEZE_SOURCE_EMPTY is ++ returned. If nothing can be moved into @left anymore, SQUEEZE_TARGET_FULL ++ is returned. 
++*/ ++ ++static int squeeze_right_neighbor(flush_pos_t * pos, znode * left, ++ znode * right) ++{ ++ int ret; ++ ++ /* FIXME it is possible to see empty hasn't-heard-banshee node in a ++ * tree owing to error (for example, ENOSPC) in write */ ++ /* assert("jmacd-9321", !node_is_empty(left)); */ ++ assert("jmacd-9322", !node_is_empty(right)); ++ assert("jmacd-9323", znode_get_level(left) == znode_get_level(right)); ++ ++ switch (znode_get_level(left)) { ++ case TWIG_LEVEL: ++ /* Shift with extent allocating until either an internal item ++ is encountered or everything is shifted or no free space ++ left in @left */ ++ ret = squeeze_right_twig(left, right, pos); ++ break; ++ ++ default: ++ /* All other levels can use shift_everything until we implement per-item ++ flush plugins. */ ++ ret = squeeze_right_non_twig(left, right); ++ break; ++ } ++ ++ assert("jmacd-2011", (ret < 0 || ++ ret == SQUEEZE_SOURCE_EMPTY ++ || ret == SQUEEZE_TARGET_FULL ++ || ret == SUBTREE_MOVED)); ++ return ret; ++} ++ ++static int squeeze_right_twig_and_advance_coord(flush_pos_t * pos, ++ znode * right) ++{ ++ int ret; ++ ++ ret = squeeze_right_twig(pos->lock.node, right, pos); ++ if (ret < 0) ++ return ret; ++ if (ret > 0) { ++ coord_init_after_last_item(&pos->coord, pos->lock.node); ++ return ret; ++ } ++ ++ coord_init_last_unit(&pos->coord, pos->lock.node); ++ return 0; ++} ++ ++/* forward declaration */ ++static int squalloc_upper_levels(flush_pos_t *, znode *, znode *); ++ ++/* do a fast check for "same parents" condition before calling ++ * squalloc_upper_levels() */ ++static inline int check_parents_and_squalloc_upper_levels(flush_pos_t * pos, ++ znode * left, ++ znode * right) ++{ ++ if (znode_same_parents(left, right)) ++ return 0; ++ ++ return squalloc_upper_levels(pos, left, right); ++} ++ ++/* Check whether the parent of given @right node needs to be processes ++ ((re)allocated) prior to processing of the child. 
If @left and @right do not ++ share at least the parent of the @right is after the @left but before the ++ @right in parent-first order, we have to (re)allocate it before the @right ++ gets (re)allocated. */ ++static int squalloc_upper_levels(flush_pos_t * pos, znode * left, znode * right) ++{ ++ int ret; ++ ++ lock_handle left_parent_lock; ++ lock_handle right_parent_lock; ++ ++ load_count left_parent_load; ++ load_count right_parent_load; ++ ++ init_lh(&left_parent_lock); ++ init_lh(&right_parent_lock); ++ ++ init_load_count(&left_parent_load); ++ init_load_count(&right_parent_load); ++ ++ ret = reiser4_get_parent(&left_parent_lock, left, ZNODE_WRITE_LOCK); ++ if (ret) ++ goto out; ++ ++ ret = reiser4_get_parent(&right_parent_lock, right, ZNODE_WRITE_LOCK); ++ if (ret) ++ goto out; ++ ++ /* Check for same parents */ ++ if (left_parent_lock.node == right_parent_lock.node) ++ goto out; ++ ++ if (znode_check_flushprepped(right_parent_lock.node)) { ++ /* Keep parent-first order. In the order, the right parent node stands ++ before the @right node. If it is already allocated, we set the ++ preceder (next block search start point) to its block number, @right ++ node should be allocated after it. ++ ++ However, preceder is set only if the right parent is on twig level. ++ The explanation is the following: new branch nodes are allocated over ++ already allocated children while the tree grows, it is difficult to ++ keep tree ordered, we assume that only leaves and twings are correctly ++ allocated. So, only twigs are used as a preceder for allocating of the ++ rest of the slum. 
*/ ++ if (znode_get_level(right_parent_lock.node) == TWIG_LEVEL) { ++ pos->preceder.blk = ++ *znode_get_block(right_parent_lock.node); ++ check_preceder(pos->preceder.blk); ++ } ++ goto out; ++ } ++ ++ ret = incr_load_count_znode(&left_parent_load, left_parent_lock.node); ++ if (ret) ++ goto out; ++ ++ ret = incr_load_count_znode(&right_parent_load, right_parent_lock.node); ++ if (ret) ++ goto out; ++ ++ ret = ++ squeeze_right_neighbor(pos, left_parent_lock.node, ++ right_parent_lock.node); ++ /* We stop if error. We stop if some items/units were shifted (ret == 0) ++ * and thus @right changed its parent. It means we have not process ++ * right_parent node prior to processing of @right. Positive return ++ * values say that shifting items was not happen because of "empty ++ * source" or "target full" conditions. */ ++ if (ret <= 0) ++ goto out; ++ ++ /* parent(@left) and parent(@right) may have different parents also. We ++ * do a recursive call for checking that. */ ++ ret = ++ check_parents_and_squalloc_upper_levels(pos, left_parent_lock.node, ++ right_parent_lock.node); ++ if (ret) ++ goto out; ++ ++ /* allocate znode when going down */ ++ ret = lock_parent_and_allocate_znode(right_parent_lock.node, pos); ++ ++ out: ++ done_load_count(&left_parent_load); ++ done_load_count(&right_parent_load); ++ ++ done_lh(&left_parent_lock); ++ done_lh(&right_parent_lock); ++ ++ return ret; ++} ++ ++/* Check the leftmost child "flushprepped" status, also returns true if child ++ * node was not found in cache. */ ++static int leftmost_child_of_unit_check_flushprepped(const coord_t * coord) ++{ ++ int ret; ++ int prepped; ++ ++ jnode *child; ++ ++ ret = get_leftmost_child_of_unit(coord, &child); ++ ++ if (ret) ++ return ret; ++ ++ if (child) { ++ prepped = jnode_check_flushprepped(child); ++ jput(child); ++ } else { ++ /* We consider not existing child as a node which slum ++ processing should not continue to. Not cached node is clean, ++ so it is flushprepped. 
*/ ++ prepped = 1; ++ } ++ ++ return prepped; ++} ++ ++/* (re)allocate znode with automated getting parent node */ ++static int lock_parent_and_allocate_znode(znode * node, flush_pos_t * pos) ++{ ++ int ret; ++ lock_handle parent_lock; ++ load_count parent_load; ++ coord_t pcoord; ++ ++ assert("zam-851", znode_is_write_locked(node)); ++ ++ init_lh(&parent_lock); ++ init_load_count(&parent_load); ++ ++ ret = reiser4_get_parent(&parent_lock, node, ZNODE_WRITE_LOCK); ++ if (ret) ++ goto out; ++ ++ ret = incr_load_count_znode(&parent_load, parent_lock.node); ++ if (ret) ++ goto out; ++ ++ ret = find_child_ptr(parent_lock.node, node, &pcoord); ++ if (ret) ++ goto out; ++ ++ ret = allocate_znode(node, &pcoord, pos); ++ ++ out: ++ done_load_count(&parent_load); ++ done_lh(&parent_lock); ++ return ret; ++} ++ ++/* Process nodes on leaf level until unformatted node or rightmost node in the ++ * slum reached. */ ++static int handle_pos_on_formatted(flush_pos_t * pos) ++{ ++ int ret; ++ lock_handle right_lock; ++ load_count right_load; ++ ++ init_lh(&right_lock); ++ init_load_count(&right_load); ++ ++ if (should_convert_node(pos, pos->lock.node)) { ++ ret = convert_node(pos, pos->lock.node); ++ if (ret) ++ return ret; ++ } ++ ++ while (1) { ++ ret = ++ neighbor_in_slum(pos->lock.node, &right_lock, RIGHT_SIDE, ++ ZNODE_WRITE_LOCK, ++ !should_convert_next_node(pos, ++ right_lock. ++ node)); ++ if (ret) ++ break; ++ ++ /* we don't prep(allocate) nodes for flushing twice. This can be suboptimal, or it ++ * can be optimal. For now we choose to live with the risk that it will ++ * be suboptimal because it would be quite complex to code it to be ++ * smarter. 
*/ ++ if (znode_check_flushprepped(right_lock.node) ++ && !znode_convertible(right_lock.node)) { ++ assert("edward-1005", ++ !should_convert_next_node(pos, right_lock.node)); ++ pos_stop(pos); ++ break; ++ } ++ ++ ret = incr_load_count_znode(&right_load, right_lock.node); ++ if (ret) ++ break; ++ ++ if (should_convert_node(pos, right_lock.node)) { ++ ret = convert_node(pos, right_lock.node); ++ if (ret) ++ break; ++ if (node_is_empty(right_lock.node)) { ++ /* node became empty after converting, repeat */ ++ done_load_count(&right_load); ++ done_lh(&right_lock); ++ continue; ++ } ++ } ++ ++ /* squeeze _before_ going upward. */ ++ ret = ++ squeeze_right_neighbor(pos, pos->lock.node, ++ right_lock.node); ++ if (ret < 0) ++ break; ++ ++ if (znode_check_flushprepped(right_lock.node)) { ++ if (should_convert_next_node(pos, right_lock.node)) { ++ /* in spite of flushprepped status of the node, ++ its right slum neighbor should be converted */ ++ assert("edward-953", convert_data(pos)); ++ assert("edward-954", item_convert_data(pos)); ++ ++ if (node_is_empty(right_lock.node)) { ++ done_load_count(&right_load); ++ done_lh(&right_lock); ++ } else ++ move_flush_pos(pos, &right_lock, ++ &right_load, NULL); ++ continue; ++ } ++ pos_stop(pos); ++ break; ++ } ++ ++ if (node_is_empty(right_lock.node)) { ++ /* repeat if right node was squeezed completely */ ++ done_load_count(&right_load); ++ done_lh(&right_lock); ++ continue; ++ } ++ ++ /* parent(right_lock.node) has to be processed before ++ * (right_lock.node) due to "parent-first" allocation order. 
*/ ++ ret = ++ check_parents_and_squalloc_upper_levels(pos, pos->lock.node, ++ right_lock.node); ++ if (ret) ++ break; ++ /* (re)allocate _after_ going upward */ ++ ret = lock_parent_and_allocate_znode(right_lock.node, pos); ++ if (ret) ++ break; ++ ++ if (should_terminate_squalloc(pos)) { ++ set_item_convert_count(pos, 0); ++ break; ++ } ++ ++ /* advance the flush position to the right neighbor */ ++ move_flush_pos(pos, &right_lock, &right_load, NULL); ++ ++ ret = rapid_flush(pos); ++ if (ret) ++ break; ++ } ++ ++ assert("edward-1006", !convert_data(pos) || !item_convert_data(pos)); ++ ++ done_load_count(&right_load); ++ done_lh(&right_lock); ++ ++ /* This function indicates via pos whether to stop or go to twig or continue on current ++ * level. */ ++ return ret; ++ ++} ++ ++/* Process nodes on leaf level until unformatted node or rightmost node in the ++ * slum reached. */ ++static int handle_pos_on_leaf(flush_pos_t * pos) ++{ ++ int ret; ++ ++ assert("zam-845", pos->state == POS_ON_LEAF); ++ ++ ret = handle_pos_on_formatted(pos); ++ ++ if (ret == -E_NO_NEIGHBOR) { ++ /* cannot get right neighbor, go process extents. */ ++ pos->state = POS_TO_TWIG; ++ return 0; ++ } ++ ++ return ret; ++} ++ ++/* Process slum on level > 1 */ ++static int handle_pos_on_internal(flush_pos_t * pos) ++{ ++ assert("zam-850", pos->state == POS_ON_INTERNAL); ++ return handle_pos_on_formatted(pos); ++} ++ ++/* check whether squalloc should stop before processing given extent */ ++static int squalloc_extent_should_stop(flush_pos_t * pos) ++{ ++ assert("zam-869", item_is_extent(&pos->coord)); ++ ++ /* pos->child is a jnode handle_pos_on_extent() should start with in ++ * stead of the first child of the first extent unit. 
*/ ++ if (pos->child) { ++ int prepped; ++ ++ assert("vs-1383", jnode_is_unformatted(pos->child)); ++ prepped = jnode_check_flushprepped(pos->child); ++ pos->pos_in_unit = ++ jnode_get_index(pos->child) - ++ extent_unit_index(&pos->coord); ++ assert("vs-1470", ++ pos->pos_in_unit < extent_unit_width(&pos->coord)); ++ assert("nikita-3434", ++ ergo(extent_is_unallocated(&pos->coord), ++ pos->pos_in_unit == 0)); ++ jput(pos->child); ++ pos->child = NULL; ++ ++ return prepped; ++ } ++ ++ pos->pos_in_unit = 0; ++ if (extent_is_unallocated(&pos->coord)) ++ return 0; ++ ++ return leftmost_child_of_unit_check_flushprepped(&pos->coord); ++} ++ ++/* Handle the case when regular reiser4 tree (znodes connected one to its ++ * neighbors by sibling pointers) is interrupted on leaf level by one or more ++ * unformatted nodes. By having a lock on twig level and use extent code ++ * routines to process unformatted nodes we swim around an irregular part of ++ * reiser4 tree. */ ++static int handle_pos_on_twig(flush_pos_t * pos) ++{ ++ int ret; ++ ++ assert("zam-844", pos->state == POS_ON_EPOINT); ++ assert("zam-843", item_is_extent(&pos->coord)); ++ ++ /* We decide should we continue slum processing with current extent ++ unit: if leftmost child of current extent unit is flushprepped ++ (i.e. clean or already processed by flush) we stop squalloc(). There ++ is a fast check for unallocated extents which we assume contain all ++ not flushprepped nodes. */ ++ /* FIXME: Here we implement simple check, we are only looking on the ++ leftmost child. 
*/ ++ ret = squalloc_extent_should_stop(pos); ++ if (ret != 0) { ++ pos_stop(pos); ++ return ret; ++ } ++ ++ while (pos_valid(pos) && coord_is_existing_unit(&pos->coord) ++ && item_is_extent(&pos->coord)) { ++ ret = reiser4_alloc_extent(pos); ++ if (ret) { ++ break; ++ } ++ coord_next_unit(&pos->coord); ++ } ++ ++ if (coord_is_after_rightmost(&pos->coord)) { ++ pos->state = POS_END_OF_TWIG; ++ return 0; ++ } ++ if (item_is_internal(&pos->coord)) { ++ pos->state = POS_TO_LEAF; ++ return 0; ++ } ++ ++ assert("zam-860", item_is_extent(&pos->coord)); ++ ++ /* "slum" is over */ ++ pos->state = POS_INVALID; ++ return 0; ++} ++ ++/* When we about to return flush position from twig to leaf level we can process ++ * the right twig node or move position to the leaf. This processes right twig ++ * if it is possible and jump to leaf level if not. */ ++static int handle_pos_end_of_twig(flush_pos_t * pos) ++{ ++ int ret; ++ lock_handle right_lock; ++ load_count right_load; ++ coord_t at_right; ++ jnode *child = NULL; ++ ++ assert("zam-848", pos->state == POS_END_OF_TWIG); ++ assert("zam-849", coord_is_after_rightmost(&pos->coord)); ++ ++ init_lh(&right_lock); ++ init_load_count(&right_load); ++ ++ /* We get a lock on the right twig node even it is not dirty because ++ * slum continues or discontinues on leaf level not on next twig. This ++ * lock on the right twig is needed for getting its leftmost child. */ ++ ret = ++ reiser4_get_right_neighbor(&right_lock, pos->lock.node, ++ ZNODE_WRITE_LOCK, GN_SAME_ATOM); ++ if (ret) ++ goto out; ++ ++ ret = incr_load_count_znode(&right_load, right_lock.node); ++ if (ret) ++ goto out; ++ ++ /* right twig could be not dirty */ ++ if (JF_ISSET(ZJNODE(right_lock.node), JNODE_DIRTY)) { ++ /* If right twig node is dirty we always attempt to squeeze it ++ * content to the left... 
*/ ++ became_dirty: ++ ret = ++ squeeze_right_twig_and_advance_coord(pos, right_lock.node); ++ if (ret <= 0) { ++ /* pos->coord is on internal item, go to leaf level, or ++ * we have an error which will be caught in squalloc() */ ++ pos->state = POS_TO_LEAF; ++ goto out; ++ } ++ ++ /* If right twig was squeezed completely we wave to re-lock ++ * right twig. now it is done through the top-level squalloc ++ * routine. */ ++ if (node_is_empty(right_lock.node)) ++ goto out; ++ ++ /* ... and prep it if it is not yet prepped */ ++ if (!znode_check_flushprepped(right_lock.node)) { ++ /* As usual, process parent before ... */ ++ ret = ++ check_parents_and_squalloc_upper_levels(pos, ++ pos->lock. ++ node, ++ right_lock. ++ node); ++ if (ret) ++ goto out; ++ ++ /* ... processing the child */ ++ ret = ++ lock_parent_and_allocate_znode(right_lock.node, ++ pos); ++ if (ret) ++ goto out; ++ } ++ } else { ++ coord_init_first_unit(&at_right, right_lock.node); ++ ++ /* check first child of next twig, should we continue there ? */ ++ ret = get_leftmost_child_of_unit(&at_right, &child); ++ if (ret || child == NULL || jnode_check_flushprepped(child)) { ++ pos_stop(pos); ++ goto out; ++ } ++ ++ /* check clean twig for possible relocation */ ++ if (!znode_check_flushprepped(right_lock.node)) { ++ ret = ++ reverse_relocate_check_dirty_parent(child, ++ &at_right, pos); ++ if (ret) ++ goto out; ++ if (JF_ISSET(ZJNODE(right_lock.node), JNODE_DIRTY)) ++ goto became_dirty; ++ } ++ } ++ ++ assert("zam-875", znode_check_flushprepped(right_lock.node)); ++ ++ /* Update the preceder by a block number of just processed right twig ++ * node. The code above could miss the preceder updating because ++ * allocate_znode() could not be called for this node. 
*/ ++ pos->preceder.blk = *znode_get_block(right_lock.node); ++ check_preceder(pos->preceder.blk); ++ ++ coord_init_first_unit(&at_right, right_lock.node); ++ assert("zam-868", coord_is_existing_unit(&at_right)); ++ ++ pos->state = item_is_extent(&at_right) ? POS_ON_EPOINT : POS_TO_LEAF; ++ move_flush_pos(pos, &right_lock, &right_load, &at_right); ++ ++ out: ++ done_load_count(&right_load); ++ done_lh(&right_lock); ++ ++ if (child) ++ jput(child); ++ ++ return ret; ++} ++ ++/* Move the pos->lock to leaf node pointed by pos->coord, check should we ++ * continue there. */ ++static int handle_pos_to_leaf(flush_pos_t * pos) ++{ ++ int ret; ++ lock_handle child_lock; ++ load_count child_load; ++ jnode *child; ++ ++ assert("zam-846", pos->state == POS_TO_LEAF); ++ assert("zam-847", item_is_internal(&pos->coord)); ++ ++ init_lh(&child_lock); ++ init_load_count(&child_load); ++ ++ ret = get_leftmost_child_of_unit(&pos->coord, &child); ++ if (ret) ++ return ret; ++ if (child == NULL) { ++ pos_stop(pos); ++ return 0; ++ } ++ ++ if (jnode_check_flushprepped(child)) { ++ pos->state = POS_INVALID; ++ goto out; ++ } ++ ++ ret = ++ longterm_lock_znode(&child_lock, JZNODE(child), ZNODE_WRITE_LOCK, ++ ZNODE_LOCK_LOPRI); ++ if (ret) ++ goto out; ++ ++ ret = incr_load_count_znode(&child_load, JZNODE(child)); ++ if (ret) ++ goto out; ++ ++ ret = allocate_znode(JZNODE(child), &pos->coord, pos); ++ if (ret) ++ goto out; ++ ++ /* move flush position to leaf level */ ++ pos->state = POS_ON_LEAF; ++ move_flush_pos(pos, &child_lock, &child_load, NULL); ++ ++ if (node_is_empty(JZNODE(child))) { ++ ret = delete_empty_node(JZNODE(child)); ++ pos->state = POS_INVALID; ++ } ++ out: ++ done_load_count(&child_load); ++ done_lh(&child_lock); ++ jput(child); ++ ++ return ret; ++} ++ ++/* move pos from leaf to twig, and move lock from leaf to twig. 
*/ ++/* Move pos->lock to upper (twig) level */ ++static int handle_pos_to_twig(flush_pos_t * pos) ++{ ++ int ret; ++ ++ lock_handle parent_lock; ++ load_count parent_load; ++ coord_t pcoord; ++ ++ assert("zam-852", pos->state == POS_TO_TWIG); ++ ++ init_lh(&parent_lock); ++ init_load_count(&parent_load); ++ ++ ret = ++ reiser4_get_parent(&parent_lock, pos->lock.node, ZNODE_WRITE_LOCK); ++ if (ret) ++ goto out; ++ ++ ret = incr_load_count_znode(&parent_load, parent_lock.node); ++ if (ret) ++ goto out; ++ ++ ret = find_child_ptr(parent_lock.node, pos->lock.node, &pcoord); ++ if (ret) ++ goto out; ++ ++ assert("zam-870", item_is_internal(&pcoord)); ++ coord_next_item(&pcoord); ++ ++ if (coord_is_after_rightmost(&pcoord)) ++ pos->state = POS_END_OF_TWIG; ++ else if (item_is_extent(&pcoord)) ++ pos->state = POS_ON_EPOINT; ++ else { ++ /* Here we understand that getting -E_NO_NEIGHBOR in ++ * handle_pos_on_leaf() was because of just a reaching edge of ++ * slum */ ++ pos_stop(pos); ++ goto out; ++ } ++ ++ move_flush_pos(pos, &parent_lock, &parent_load, &pcoord); ++ ++ out: ++ done_load_count(&parent_load); ++ done_lh(&parent_lock); ++ ++ return ret; ++} ++ ++typedef int (*pos_state_handle_t) (flush_pos_t *); ++static pos_state_handle_t flush_pos_handlers[] = { ++ /* process formatted nodes on leaf level, keep lock on a leaf node */ ++ [POS_ON_LEAF] = handle_pos_on_leaf, ++ /* process unformatted nodes, keep lock on twig node, pos->coord points to extent currently ++ * being processed */ ++ [POS_ON_EPOINT] = handle_pos_on_twig, ++ /* move a lock from leaf node to its parent for further processing of unformatted nodes */ ++ [POS_TO_TWIG] = handle_pos_to_twig, ++ /* move a lock from twig to leaf level when a processing of unformatted nodes finishes, ++ * pos->coord points to the leaf node we jump to */ ++ [POS_TO_LEAF] = handle_pos_to_leaf, ++ /* after processing last extent in the twig node, attempting to shift items from the twigs ++ * right neighbor and process them 
while shifting */ ++ [POS_END_OF_TWIG] = handle_pos_end_of_twig, ++ /* process formatted nodes on internal level, keep lock on an internal node */ ++ [POS_ON_INTERNAL] = handle_pos_on_internal ++}; ++ ++/* Advance flush position horizontally, prepare for flushing ((re)allocate, squeeze, ++ * encrypt) nodes and their ancestors in "parent-first" order */ ++static int squalloc(flush_pos_t * pos) ++{ ++ int ret = 0; ++ ++ /* maybe needs to be made a case statement with handle_pos_on_leaf as first case, for ++ * greater CPU efficiency? Measure and see.... -Hans */ ++ while (pos_valid(pos)) { ++ ret = flush_pos_handlers[pos->state] (pos); ++ if (ret < 0) ++ break; ++ ++ ret = rapid_flush(pos); ++ if (ret) ++ break; ++ } ++ ++ /* any positive value or -E_NO_NEIGHBOR are legal return codes for handle_pos* ++ routines, -E_NO_NEIGHBOR means that slum edge was reached */ ++ if (ret > 0 || ret == -E_NO_NEIGHBOR) ++ ret = 0; ++ ++ return ret; ++} ++ ++static void update_ldkey(znode * node) ++{ ++ reiser4_key ldkey; ++ ++ assert_rw_write_locked(&(znode_get_tree(node)->dk_lock)); ++ if (node_is_empty(node)) ++ return; ++ ++ znode_set_ld_key(node, leftmost_key_in_node(node, &ldkey)); ++} ++ ++/* this is to be called after calling of shift node's method to shift data from @right to ++ @left. 
It sets left delimiting keys of @left and @right to keys of first items of @left ++ and @right correspondingly and sets right delimiting key of @left to first key of @right */ ++static void update_znode_dkeys(znode * left, znode * right) ++{ ++ assert_rw_write_locked(&(znode_get_tree(right)->dk_lock)); ++ assert("vs-1629", (znode_is_write_locked(left) && ++ znode_is_write_locked(right))); ++ ++ /* we need to update left delimiting of left if it was empty before shift */ ++ update_ldkey(left); ++ update_ldkey(right); ++ if (node_is_empty(right)) ++ znode_set_rd_key(left, znode_get_rd_key(right)); ++ else ++ znode_set_rd_key(left, znode_get_ld_key(right)); ++} ++ ++/* try to shift everything from @right to @left. If everything was shifted - ++ @right is removed from the tree. Result is the number of bytes shifted. */ ++static int ++shift_everything_left(znode * right, znode * left, carry_level * todo) ++{ ++ coord_t from; ++ node_plugin *nplug; ++ carry_plugin_info info; ++ ++ coord_init_after_last_item(&from, right); ++ ++ nplug = node_plugin_by_node(right); ++ info.doing = NULL; ++ info.todo = todo; ++ return nplug->shift(&from, left, SHIFT_LEFT, ++ 1 /* delete @right if it becomes empty */ , ++ 1 ++ /* move coord @from to node @left if everything will be shifted */ ++ , ++ &info); ++} ++ ++/* Shift as much as possible from @right to @left using the memcpy-optimized ++ shift_everything_left. @left and @right are formatted neighboring nodes on ++ leaf level. 
*/ ++static int squeeze_right_non_twig(znode * left, znode * right) ++{ ++ int ret; ++ carry_pool *pool; ++ carry_level *todo; ++ ++ assert("nikita-2246", znode_get_level(left) == znode_get_level(right)); ++ ++ if (!JF_ISSET(ZJNODE(left), JNODE_DIRTY) || ++ !JF_ISSET(ZJNODE(right), JNODE_DIRTY)) ++ return SQUEEZE_TARGET_FULL; ++ ++ pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo)); ++ if (IS_ERR(pool)) ++ return PTR_ERR(pool); ++ todo = (carry_level *) (pool + 1); ++ init_carry_level(todo, pool); ++ ++ ret = shift_everything_left(right, left, todo); ++ if (ret > 0) { ++ /* something was shifted */ ++ reiser4_tree *tree; ++ __u64 grabbed; ++ ++ znode_make_dirty(left); ++ znode_make_dirty(right); ++ ++ /* update delimiting keys of nodes which participated in ++ shift. FIXME: it would be better to have this in shift ++ node's operation. But it can not be done there. Nobody ++ remembers why, though */ ++ tree = znode_get_tree(left); ++ write_lock_dk(tree); ++ update_znode_dkeys(left, right); ++ write_unlock_dk(tree); ++ ++ /* Carry is called to update delimiting key and, maybe, to remove empty ++ node. */ ++ grabbed = get_current_context()->grabbed_blocks; ++ ret = reiser4_grab_space_force(tree->height, BA_RESERVED); ++ assert("nikita-3003", ret == 0); /* reserved space is exhausted. Ask Hans. */ ++ ret = reiser4_carry(todo, NULL /* previous level */ ); ++ grabbed2free_mark(grabbed); ++ } else { ++ /* Shifting impossible, we return appropriate result code */ ++ ret = ++ node_is_empty(right) ? SQUEEZE_SOURCE_EMPTY : ++ SQUEEZE_TARGET_FULL; ++ } ++ ++ done_carry_pool(pool); ++ ++ return ret; ++} ++ ++#if REISER4_DEBUG ++static int sibling_link_is_ok(const znode *left, const znode *right) ++{ ++ int result; ++ ++ read_lock_tree(znode_get_tree(left)); ++ result = (left->right == right && left == right->left); ++ read_unlock_tree(znode_get_tree(left)); ++ return result; ++} ++#endif ++ ++/* Shift first unit of first item if it is an internal one. 
Return ++ SQUEEZE_TARGET_FULL if it fails to shift an item, otherwise return ++ SUBTREE_MOVED. */ ++static int shift_one_internal_unit(znode * left, znode * right) ++{ ++ int ret; ++ carry_pool *pool; ++ carry_level *todo; ++ coord_t *coord; ++ carry_plugin_info *info; ++ int size, moved; ++ ++ assert("nikita-2247", znode_get_level(left) == znode_get_level(right)); ++ assert("nikita-2435", znode_is_write_locked(left)); ++ assert("nikita-2436", znode_is_write_locked(right)); ++ assert("nikita-2434", sibling_link_is_ok(left, right)); ++ ++ pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo) + ++ sizeof(*coord) + sizeof(*info) ++#if REISER4_DEBUG ++ + sizeof(*coord) + 2 * sizeof(reiser4_key) ++#endif ++ ); ++ if (IS_ERR(pool)) ++ return PTR_ERR(pool); ++ todo = (carry_level *) (pool + 1); ++ init_carry_level(todo, pool); ++ ++ coord = (coord_t *) (todo + 3); ++ coord_init_first_unit(coord, right); ++ info = (carry_plugin_info *) (coord + 1); ++ ++#if REISER4_DEBUG ++ if (!node_is_empty(left)) { ++ coord_t *last; ++ reiser4_key *right_key; ++ reiser4_key *left_key; ++ ++ last = (coord_t *) (info + 1); ++ right_key = (reiser4_key *) (last + 1); ++ left_key = right_key + 1; ++ coord_init_last_unit(last, left); ++ ++ assert("nikita-2463", ++ keyle(item_key_by_coord(last, left_key), ++ item_key_by_coord(coord, right_key))); ++ } ++#endif ++ ++ assert("jmacd-2007", item_is_internal(coord)); ++ ++ size = item_length_by_coord(coord); ++ info->todo = todo; ++ info->doing = NULL; ++ ++ ret = node_plugin_by_node(left)->shift(coord, left, SHIFT_LEFT, ++ 1 ++ /* delete @right if it becomes empty */ ++ , ++ 0 ++ /* do not move coord @coord to node @left */ ++ , ++ info); ++ ++ /* If shift returns positive, then we shifted the item. 
*/ ++ assert("vs-423", ret <= 0 || size == ret); ++ moved = (ret > 0); ++ ++ if (moved) { ++ /* something was moved */ ++ reiser4_tree *tree; ++ int grabbed; ++ ++ znode_make_dirty(left); ++ znode_make_dirty(right); ++ tree = znode_get_tree(left); ++ write_lock_dk(tree); ++ update_znode_dkeys(left, right); ++ write_unlock_dk(tree); ++ ++ /* reserve space for delimiting keys after shifting */ ++ grabbed = get_current_context()->grabbed_blocks; ++ ret = reiser4_grab_space_force(tree->height, BA_RESERVED); ++ assert("nikita-3003", ret == 0); /* reserved space is exhausted. Ask Hans. */ ++ ++ ret = reiser4_carry(todo, NULL /* previous level */ ); ++ grabbed2free_mark(grabbed); ++ } ++ ++ done_carry_pool(pool); ++ ++ if (ret != 0) { ++ /* Shift or carry operation failed. */ ++ assert("jmacd-7325", ret < 0); ++ return ret; ++ } ++ ++ return moved ? SUBTREE_MOVED : SQUEEZE_TARGET_FULL; ++} ++ ++/* Make the final relocate/wander decision during forward parent-first squalloc for a ++ znode. For unformatted nodes this is done in plugin/item/extent.c:extent_needs_allocation(). */ ++static int ++allocate_znode_loaded(znode * node, ++ const coord_t * parent_coord, flush_pos_t * pos) ++{ ++ int ret; ++ reiser4_super_info_data *sbinfo = get_current_super_private(); ++ /* FIXME(D): We have the node write-locked and should have checked for ! ++ allocated() somewhere before reaching this point, but there can be a race, so ++ this assertion is bogus. */ ++ assert("jmacd-7987", !jnode_check_flushprepped(ZJNODE(node))); ++ assert("jmacd-7988", znode_is_write_locked(node)); ++ assert("jmacd-7989", coord_is_invalid(parent_coord) ++ || znode_is_write_locked(parent_coord->node)); ++ ++ if (ZF_ISSET(node, JNODE_REPACK) || ZF_ISSET(node, JNODE_CREATED) || ++ znode_is_root(node) || ++ /* We have enough nodes to relocate no matter what. */ ++ (pos->leaf_relocate != 0 && znode_get_level(node) == LEAF_LEVEL)) { ++ /* No need to decide with new nodes, they are treated the same as ++ relocate. 
If the root node is dirty, relocate. */ ++ if (pos->preceder.blk == 0) { ++ /* preceder is unknown and we have decided to relocate node -- ++ using of default value for search start is better than search ++ from block #0. */ ++ get_blocknr_hint_default(&pos->preceder.blk); ++ check_preceder(pos->preceder.blk); ++ } ++ ++ goto best_reloc; ++ ++ } else if (pos->preceder.blk == 0) { ++ /* If we don't know the preceder, leave it where it is. */ ++ jnode_make_wander(ZJNODE(node)); ++ } else { ++ /* Make a decision based on block distance. */ ++ reiser4_block_nr dist; ++ reiser4_block_nr nblk = *znode_get_block(node); ++ ++ assert("jmacd-6172", !reiser4_blocknr_is_fake(&nblk)); ++ assert("jmacd-6173", !reiser4_blocknr_is_fake(&pos->preceder.blk)); ++ assert("jmacd-6174", pos->preceder.blk != 0); ++ ++ if (pos->preceder.blk == nblk - 1) { ++ /* Ideal. */ ++ jnode_make_wander(ZJNODE(node)); ++ } else { ++ ++ dist = ++ (nblk < ++ pos->preceder.blk) ? (pos->preceder.blk - ++ nblk) : (nblk - ++ pos->preceder.blk); ++ ++ /* See if we can find a closer block (forward direction only). */ ++ pos->preceder.max_dist = ++ min((reiser4_block_nr) sbinfo->flush. ++ relocate_distance, dist); ++ pos->preceder.level = znode_get_level(node); ++ ++ ret = allocate_znode_update(node, parent_coord, pos); ++ ++ pos->preceder.max_dist = 0; ++ ++ if (ret && (ret != -ENOSPC)) ++ return ret; ++ ++ if (ret == 0) { ++ /* Got a better allocation. */ ++ znode_make_reloc(node, pos->fq); ++ } else if (dist < sbinfo->flush.relocate_distance) { ++ /* The present allocation is good enough. */ ++ jnode_make_wander(ZJNODE(node)); ++ } else { ++ /* Otherwise, try to relocate to the best position. */ ++ best_reloc: ++ ret = ++ allocate_znode_update(node, parent_coord, ++ pos); ++ if (ret != 0) ++ return ret; ++ ++ /* set JNODE_RELOC bit _after_ node gets allocated */ ++ znode_make_reloc(node, pos->fq); ++ } ++ } ++ } ++ ++ /* This is the new preceder. 
*/ ++ pos->preceder.blk = *znode_get_block(node); ++ check_preceder(pos->preceder.blk); ++ pos->alloc_cnt += 1; ++ ++ assert("jmacd-4277", !reiser4_blocknr_is_fake(&pos->preceder.blk)); ++ ++ return 0; ++} ++ ++static int ++allocate_znode(znode * node, const coord_t * parent_coord, flush_pos_t * pos) ++{ ++ /* ++ * perform znode allocation with znode pinned in memory to avoid races ++ * with asynchronous emergency flush (which plays with ++ * JNODE_FLUSH_RESERVED bit). ++ */ ++ return WITH_DATA(node, allocate_znode_loaded(node, parent_coord, pos)); ++} ++ ++/* A subroutine of allocate_znode, this is called first to see if there is a close ++ position to relocate to. It may return ENOSPC if there is no close position. If there ++ is no close position it may not relocate. This takes care of updating the parent node ++ with the relocated block address. */ ++static int ++allocate_znode_update(znode * node, const coord_t * parent_coord, ++ flush_pos_t * pos) ++{ ++ int ret; ++ reiser4_block_nr blk; ++ lock_handle uber_lock; ++ int flush_reserved_used = 0; ++ int grabbed; ++ reiser4_context *ctx; ++ reiser4_super_info_data *sbinfo; ++ ++ init_lh(&uber_lock); ++ ++ ctx = get_current_context(); ++ sbinfo = get_super_private(ctx->super); ++ ++ grabbed = ctx->grabbed_blocks; ++ ++ /* discard e-flush allocation */ ++ ret = zload(node); ++ if (ret) ++ return ret; ++ ++ if (ZF_ISSET(node, JNODE_CREATED)) { ++ assert("zam-816", reiser4_blocknr_is_fake(znode_get_block(node))); ++ pos->preceder.block_stage = BLOCK_UNALLOCATED; ++ } else { ++ pos->preceder.block_stage = BLOCK_GRABBED; ++ ++ /* The disk space for relocating the @node is already reserved in "flush reserved" ++ * counter if @node is leaf, otherwise we grab space using BA_RESERVED (means grab ++ * space from whole disk not from only 95%). 
*/ ++ if (znode_get_level(node) == LEAF_LEVEL) { ++ /* ++ * earlier (during do_jnode_make_dirty()) we decided ++ * that @node can possibly go into overwrite set and ++ * reserved block for its wandering location. ++ */ ++ txn_atom *atom = get_current_atom_locked(); ++ assert("nikita-3449", ++ ZF_ISSET(node, JNODE_FLUSH_RESERVED)); ++ flush_reserved2grabbed(atom, (__u64) 1); ++ spin_unlock_atom(atom); ++ /* ++ * we are trying to move node into relocate ++ * set. Allocation of relocated position "uses" ++ * reserved block. ++ */ ++ ZF_CLR(node, JNODE_FLUSH_RESERVED); ++ flush_reserved_used = 1; ++ } else { ++ ret = reiser4_grab_space_force((__u64) 1, BA_RESERVED); ++ if (ret != 0) ++ goto exit; ++ } ++ } ++ ++ /* We may do not use 5% of reserved disk space here and flush will not pack tightly. */ ++ ret = reiser4_alloc_block(&pos->preceder, &blk, ++ BA_FORMATTED | BA_PERMANENT); ++ if (ret) ++ goto exit; ++ ++ if (!ZF_ISSET(node, JNODE_CREATED) && ++ (ret = ++ reiser4_dealloc_block(znode_get_block(node), 0, ++ BA_DEFER | BA_FORMATTED))) ++ goto exit; ++ ++ if (likely(!znode_is_root(node))) { ++ item_plugin *iplug; ++ ++ iplug = item_plugin_by_coord(parent_coord); ++ assert("nikita-2954", iplug->f.update != NULL); ++ iplug->f.update(parent_coord, &blk); ++ ++ znode_make_dirty(parent_coord->node); ++ ++ } else { ++ reiser4_tree *tree = znode_get_tree(node); ++ znode *uber; ++ ++ /* We take a longterm lock on the fake node in order to change ++ the root block number. This may cause atom fusion. */ ++ ret = get_uber_znode(tree, ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI, ++ &uber_lock); ++ /* The fake node cannot be deleted, and we must have priority ++ here, and may not be confused with ENOSPC. 
*/ ++ assert("jmacd-74412", ++ ret != -EINVAL && ret != -E_DEADLOCK && ret != -ENOSPC); ++ ++ if (ret) ++ goto exit; ++ ++ uber = uber_lock.node; ++ ++ write_lock_tree(tree); ++ tree->root_block = blk; ++ write_unlock_tree(tree); ++ ++ znode_make_dirty(uber); ++ } ++ ++ ret = znode_rehash(node, &blk); ++ exit: ++ if (ret) { ++ /* Get flush reserved block back if something fails, because ++ * callers assume that on error block wasn't relocated and its ++ * flush reserved block wasn't used. */ ++ if (flush_reserved_used) { ++ /* ++ * ok, we failed to move node into relocate ++ * set. Restore status quo. ++ */ ++ grabbed2flush_reserved((__u64) 1); ++ ZF_SET(node, JNODE_FLUSH_RESERVED); ++ } ++ } ++ zrelse(node); ++ done_lh(&uber_lock); ++ grabbed2free_mark(grabbed); ++ return ret; ++} ++ ++/* JNODE INTERFACE */ ++ ++/* Lock a node (if formatted) and then get its parent locked, set the child's ++ coordinate in the parent. If the child is the root node, the above_root ++ znode is returned but the coord is not set. This function may cause atom ++ fusion, but it is only used for read locks (at this point) and therefore ++ fusion only occurs when the parent is already dirty. */ ++/* Hans adds this note: remember to ask how expensive this operation is vs. storing parent ++ pointer in jnodes. */ ++static int ++jnode_lock_parent_coord(jnode * node, ++ coord_t * coord, ++ lock_handle * parent_lh, ++ load_count * parent_zh, ++ znode_lock_mode parent_mode, int try) ++{ ++ int ret; ++ ++ assert("edward-53", jnode_is_unformatted(node) || jnode_is_znode(node)); ++ assert("edward-54", jnode_is_unformatted(node) ++ || znode_is_any_locked(JZNODE(node))); ++ ++ if (!jnode_is_znode(node)) { ++ reiser4_key key; ++ tree_level stop_level = TWIG_LEVEL; ++ lookup_bias bias = FIND_EXACT; ++ ++ assert("edward-168", !(jnode_get_type(node) == JNODE_BITMAP)); ++ ++ /* The case when node is not znode, but can have parent coord ++ (unformatted node, node which represents cluster page, ++ etc..). 
Generate a key for the appropriate entry, search ++ in the tree using coord_by_key, which handles locking for ++ us. */ ++ ++ /* ++ * nothing is locked at this moment, so, nothing prevents ++ * concurrent truncate from removing jnode from inode. To ++ * prevent this spin-lock jnode. jnode can be truncated just ++ * after call to the jnode_build_key(), but this is ok, ++ * because coord_by_key() will just fail to find appropriate ++ * extent. ++ */ ++ spin_lock_jnode(node); ++ if (!JF_ISSET(node, JNODE_HEARD_BANSHEE)) { ++ jnode_build_key(node, &key); ++ ret = 0; ++ } else ++ ret = RETERR(-ENOENT); ++ spin_unlock_jnode(node); ++ ++ if (ret != 0) ++ return ret; ++ ++ if (jnode_is_cluster_page(node)) ++ stop_level = LEAF_LEVEL; ++ ++ assert("jmacd-1812", coord != NULL); ++ ++ ret = coord_by_key(jnode_get_tree(node), &key, coord, parent_lh, ++ parent_mode, bias, stop_level, stop_level, ++ CBK_UNIQUE, NULL /*ra_info */ ); ++ switch (ret) { ++ case CBK_COORD_NOTFOUND: ++ assert("edward-1038", ++ ergo(jnode_is_cluster_page(node), ++ JF_ISSET(node, JNODE_HEARD_BANSHEE))); ++ if (!JF_ISSET(node, JNODE_HEARD_BANSHEE)) ++ warning("nikita-3177", "Parent not found"); ++ return ret; ++ case CBK_COORD_FOUND: ++ if (coord->between != AT_UNIT) { ++ /* FIXME: comment needed */ ++ done_lh(parent_lh); ++ if (!JF_ISSET(node, JNODE_HEARD_BANSHEE)) { ++ warning("nikita-3178", ++ "Found but not happy: %i", ++ coord->between); ++ } ++ return RETERR(-ENOENT); ++ } ++ ret = incr_load_count_znode(parent_zh, parent_lh->node); ++ if (ret != 0) ++ return ret; ++ /* if (jnode_is_cluster_page(node)) { ++ races with write() are possible ++ check_child_cluster (parent_lh->node); ++ } ++ */ ++ break; ++ default: ++ return ret; ++ } ++ ++ } else { ++ int flags; ++ znode *z; ++ ++ z = JZNODE(node); ++ /* Formatted node case: */ ++ assert("jmacd-2061", !znode_is_root(z)); ++ ++ flags = GN_ALLOW_NOT_CONNECTED; ++ if (try) ++ flags |= GN_TRY_LOCK; ++ ++ ret = ++ reiser4_get_parent_flags(parent_lh, z, 
parent_mode, flags); ++ if (ret != 0) ++ /* -E_REPEAT is ok here, it is handled by the caller. */ ++ return ret; ++ ++ /* Make the child's position "hint" up-to-date. (Unless above ++ root, which caller must check.) */ ++ if (coord != NULL) { ++ ++ ret = incr_load_count_znode(parent_zh, parent_lh->node); ++ if (ret != 0) { ++ warning("jmacd-976812386", ++ "incr_load_count_znode failed: %d", ++ ret); ++ return ret; ++ } ++ ++ ret = find_child_ptr(parent_lh->node, z, coord); ++ if (ret != 0) { ++ warning("jmacd-976812", ++ "find_child_ptr failed: %d", ret); ++ return ret; ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++/* Get the (locked) next neighbor of a znode which is dirty and a member of the same atom. ++ If there is no next neighbor or the neighbor is not in memory or if there is a ++ neighbor but it is not dirty or not in the same atom, -E_NO_NEIGHBOR is returned. ++ In some cases the slum may include nodes which are not dirty, if so @check_dirty should be 0 */ ++static int neighbor_in_slum(znode * node, /* starting point */ ++ lock_handle * lock, /* lock on starting point */ ++ sideof side, /* left or right direction we seek the next node in */ ++ znode_lock_mode mode, /* kind of lock we want */ ++ int check_dirty) ++{ /* true if the neighbor should be dirty */ ++ int ret; ++ ++ assert("jmacd-6334", znode_is_connected(node)); ++ ++ ret = ++ reiser4_get_neighbor(lock, node, mode, ++ GN_SAME_ATOM | (side == ++ LEFT_SIDE ? GN_GO_LEFT : 0)); ++ ++ if (ret) { ++ /* May return -ENOENT or -E_NO_NEIGHBOR. */ ++ /* FIXME(C): check EINVAL, E_DEADLOCK */ ++ if (ret == -ENOENT) { ++ ret = RETERR(-E_NO_NEIGHBOR); ++ } ++ ++ return ret; ++ } ++ if (!check_dirty) ++ return 0; ++ /* Check dirty bit of locked znode, no races here */ ++ if (JF_ISSET(ZJNODE(lock->node), JNODE_DIRTY)) ++ return 0; ++ ++ done_lh(lock); ++ return RETERR(-E_NO_NEIGHBOR); ++} ++ ++/* Return true if two znodes have the same parent. 
This is called with both nodes ++ write-locked (for squeezing) so no tree lock is needed. */ ++static int znode_same_parents(znode * a, znode * b) ++{ ++ int result; ++ ++ assert("jmacd-7011", znode_is_write_locked(a)); ++ assert("jmacd-7012", znode_is_write_locked(b)); ++ ++ /* We lock the whole tree for this check.... I really don't like whole tree ++ * locks... -Hans */ ++ read_lock_tree(znode_get_tree(a)); ++ result = (znode_parent(a) == znode_parent(b)); ++ read_unlock_tree(znode_get_tree(a)); ++ return result; ++} ++ ++/* FLUSH SCAN */ ++ ++/* Initialize the flush_scan data structure. */ ++static void scan_init(flush_scan * scan) ++{ ++ memset(scan, 0, sizeof(*scan)); ++ init_lh(&scan->node_lock); ++ init_lh(&scan->parent_lock); ++ init_load_count(&scan->parent_load); ++ init_load_count(&scan->node_load); ++ coord_init_invalid(&scan->parent_coord, NULL); ++} ++ ++/* Release any resources held by the flush scan, e.g., release locks, free memory, etc. */ ++static void scan_done(flush_scan * scan) ++{ ++ done_load_count(&scan->node_load); ++ if (scan->node != NULL) { ++ jput(scan->node); ++ scan->node = NULL; ++ } ++ done_load_count(&scan->parent_load); ++ done_lh(&scan->parent_lock); ++ done_lh(&scan->node_lock); ++} ++ ++/* Returns true if flush scanning is finished. */ ++int reiser4_scan_finished(flush_scan * scan) ++{ ++ return scan->stop || (scan->direction == RIGHT_SIDE && ++ scan->count >= scan->max_count); ++} ++ ++/* Return true if the scan should continue to the @tonode. True if the node meets the ++ same_slum_check condition. If not, deref the "left" node and stop the scan. 
*/ ++int reiser4_scan_goto(flush_scan * scan, jnode * tonode) ++{ ++ int go = same_slum_check(scan->node, tonode, 1, 0); ++ ++ if (!go) { ++ scan->stop = 1; ++ jput(tonode); ++ } ++ ++ return go; ++} ++ ++/* Set the current scan->node, refcount it, increment count by the @add_count (number to ++ count, e.g., skipped unallocated nodes), deref previous current, and copy the current ++ parent coordinate. */ ++int ++scan_set_current(flush_scan * scan, jnode * node, unsigned add_count, ++ const coord_t * parent) ++{ ++ /* Release the old references, take the new reference. */ ++ done_load_count(&scan->node_load); ++ ++ if (scan->node != NULL) { ++ jput(scan->node); ++ } ++ scan->node = node; ++ scan->count += add_count; ++ ++ /* This next stmt is somewhat inefficient. The reiser4_scan_extent() code could ++ delay this update step until it finishes and update the parent_coord only once. ++ It did that before, but there was a bug and this was the easiest way to make it ++ correct. */ ++ if (parent != NULL) { ++ coord_dup(&scan->parent_coord, parent); ++ } ++ ++ /* Failure may happen at the incr_load_count call, but the caller can assume the reference ++ is safely taken. */ ++ return incr_load_count_jnode(&scan->node_load, node); ++} ++ ++/* Return true if scanning in the leftward direction. */ ++int reiser4_scanning_left(flush_scan * scan) ++{ ++ return scan->direction == LEFT_SIDE; ++} ++ ++/* Performs leftward scanning starting from either kind of node. Counts the starting ++ node. The right-scan object is passed in for the left-scan in order to copy the parent ++ of an unformatted starting position. This way we avoid searching for the unformatted ++ node's parent when scanning in each direction. If we search for the parent once it is ++ set in both scan objects. The limit parameter tells flush-scan when to stop. ++ ++ Rapid scanning is used only during scan_left, where we are interested in finding the ++ 'leftpoint' where we begin flushing. 
We are interested in stopping at the left child ++ of a twig that does not have a dirty left neighbor. THIS IS A SPECIAL CASE. The ++ problem is finding a way to flush only those nodes without unallocated children, and it ++ is difficult to solve in the bottom-up flushing algorithm we are currently using. The ++ problem can be solved by scanning left at every level as we go upward, but this would ++ basically bring us back to using a top-down allocation strategy, which we already tried ++ (see BK history from May 2002), and has a different set of problems. The top-down ++ strategy makes avoiding unallocated children easier, but makes it difficult to ++ propertly flush dirty children with clean parents that would otherwise stop the ++ top-down flush, only later to dirty the parent once the children are flushed. So we ++ solve the problem in the bottom-up algorithm with a special case for twigs and leaves ++ only. ++ ++ The first step in solving the problem is this rapid leftward scan. After we determine ++ that there are at least enough nodes counted to qualify for FLUSH_RELOCATE_THRESHOLD we ++ are no longer interested in the exact count, we are only interested in finding a the ++ best place to start the flush. We could choose one of two possibilities: ++ ++ 1. Stop at the leftmost child (of a twig) that does not have a dirty left neighbor. ++ This requires checking one leaf per rapid-scan twig ++ ++ 2. Stop at the leftmost child (of a twig) where there are no dirty children of the twig ++ to the left. This requires checking possibly all of the in-memory children of each ++ twig during the rapid scan. ++ ++ For now we implement the first policy. 
++*/ ++static int ++scan_left(flush_scan * scan, flush_scan * right, jnode * node, unsigned limit) ++{ ++ int ret = 0; ++ ++ scan->max_count = limit; ++ scan->direction = LEFT_SIDE; ++ ++ ret = scan_set_current(scan, jref(node), 1, NULL); ++ if (ret != 0) { ++ return ret; ++ } ++ ++ ret = scan_common(scan, right); ++ if (ret != 0) { ++ return ret; ++ } ++ ++ /* Before rapid scanning, we need a lock on scan->node so that we can get its ++ parent, only if formatted. */ ++ if (jnode_is_znode(scan->node)) { ++ ret = longterm_lock_znode(&scan->node_lock, JZNODE(scan->node), ++ ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI); ++ } ++ ++ /* Rapid_scan would go here (with limit set to FLUSH_RELOCATE_THRESHOLD). */ ++ return ret; ++} ++ ++/* Performs rightward scanning... Does not count the starting node. The limit parameter ++ is described in scan_left. If the starting node is unformatted then the ++ parent_coord was already set during scan_left. The rapid_after parameter is not used ++ during right-scanning. ++ ++ scan_right is only called if the scan_left operation does not count at least ++ FLUSH_RELOCATE_THRESHOLD nodes for flushing. Otherwise, the limit parameter is set to ++ the difference between scan-left's count and FLUSH_RELOCATE_THRESHOLD, meaning ++ scan-right counts as high as FLUSH_RELOCATE_THRESHOLD and then stops. */ ++static int scan_right(flush_scan * scan, jnode * node, unsigned limit) ++{ ++ int ret; ++ ++ scan->max_count = limit; ++ scan->direction = RIGHT_SIDE; ++ ++ ret = scan_set_current(scan, jref(node), 0, NULL); ++ if (ret != 0) { ++ return ret; ++ } ++ ++ return scan_common(scan, NULL); ++} ++ ++/* Common code to perform left or right scanning. */ ++static int scan_common(flush_scan * scan, flush_scan * other) ++{ ++ int ret; ++ ++ assert("nikita-2376", scan->node != NULL); ++ assert("edward-54", jnode_is_unformatted(scan->node) ++ || jnode_is_znode(scan->node)); ++ ++ /* Special case for starting at an unformatted node. 
Optimization: we only want ++ to search for the parent (which requires a tree traversal) once. Obviously, we ++ shouldn't have to call it once for the left scan and once for the right scan. ++ For this reason, if we search for the parent during scan-left we then duplicate ++ the coord/lock/load into the scan-right object. */ ++ if (jnode_is_unformatted(scan->node)) { ++ ret = scan_unformatted(scan, other); ++ if (ret != 0) ++ return ret; ++ } ++ /* This loop expects to start at a formatted position and performs chaining of ++ formatted regions */ ++ while (!reiser4_scan_finished(scan)) { ++ ++ ret = scan_formatted(scan); ++ if (ret != 0) { ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++static int scan_unformatted(flush_scan * scan, flush_scan * other) ++{ ++ int ret = 0; ++ int try = 0; ++ ++ if (!coord_is_invalid(&scan->parent_coord)) ++ goto scan; ++ ++ /* set parent coord from */ ++ if (!jnode_is_unformatted(scan->node)) { ++ /* formatted position */ ++ ++ lock_handle lock; ++ assert("edward-301", jnode_is_znode(scan->node)); ++ init_lh(&lock); ++ ++ /* ++ * when flush starts from unformatted node, first thing it ++ * does is tree traversal to find formatted parent of starting ++ * node. This parent is then kept lock across scans to the ++ * left and to the right. This means that during scan to the ++ * left we cannot take left-ward lock, because this is ++ * dead-lock prone. So, if we are scanning to the left and ++ * there is already lock held by this thread, ++ * jnode_lock_parent_coord() should use try-lock. ++ */ ++ try = reiser4_scanning_left(scan) ++ && !lock_stack_isclean(get_current_lock_stack()); ++ /* Need the node locked to get the parent lock, We have to ++ take write lock since there is at least one call path ++ where this znode is already write-locked by us. */ ++ ret = ++ longterm_lock_znode(&lock, JZNODE(scan->node), ++ ZNODE_WRITE_LOCK, ++ reiser4_scanning_left(scan) ? 
++ ZNODE_LOCK_LOPRI : ++ ZNODE_LOCK_HIPRI); ++ if (ret != 0) ++ /* EINVAL or E_DEADLOCK here mean... try again! At this point we've ++ scanned too far and can't back out, just start over. */ ++ return ret; ++ ++ ret = jnode_lock_parent_coord(scan->node, ++ &scan->parent_coord, ++ &scan->parent_lock, ++ &scan->parent_load, ++ ZNODE_WRITE_LOCK, try); ++ ++ /* FIXME(C): check EINVAL, E_DEADLOCK */ ++ done_lh(&lock); ++ if (ret == -E_REPEAT) { ++ scan->stop = 1; ++ return 0; ++ } ++ if (ret) ++ return ret; ++ ++ } else { ++ /* unformatted position */ ++ ++ ret = ++ jnode_lock_parent_coord(scan->node, &scan->parent_coord, ++ &scan->parent_lock, ++ &scan->parent_load, ++ ZNODE_WRITE_LOCK, try); ++ ++ if (IS_CBKERR(ret)) ++ return ret; ++ ++ if (ret == CBK_COORD_NOTFOUND) ++ /* FIXME(C): check EINVAL, E_DEADLOCK */ ++ return ret; ++ ++ /* parent was found */ ++ assert("jmacd-8661", other != NULL); ++ /* Duplicate the reference into the other flush_scan. */ ++ coord_dup(&other->parent_coord, &scan->parent_coord); ++ copy_lh(&other->parent_lock, &scan->parent_lock); ++ copy_load_count(&other->parent_load, &scan->parent_load); ++ } ++ scan: ++ return scan_by_coord(scan); ++} ++ ++/* Performs left- or rightward scanning starting from a formatted node. Follow left ++ pointers under tree lock as long as: ++ ++ - node->left/right is non-NULL ++ - node->left/right is connected, dirty ++ - node->left/right belongs to the same atom ++ - scan has not reached maximum count ++*/ ++static int scan_formatted(flush_scan * scan) ++{ ++ int ret; ++ znode *neighbor = NULL; ++ ++ assert("jmacd-1401", !reiser4_scan_finished(scan)); ++ ++ do { ++ znode *node = JZNODE(scan->node); ++ ++ /* Node should be connected, but if not stop the scan. */ ++ if (!znode_is_connected(node)) { ++ scan->stop = 1; ++ break; ++ } ++ ++ /* Lock the tree, check-for and reference the next sibling. 
*/ ++ read_lock_tree(znode_get_tree(node)); ++ ++ /* It may be that a node is inserted or removed between a node and its ++ left sibling while the tree lock is released, but the flush-scan count ++ does not need to be precise. Thus, we release the tree lock as soon as ++ we get the neighboring node. */ ++ neighbor = ++ reiser4_scanning_left(scan) ? node->left : node->right; ++ if (neighbor != NULL) { ++ zref(neighbor); ++ } ++ ++ read_unlock_tree(znode_get_tree(node)); ++ ++ /* If neighbor is NULL at the leaf level, need to check for an unformatted ++ sibling using the parent--break in any case. */ ++ if (neighbor == NULL) { ++ break; ++ } ++ ++ /* Check the condition for going left, break if it is not met. This also ++ releases (jputs) the neighbor if false. */ ++ if (!reiser4_scan_goto(scan, ZJNODE(neighbor))) { ++ break; ++ } ++ ++ /* Advance the flush_scan state to the left, repeat. */ ++ ret = scan_set_current(scan, ZJNODE(neighbor), 1, NULL); ++ if (ret != 0) { ++ return ret; ++ } ++ ++ } while (!reiser4_scan_finished(scan)); ++ ++ /* If neighbor is NULL then we reached the end of a formatted region, or else the ++ sibling is out of memory, now check for an extent to the left (as long as ++ LEAF_LEVEL). */ ++ if (neighbor != NULL || jnode_get_level(scan->node) != LEAF_LEVEL ++ || reiser4_scan_finished(scan)) { ++ scan->stop = 1; ++ return 0; ++ } ++ /* Otherwise, calls scan_by_coord for the right(left)most item of the ++ left(right) neighbor on the parent level, then possibly continue. */ ++ ++ coord_init_invalid(&scan->parent_coord, NULL); ++ return scan_unformatted(scan, NULL); ++} ++ ++/* NOTE-EDWARD: ++ This scans adjacent items of the same type and calls scan flush plugin for each one. ++ Performs left(right)ward scanning starting from a (possibly) unformatted node. If we start ++ from unformatted node, then we continue only if the next neighbor is also unformatted. 
++ When called from scan_formatted, we skip first iteration (to make sure that ++ right(left)most item of the left(right) neighbor on the parent level is of the same ++ type and set appropriate coord). */ ++static int scan_by_coord(flush_scan * scan) ++{ ++ int ret = 0; ++ int scan_this_coord; ++ lock_handle next_lock; ++ load_count next_load; ++ coord_t next_coord; ++ jnode *child; ++ item_plugin *iplug; ++ ++ init_lh(&next_lock); ++ init_load_count(&next_load); ++ scan_this_coord = (jnode_is_unformatted(scan->node) ? 1 : 0); ++ ++ /* set initial item id */ ++ iplug = item_plugin_by_coord(&scan->parent_coord); ++ ++ for (; !reiser4_scan_finished(scan); scan_this_coord = 1) { ++ if (scan_this_coord) { ++ /* Here we expect that unit is scannable. it would not be so due ++ * to race with extent->tail conversion. */ ++ if (iplug->f.scan == NULL) { ++ scan->stop = 1; ++ ret = -E_REPEAT; ++ /* skip the check at the end. */ ++ goto race; ++ } ++ ++ ret = iplug->f.scan(scan); ++ if (ret != 0) ++ goto exit; ++ ++ if (reiser4_scan_finished(scan)) { ++ checkchild(scan); ++ break; ++ } ++ } else { ++ /* the same race against truncate as above is possible ++ * here, it seems */ ++ ++ /* NOTE-JMACD: In this case, apply the same end-of-node logic but don't scan ++ the first coordinate. */ ++ assert("jmacd-1231", ++ item_is_internal(&scan->parent_coord)); ++ } ++ ++ if (iplug->f.utmost_child == NULL ++ || znode_get_level(scan->parent_coord.node) != TWIG_LEVEL) { ++ /* stop this coord and continue on parrent level */ ++ ret = ++ scan_set_current(scan, ++ ZJNODE(zref ++ (scan->parent_coord.node)), ++ 1, NULL); ++ if (ret != 0) ++ goto exit; ++ break; ++ } ++ ++ /* Either way, the invariant is that scan->parent_coord is set to the ++ parent of scan->node. Now get the next unit. */ ++ coord_dup(&next_coord, &scan->parent_coord); ++ coord_sideof_unit(&next_coord, scan->direction); ++ ++ /* If off-the-end of the twig, try the next twig. 
*/ ++ if (coord_is_after_sideof_unit(&next_coord, scan->direction)) { ++ /* We take the write lock because we may start flushing from this ++ * coordinate. */ ++ ret = ++ neighbor_in_slum(next_coord.node, &next_lock, ++ scan->direction, ZNODE_WRITE_LOCK, ++ 1 /* check dirty */ ); ++ if (ret == -E_NO_NEIGHBOR) { ++ scan->stop = 1; ++ ret = 0; ++ break; ++ } ++ ++ if (ret != 0) { ++ goto exit; ++ } ++ ++ ret = incr_load_count_znode(&next_load, next_lock.node); ++ if (ret != 0) { ++ goto exit; ++ } ++ ++ coord_init_sideof_unit(&next_coord, next_lock.node, ++ sideof_reverse(scan->direction)); ++ } ++ ++ iplug = item_plugin_by_coord(&next_coord); ++ ++ /* Get the next child. */ ++ ret = ++ iplug->f.utmost_child(&next_coord, ++ sideof_reverse(scan->direction), ++ &child); ++ if (ret != 0) ++ goto exit; ++ /* If the next child is not in memory, or, item_utmost_child ++ failed (due to race with unlink, most probably), stop ++ here. */ ++ if (child == NULL || IS_ERR(child)) { ++ scan->stop = 1; ++ checkchild(scan); ++ break; ++ } ++ ++ assert("nikita-2374", jnode_is_unformatted(child) ++ || jnode_is_znode(child)); ++ ++ /* See if it is dirty, part of the same atom. */ ++ if (!reiser4_scan_goto(scan, child)) { ++ checkchild(scan); ++ break; ++ } ++ ++ /* If so, make this child current. */ ++ ret = scan_set_current(scan, child, 1, &next_coord); ++ if (ret != 0) ++ goto exit; ++ ++ /* Now continue. If formatted we release the parent lock and return, then ++ proceed. */ ++ if (jnode_is_znode(child)) ++ break; ++ ++ /* Otherwise, repeat the above loop with next_coord. 
*/ ++ if (next_load.node != NULL) { ++ done_lh(&scan->parent_lock); ++ move_lh(&scan->parent_lock, &next_lock); ++ move_load_count(&scan->parent_load, &next_load); ++ } ++ } ++ ++ assert("jmacd-6233", ++ reiser4_scan_finished(scan) || jnode_is_znode(scan->node)); ++ exit: ++ checkchild(scan); ++ race: /* skip the above check */ ++ if (jnode_is_znode(scan->node)) { ++ done_lh(&scan->parent_lock); ++ done_load_count(&scan->parent_load); ++ } ++ ++ done_load_count(&next_load); ++ done_lh(&next_lock); ++ return ret; ++} ++ ++/* FLUSH POS HELPERS */ ++ ++/* Initialize the fields of a flush_position. */ ++static void pos_init(flush_pos_t * pos) ++{ ++ memset(pos, 0, sizeof *pos); ++ ++ pos->state = POS_INVALID; ++ coord_init_invalid(&pos->coord, NULL); ++ init_lh(&pos->lock); ++ init_load_count(&pos->load); ++ ++ reiser4_blocknr_hint_init(&pos->preceder); ++} ++ ++/* The flush loop inside squalloc periodically checks pos_valid to ++ determine when "enough flushing" has been performed. This will return true until one ++ of the following conditions is met: ++ ++ 1. the number of flush-queued nodes has reached the kernel-supplied "int *nr_to_flush" ++ parameter, meaning we have flushed as many blocks as the kernel requested. When ++ flushing to commit, this parameter is NULL. ++ ++ 2. pos_stop() is called because squalloc discovers that the "next" node in the ++ flush order is either non-existant, not dirty, or not in the same atom. ++*/ ++ ++static int pos_valid(flush_pos_t * pos) ++{ ++ return pos->state != POS_INVALID; ++} ++ ++/* Release any resources of a flush_position. Called when jnode_flush finishes. */ ++static void pos_done(flush_pos_t * pos) ++{ ++ pos_stop(pos); ++ reiser4_blocknr_hint_done(&pos->preceder); ++ if (convert_data(pos)) ++ free_convert_data(pos); ++} ++ ++/* Reset the point and parent. Called during flush subroutines to terminate the ++ squalloc loop. 
*/ ++static int pos_stop(flush_pos_t * pos) ++{ ++ pos->state = POS_INVALID; ++ done_lh(&pos->lock); ++ done_load_count(&pos->load); ++ coord_init_invalid(&pos->coord, NULL); ++ ++ if (pos->child) { ++ jput(pos->child); ++ pos->child = NULL; ++ } ++ ++ return 0; ++} ++ ++/* Return the flush_position's block allocator hint. */ ++reiser4_blocknr_hint *reiser4_pos_hint(flush_pos_t * pos) ++{ ++ return &pos->preceder; ++} ++ ++flush_queue_t * reiser4_pos_fq(flush_pos_t * pos) ++{ ++ return pos->fq; ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 90 ++ LocalWords: preceder ++ End: ++*/ +diff --git a/fs/reiser4/flush.h b/fs/reiser4/flush.h +new file mode 100644 +index 0000000..beab76b +--- /dev/null ++++ b/fs/reiser4/flush.h +@@ -0,0 +1,274 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* DECLARATIONS: */ ++ ++#if !defined(__REISER4_FLUSH_H__) ++#define __REISER4_FLUSH_H__ ++ ++#include "plugin/cluster.h" ++ ++/* The flush_scan data structure maintains the state of an in-progress flush-scan on a ++ single level of the tree. A flush-scan is used for counting the number of adjacent ++ nodes to flush, which is used to determine whether we should relocate, and it is also ++ used to find a starting point for flush. A flush-scan object can scan in both right ++ and left directions via the scan_left() and scan_right() interfaces. The ++ right- and left-variations are similar but perform different functions. When scanning ++ left we (optionally perform rapid scanning and then) longterm-lock the endpoint node. ++ When scanning right we are simply counting the number of adjacent, dirty nodes. */ ++struct flush_scan { ++ ++ /* The current number of nodes scanned on this level. */ ++ unsigned count; ++ ++ /* There may be a maximum number of nodes for a scan on any single level. 
When ++ going leftward, max_count is determined by FLUSH_SCAN_MAXNODES (see reiser4.h) */ ++ unsigned max_count; ++ ++ /* Direction: Set to one of the sideof enumeration: { LEFT_SIDE, RIGHT_SIDE }. */ ++ sideof direction; ++ ++ /* Initially @stop is set to false then set true once some condition stops the ++ search (e.g., we found a clean node before reaching max_count or we found a ++ node belonging to another atom). */ ++ int stop; ++ ++ /* The current scan position. If @node is non-NULL then its reference count has ++ been incremented to reflect this reference. */ ++ jnode *node; ++ ++ /* A handle for zload/zrelse of current scan position node. */ ++ load_count node_load; ++ ++ /* During left-scan, if the final position (a.k.a. endpoint node) is formatted the ++ node is locked using this lock handle. The endpoint needs to be locked for ++ transfer to the flush_position object after scanning finishes. */ ++ lock_handle node_lock; ++ ++ /* When the position is unformatted, its parent, coordinate, and parent ++ zload/zrelse handle. */ ++ lock_handle parent_lock; ++ coord_t parent_coord; ++ load_count parent_load; ++ ++ /* The block allocator preceder hint. Sometimes flush_scan determines what the ++ preceder is and if so it sets it here, after which it is copied into the ++ flush_position. Otherwise, the preceder is computed later. 
*/ ++ reiser4_block_nr preceder_blk; ++}; ++ ++typedef struct convert_item_info { ++ dc_item_stat d_cur; /* disk cluster state of the current item */ ++ dc_item_stat d_next; /* disk cluster state of the next slum item */ ++ struct inode *inode; ++ flow_t flow; ++} convert_item_info_t; ++ ++typedef struct convert_info { ++ int count; /* for squalloc terminating */ ++ reiser4_cluster_t clust; /* transform cluster */ ++ item_plugin *iplug; /* current item plugin */ ++ convert_item_info_t *itm; /* current item info */ ++} convert_info_t; ++ ++typedef enum flush_position_state { ++ POS_INVALID, /* Invalid or stopped pos, do not continue slum ++ * processing */ ++ POS_ON_LEAF, /* pos points to already prepped, locked formatted node at ++ * leaf level */ ++ POS_ON_EPOINT, /* pos keeps a lock on twig level, "coord" field is used ++ * to traverse unformatted nodes */ ++ POS_TO_LEAF, /* pos is being moved to leaf level */ ++ POS_TO_TWIG, /* pos is being moved to twig level */ ++ POS_END_OF_TWIG, /* special case of POS_ON_TWIG, when coord is after ++ * rightmost unit of the current twig */ ++ POS_ON_INTERNAL /* same as POS_ON_LEAF, but points to internal node */ ++} flushpos_state_t; ++ ++/* An encapsulation of the current flush point and all the parameters that are passed ++ through the entire squeeze-and-allocate stage of the flush routine. A single ++ flush_position object is constructed after left- and right-scanning finishes. */ ++struct flush_position { ++ flushpos_state_t state; ++ ++ coord_t coord; /* coord to traverse unformatted nodes */ ++ lock_handle lock; /* current lock we hold */ ++ load_count load; /* load status for current locked formatted node */ ++ ++ jnode *child; /* for passing a reference to unformatted child ++ * across pos state changes */ ++ ++ reiser4_blocknr_hint preceder; /* The flush 'hint' state. */ ++ int leaf_relocate; /* True if enough leaf-level nodes were ++ * found to suggest a relocate policy. 
*/ ++ int alloc_cnt; /* The number of nodes allocated during squeeze and allococate. */ ++ int prep_or_free_cnt; /* The number of nodes prepared for write (allocate) or squeezed and freed. */ ++ flush_queue_t *fq; ++ long *nr_written; /* number of nodes submitted to disk */ ++ int flags; /* a copy of jnode_flush flags argument */ ++ ++ znode *prev_twig; /* previous parent pointer value, used to catch ++ * processing of new twig node */ ++ convert_info_t *sq; /* convert info */ ++ ++ unsigned long pos_in_unit; /* for extents only. Position ++ within an extent unit of first ++ jnode of slum */ ++ long nr_to_write; /* number of unformatted nodes to handle on flush */ ++}; ++ ++static inline int item_convert_count(flush_pos_t * pos) ++{ ++ return pos->sq->count; ++} ++static inline void inc_item_convert_count(flush_pos_t * pos) ++{ ++ pos->sq->count++; ++} ++static inline void set_item_convert_count(flush_pos_t * pos, int count) ++{ ++ pos->sq->count = count; ++} ++static inline item_plugin *item_convert_plug(flush_pos_t * pos) ++{ ++ return pos->sq->iplug; ++} ++ ++static inline convert_info_t *convert_data(flush_pos_t * pos) ++{ ++ return pos->sq; ++} ++ ++static inline convert_item_info_t *item_convert_data(flush_pos_t * pos) ++{ ++ assert("edward-955", convert_data(pos)); ++ return pos->sq->itm; ++} ++ ++static inline tfm_cluster_t *tfm_cluster_sq(flush_pos_t * pos) ++{ ++ return &pos->sq->clust.tc; ++} ++ ++static inline tfm_stream_t *tfm_stream_sq(flush_pos_t * pos, tfm_stream_id id) ++{ ++ assert("edward-854", pos->sq != NULL); ++ return tfm_stream(tfm_cluster_sq(pos), id); ++} ++ ++static inline int chaining_data_present(flush_pos_t * pos) ++{ ++ return convert_data(pos) && item_convert_data(pos); ++} ++ ++/* Returns true if next node contains next item of the disk cluster ++ so item convert data should be moved to the right slum neighbor. 
++*/ ++static inline int should_chain_next_node(flush_pos_t * pos) ++{ ++ int result = 0; ++ ++ assert("edward-1007", chaining_data_present(pos)); ++ ++ switch (item_convert_data(pos)->d_next) { ++ case DC_CHAINED_ITEM: ++ result = 1; ++ break; ++ case DC_AFTER_CLUSTER: ++ break; ++ default: ++ impossible("edward-1009", "bad state of next slum item"); ++ } ++ return result; ++} ++ ++/* update item state in a disk cluster to assign conversion mode */ ++static inline void ++move_chaining_data(flush_pos_t * pos, int this_node /* where is next item */ ) ++{ ++ ++ assert("edward-1010", chaining_data_present(pos)); ++ ++ if (this_node == 0) { ++ /* next item is on the right neighbor */ ++ assert("edward-1011", ++ item_convert_data(pos)->d_cur == DC_FIRST_ITEM || ++ item_convert_data(pos)->d_cur == DC_CHAINED_ITEM); ++ assert("edward-1012", ++ item_convert_data(pos)->d_next == DC_CHAINED_ITEM); ++ ++ item_convert_data(pos)->d_cur = DC_CHAINED_ITEM; ++ item_convert_data(pos)->d_next = DC_INVALID_STATE; ++ } else { ++ /* next item is on the same node */ ++ assert("edward-1013", ++ item_convert_data(pos)->d_cur == DC_FIRST_ITEM || ++ item_convert_data(pos)->d_cur == DC_CHAINED_ITEM); ++ assert("edward-1227", ++ item_convert_data(pos)->d_next == DC_AFTER_CLUSTER || ++ item_convert_data(pos)->d_next == DC_INVALID_STATE); ++ ++ item_convert_data(pos)->d_cur = DC_AFTER_CLUSTER; ++ item_convert_data(pos)->d_next = DC_INVALID_STATE; ++ } ++} ++ ++static inline int should_convert_node(flush_pos_t * pos, znode * node) ++{ ++ return znode_convertible(node); ++} ++ ++/* true if there is attached convert item info */ ++static inline int should_convert_next_node(flush_pos_t * pos, znode * node) ++{ ++ return convert_data(pos) && item_convert_data(pos); ++} ++ ++#define SQUALLOC_THRESHOLD 256 ++ ++static inline int should_terminate_squalloc(flush_pos_t * pos) ++{ ++ return convert_data(pos) && ++ !item_convert_data(pos) && ++ item_convert_count(pos) >= SQUALLOC_THRESHOLD; ++} ++ ++void 
free_convert_data(flush_pos_t * pos); ++/* used in extent.c */ ++int scan_set_current(flush_scan * scan, jnode * node, unsigned add_size, ++ const coord_t * parent); ++int reiser4_scan_finished(flush_scan * scan); ++int reiser4_scanning_left(flush_scan * scan); ++int reiser4_scan_goto(flush_scan * scan, jnode * tonode); ++txn_atom *atom_locked_by_fq(flush_queue_t * fq); ++int reiser4_alloc_extent(flush_pos_t *flush_pos); ++squeeze_result squalloc_extent(znode *left, const coord_t *, flush_pos_t *, ++ reiser4_key *stop_key); ++extern int reiser4_init_fqs(void); ++extern void reiser4_done_fqs(void); ++ ++#if REISER4_DEBUG ++ ++extern void reiser4_check_fq(const txn_atom *atom); ++extern atomic_t flush_cnt; ++ ++#define check_preceder(blk) \ ++assert("nikita-2588", blk < reiser4_block_count(reiser4_get_current_sb())); ++extern void check_pos(flush_pos_t * pos); ++#else ++#define check_preceder(b) noop ++#define check_pos(pos) noop ++#endif ++ ++/* __REISER4_FLUSH_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 90 ++ LocalWords: preceder ++ End: ++*/ +diff --git a/fs/reiser4/flush_queue.c b/fs/reiser4/flush_queue.c +new file mode 100644 +index 0000000..f6c5d9a +--- /dev/null ++++ b/fs/reiser4/flush_queue.c +@@ -0,0 +1,680 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "debug.h" ++#include "super.h" ++#include "txnmgr.h" ++#include "jnode.h" ++#include "znode.h" ++#include "page_cache.h" ++#include "wander.h" ++#include "vfs_ops.h" ++#include "writeout.h" ++#include "flush.h" ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* A flush queue object is an accumulator for keeping jnodes prepared ++ by the jnode_flush() function for writing to disk. 
Those "queued" jnodes are ++ kept on the flush queue until memory pressure or atom commit asks ++ flush queues to write some or all from their jnodes. */ ++ ++/* ++ LOCKING: ++ ++ fq->guard spin lock protects fq->atom pointer and nothing else. fq->prepped ++ list protected by atom spin lock. fq->prepped list uses the following ++ locking: ++ ++ two ways to protect fq->prepped list for read-only list traversal: ++ ++ 1. atom spin-lock atom. ++ 2. fq is IN_USE, atom->nr_running_queues increased. ++ ++ and one for list modification: ++ ++ 1. atom is spin-locked and one condition is true: fq is IN_USE or ++ atom->nr_running_queues == 0. ++ ++ The deadlock-safe order for flush queues and atoms is: first lock atom, then ++ lock flush queue, then lock jnode. ++*/ ++ ++#define fq_in_use(fq) ((fq)->state & FQ_IN_USE) ++#define fq_ready(fq) (!fq_in_use(fq)) ++ ++#define mark_fq_in_use(fq) do { (fq)->state |= FQ_IN_USE; } while (0) ++#define mark_fq_ready(fq) do { (fq)->state &= ~FQ_IN_USE; } while (0) ++ ++/* get lock on atom from locked flush queue object */ ++static txn_atom *atom_locked_by_fq_nolock(flush_queue_t * fq) ++{ ++ /* This code is similar to jnode_get_atom(), look at it for the ++ * explanation. 
*/ ++ txn_atom *atom; ++ ++ assert_spin_locked(&(fq->guard)); ++ ++ while (1) { ++ atom = fq->atom; ++ if (atom == NULL) ++ break; ++ ++ if (spin_trylock_atom(atom)) ++ break; ++ ++ atomic_inc(&atom->refcount); ++ spin_unlock(&(fq->guard)); ++ spin_lock_atom(atom); ++ spin_lock(&(fq->guard)); ++ ++ if (fq->atom == atom) { ++ atomic_dec(&atom->refcount); ++ break; ++ } ++ ++ spin_unlock(&(fq->guard)); ++ atom_dec_and_unlock(atom); ++ spin_lock(&(fq->guard)); ++ } ++ ++ return atom; ++} ++ ++txn_atom *atom_locked_by_fq(flush_queue_t * fq) ++{ ++ txn_atom *atom; ++ ++ spin_lock(&(fq->guard)); ++ atom = atom_locked_by_fq_nolock(fq); ++ spin_unlock(&(fq->guard)); ++ return atom; ++} ++ ++static void init_fq(flush_queue_t * fq) ++{ ++ memset(fq, 0, sizeof *fq); ++ ++ atomic_set(&fq->nr_submitted, 0); ++ ++ INIT_LIST_HEAD(ATOM_FQ_LIST(fq)); ++ ++ init_waitqueue_head(&fq->wait); ++ spin_lock_init(&fq->guard); ++} ++ ++/* slab for flush queues */ ++static struct kmem_cache *fq_slab; ++ ++/** ++ * reiser4_init_fqs - create flush queue cache ++ * ++ * Initializes slab cache of flush queues. It is part of reiser4 module ++ * initialization. ++ */ ++int reiser4_init_fqs(void) ++{ ++ fq_slab = kmem_cache_create("fq", ++ sizeof(flush_queue_t), ++ 0, SLAB_HWCACHE_ALIGN, NULL, NULL); ++ if (fq_slab == NULL) ++ return RETERR(-ENOMEM); ++ return 0; ++} ++ ++/** ++ * reiser4_done_fqs - delete flush queue cache ++ * ++ * This is called on reiser4 module unloading or system shutdown. 
++ */ ++void reiser4_done_fqs(void) ++{ ++ destroy_reiser4_cache(&fq_slab); ++} ++ ++/* create new flush queue object */ ++static flush_queue_t *create_fq(gfp_t gfp) ++{ ++ flush_queue_t *fq; ++ ++ fq = kmem_cache_alloc(fq_slab, gfp); ++ if (fq) ++ init_fq(fq); ++ ++ return fq; ++} ++ ++/* adjust atom's and flush queue's counters of queued nodes */ ++static void count_enqueued_node(flush_queue_t * fq) ++{ ++ ON_DEBUG(fq->atom->num_queued++); ++} ++ ++static void count_dequeued_node(flush_queue_t * fq) ++{ ++ assert("zam-993", fq->atom->num_queued > 0); ++ ON_DEBUG(fq->atom->num_queued--); ++} ++ ++/* attach flush queue object to the atom */ ++static void attach_fq(txn_atom *atom, flush_queue_t *fq) ++{ ++ assert_spin_locked(&(atom->alock)); ++ list_add(&fq->alink, &atom->flush_queues); ++ fq->atom = atom; ++ ON_DEBUG(atom->nr_flush_queues++); ++} ++ ++static void detach_fq(flush_queue_t * fq) ++{ ++ assert_spin_locked(&(fq->atom->alock)); ++ ++ spin_lock(&(fq->guard)); ++ list_del_init(&fq->alink); ++ assert("vs-1456", fq->atom->nr_flush_queues > 0); ++ ON_DEBUG(fq->atom->nr_flush_queues--); ++ fq->atom = NULL; ++ spin_unlock(&(fq->guard)); ++} ++ ++/* destroy flush queue object */ ++static void done_fq(flush_queue_t * fq) ++{ ++ assert("zam-763", list_empty_careful(ATOM_FQ_LIST(fq))); ++ assert("zam-766", atomic_read(&fq->nr_submitted) == 0); ++ ++ kmem_cache_free(fq_slab, fq); ++} ++ ++/* */ ++static void mark_jnode_queued(flush_queue_t * fq, jnode * node) ++{ ++ JF_SET(node, JNODE_FLUSH_QUEUED); ++ count_enqueued_node(fq); ++} ++ ++/* Putting jnode into the flush queue. Both atom and jnode should be ++ spin-locked. 
*/ ++void queue_jnode(flush_queue_t * fq, jnode * node) ++{ ++ assert_spin_locked(&(node->guard)); ++ assert("zam-713", node->atom != NULL); ++ assert_spin_locked(&(node->atom->alock)); ++ assert("zam-716", fq->atom != NULL); ++ assert("zam-717", fq->atom == node->atom); ++ assert("zam-907", fq_in_use(fq)); ++ ++ assert("zam-714", JF_ISSET(node, JNODE_DIRTY)); ++ assert("zam-826", JF_ISSET(node, JNODE_RELOC)); ++ assert("vs-1481", !JF_ISSET(node, JNODE_FLUSH_QUEUED)); ++ assert("vs-1481", NODE_LIST(node) != FQ_LIST); ++ ++ mark_jnode_queued(fq, node); ++ list_move_tail(&node->capture_link, ATOM_FQ_LIST(fq)); ++ ++ ON_DEBUG(count_jnode(node->atom, node, NODE_LIST(node), ++ FQ_LIST, 1)); ++} ++ ++/* repeatable process for waiting io completion on a flush queue object */ ++static int wait_io(flush_queue_t * fq, int *nr_io_errors) ++{ ++ assert("zam-738", fq->atom != NULL); ++ assert_spin_locked(&(fq->atom->alock)); ++ assert("zam-736", fq_in_use(fq)); ++ assert("zam-911", list_empty_careful(ATOM_FQ_LIST(fq))); ++ ++ if (atomic_read(&fq->nr_submitted) != 0) { ++ struct super_block *super; ++ ++ spin_unlock_atom(fq->atom); ++ ++ assert("nikita-3013", reiser4_schedulable()); ++ ++ super = reiser4_get_current_sb(); ++ ++ /* FIXME: this is instead of blk_run_queues() */ ++ blk_run_address_space(reiser4_get_super_fake(super)->i_mapping); ++ ++ if (!(super->s_flags & MS_RDONLY)) ++ wait_event(fq->wait, atomic_read(&fq->nr_submitted) == 0); ++ ++ /* Ask the caller to re-acquire the locks and call this ++ function again. Note: this technique is commonly used in ++ the txnmgr code. 
*/ ++ return -E_REPEAT; ++ } ++ ++ *nr_io_errors += atomic_read(&fq->nr_errors); ++ return 0; ++} ++ ++/* wait on I/O completion, re-submit dirty nodes to write */ ++static int finish_fq(flush_queue_t * fq, int *nr_io_errors) ++{ ++ int ret; ++ txn_atom *atom = fq->atom; ++ ++ assert("zam-801", atom != NULL); ++ assert_spin_locked(&(atom->alock)); ++ assert("zam-762", fq_in_use(fq)); ++ ++ ret = wait_io(fq, nr_io_errors); ++ if (ret) ++ return ret; ++ ++ detach_fq(fq); ++ done_fq(fq); ++ ++ reiser4_atom_send_event(atom); ++ ++ return 0; ++} ++ ++/* wait for all i/o for given atom to be completed, actually do one iteration ++ on that and return -E_REPEAT if there more iterations needed */ ++static int finish_all_fq(txn_atom * atom, int *nr_io_errors) ++{ ++ flush_queue_t *fq; ++ ++ assert_spin_locked(&(atom->alock)); ++ ++ if (list_empty_careful(&atom->flush_queues)) ++ return 0; ++ ++ list_for_each_entry(fq, &atom->flush_queues, alink) { ++ if (fq_ready(fq)) { ++ int ret; ++ ++ mark_fq_in_use(fq); ++ assert("vs-1247", fq->owner == NULL); ++ ON_DEBUG(fq->owner = current); ++ ret = finish_fq(fq, nr_io_errors); ++ ++ if (*nr_io_errors) ++ reiser4_handle_error(); ++ ++ if (ret) { ++ reiser4_fq_put(fq); ++ return ret; ++ } ++ ++ spin_unlock_atom(atom); ++ ++ return -E_REPEAT; ++ } ++ } ++ ++ /* All flush queues are in use; atom remains locked */ ++ return -EBUSY; ++} ++ ++/* wait all i/o for current atom */ ++int current_atom_finish_all_fq(void) ++{ ++ txn_atom *atom; ++ int nr_io_errors = 0; ++ int ret = 0; ++ ++ do { ++ while (1) { ++ atom = get_current_atom_locked(); ++ ret = finish_all_fq(atom, &nr_io_errors); ++ if (ret != -EBUSY) ++ break; ++ reiser4_atom_wait_event(atom); ++ } ++ } while (ret == -E_REPEAT); ++ ++ /* we do not need locked atom after this function finishes, SUCCESS or ++ -EBUSY are two return codes when atom remains locked after ++ finish_all_fq */ ++ if (!ret) ++ spin_unlock_atom(atom); ++ ++ assert_spin_not_locked(&(atom->alock)); ++ ++ if (ret) 
++ return ret; ++ ++ if (nr_io_errors) ++ return RETERR(-EIO); ++ ++ return 0; ++} ++ ++/* change node->atom field for all jnode from given list */ ++static void ++scan_fq_and_update_atom_ref(struct list_head *list, txn_atom *atom) ++{ ++ jnode *cur; ++ ++ list_for_each_entry(cur, list, capture_link) { ++ spin_lock_jnode(cur); ++ cur->atom = atom; ++ spin_unlock_jnode(cur); ++ } ++} ++ ++/* support for atom fusion operation */ ++void reiser4_fuse_fq(txn_atom *to, txn_atom *from) ++{ ++ flush_queue_t *fq; ++ ++ assert_spin_locked(&(to->alock)); ++ assert_spin_locked(&(from->alock)); ++ ++ list_for_each_entry(fq, &from->flush_queues, alink) { ++ scan_fq_and_update_atom_ref(ATOM_FQ_LIST(fq), to); ++ spin_lock(&(fq->guard)); ++ fq->atom = to; ++ spin_unlock(&(fq->guard)); ++ } ++ ++ list_splice_init(&from->flush_queues, to->flush_queues.prev); ++ ++#if REISER4_DEBUG ++ to->num_queued += from->num_queued; ++ to->nr_flush_queues += from->nr_flush_queues; ++ from->nr_flush_queues = 0; ++#endif ++} ++ ++#if REISER4_DEBUG ++int atom_fq_parts_are_clean(txn_atom * atom) ++{ ++ assert("zam-915", atom != NULL); ++ return list_empty_careful(&atom->flush_queues); ++} ++#endif ++/* Bio i/o completion routine for reiser4 write operations. */ ++static int ++end_io_handler(struct bio *bio, unsigned int bytes_done UNUSED_ARG, ++ int err) ++{ ++ int i; ++ int nr_errors = 0; ++ flush_queue_t *fq; ++ ++ assert("zam-958", bio->bi_rw & WRITE); ++ ++ /* i/o op. is not fully completed */ ++ if (bio->bi_size != 0) ++ return 1; ++ ++ if (err == -EOPNOTSUPP) ++ set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); ++ ++ /* we expect that bio->private is set to NULL or fq object which is used ++ * for synchronization and error counting. */ ++ fq = bio->bi_private; ++ /* Check all elements of io_vec for correct write completion. 
*/ ++ for (i = 0; i < bio->bi_vcnt; i += 1) { ++ struct page *pg = bio->bi_io_vec[i].bv_page; ++ ++ if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { ++ SetPageError(pg); ++ nr_errors++; ++ } ++ ++ { ++ /* jnode WRITEBACK ("write is in progress bit") is ++ * atomically cleared here. */ ++ jnode *node; ++ ++ assert("zam-736", pg != NULL); ++ assert("zam-736", PagePrivate(pg)); ++ node = jprivate(pg); ++ ++ JF_CLR(node, JNODE_WRITEBACK); ++ } ++ ++ end_page_writeback(pg); ++ page_cache_release(pg); ++ } ++ ++ if (fq) { ++ /* count i/o error in fq object */ ++ atomic_add(nr_errors, &fq->nr_errors); ++ ++ /* If all write requests registered in this "fq" are done we up ++ * the waiter. */ ++ if (atomic_sub_and_test(bio->bi_vcnt, &fq->nr_submitted)) ++ wake_up(&fq->wait); ++ } ++ ++ bio_put(bio); ++ return 0; ++} ++ ++/* Count I/O requests which will be submitted by @bio in given flush queues ++ @fq */ ++void add_fq_to_bio(flush_queue_t * fq, struct bio *bio) ++{ ++ bio->bi_private = fq; ++ bio->bi_end_io = end_io_handler; ++ ++ if (fq) ++ atomic_add(bio->bi_vcnt, &fq->nr_submitted); ++} ++ ++/* Move all queued nodes out from @fq->prepped list. 
*/ ++static void release_prepped_list(flush_queue_t * fq) ++{ ++ txn_atom *atom; ++ ++ assert("zam-904", fq_in_use(fq)); ++ atom = atom_locked_by_fq(fq); ++ ++ while (!list_empty(ATOM_FQ_LIST(fq))) { ++ jnode *cur; ++ ++ cur = list_entry(ATOM_FQ_LIST(fq)->next, jnode, capture_link); ++ list_del_init(&cur->capture_link); ++ ++ count_dequeued_node(fq); ++ spin_lock_jnode(cur); ++ assert("nikita-3154", !JF_ISSET(cur, JNODE_OVRWR)); ++ assert("nikita-3154", JF_ISSET(cur, JNODE_RELOC)); ++ assert("nikita-3154", JF_ISSET(cur, JNODE_FLUSH_QUEUED)); ++ JF_CLR(cur, JNODE_FLUSH_QUEUED); ++ ++ if (JF_ISSET(cur, JNODE_DIRTY)) { ++ list_add_tail(&cur->capture_link, ++ ATOM_DIRTY_LIST(atom, jnode_get_level(cur))); ++ ON_DEBUG(count_jnode(atom, cur, FQ_LIST, ++ DIRTY_LIST, 1)); ++ } else { ++ list_add_tail(&cur->capture_link, ATOM_CLEAN_LIST(atom)); ++ ON_DEBUG(count_jnode(atom, cur, FQ_LIST, ++ CLEAN_LIST, 1)); ++ } ++ ++ spin_unlock_jnode(cur); ++ } ++ ++ if (--atom->nr_running_queues == 0) ++ reiser4_atom_send_event(atom); ++ ++ spin_unlock_atom(atom); ++} ++ ++/* Submit write requests for nodes on the already filled flush queue @fq. ++ ++ @fq: flush queue object which contains jnodes we can (and will) write. ++ @return: number of submitted blocks (>=0) if success, otherwise -- an error ++ code (<0). */ ++int reiser4_write_fq(flush_queue_t * fq, long *nr_submitted, int flags) ++{ ++ int ret; ++ txn_atom *atom; ++ ++ while (1) { ++ atom = atom_locked_by_fq(fq); ++ assert("zam-924", atom); ++ /* do not write fq in parallel. */ ++ if (atom->nr_running_queues == 0 ++ || !(flags & WRITEOUT_SINGLE_STREAM)) ++ break; ++ reiser4_atom_wait_event(atom); ++ } ++ ++ atom->nr_running_queues++; ++ spin_unlock_atom(atom); ++ ++ ret = write_jnode_list(ATOM_FQ_LIST(fq), fq, nr_submitted, flags); ++ release_prepped_list(fq); ++ ++ return ret; ++} ++ ++/* Getting flush queue object for exclusive use by one thread. May require ++ several iterations which is indicated by -E_REPEAT return code. 
++ ++ This function does not contain code for obtaining an atom lock because an ++ atom lock is obtained by different ways in different parts of reiser4, ++ usually it is current atom, but we need a possibility for getting fq for the ++ atom of given jnode. */ ++static int fq_by_atom_gfp(txn_atom *atom, flush_queue_t **new_fq, gfp_t gfp) ++{ ++ flush_queue_t *fq; ++ ++ assert_spin_locked(&(atom->alock)); ++ ++ fq = list_entry(atom->flush_queues.next, flush_queue_t, alink); ++ while (&atom->flush_queues != &fq->alink) { ++ spin_lock(&(fq->guard)); ++ ++ if (fq_ready(fq)) { ++ mark_fq_in_use(fq); ++ assert("vs-1246", fq->owner == NULL); ++ ON_DEBUG(fq->owner = current); ++ spin_unlock(&(fq->guard)); ++ ++ if (*new_fq) ++ done_fq(*new_fq); ++ ++ *new_fq = fq; ++ ++ return 0; ++ } ++ ++ spin_unlock(&(fq->guard)); ++ ++ fq = list_entry(fq->alink.next, flush_queue_t, alink); ++ } ++ ++ /* Use previously allocated fq object */ ++ if (*new_fq) { ++ mark_fq_in_use(*new_fq); ++ assert("vs-1248", (*new_fq)->owner == 0); ++ ON_DEBUG((*new_fq)->owner = current); ++ attach_fq(atom, *new_fq); ++ ++ return 0; ++ } ++ ++ spin_unlock_atom(atom); ++ ++ *new_fq = create_fq(gfp); ++ ++ if (*new_fq == NULL) ++ return RETERR(-ENOMEM); ++ ++ return RETERR(-E_REPEAT); ++} ++ ++int reiser4_fq_by_atom(txn_atom * atom, flush_queue_t ** new_fq) ++{ ++ return fq_by_atom_gfp(atom, new_fq, reiser4_ctx_gfp_mask_get()); ++} ++ ++/* A wrapper around reiser4_fq_by_atom for getting a flush queue ++ object for current atom, if success fq->atom remains locked. 
*/ ++flush_queue_t *get_fq_for_current_atom(void) ++{ ++ flush_queue_t *fq = NULL; ++ txn_atom *atom; ++ int ret; ++ ++ do { ++ atom = get_current_atom_locked(); ++ ret = reiser4_fq_by_atom(atom, &fq); ++ } while (ret == -E_REPEAT); ++ ++ if (ret) ++ return ERR_PTR(ret); ++ return fq; ++} ++ ++/* Releasing flush queue object after exclusive use */ ++void reiser4_fq_put_nolock(flush_queue_t *fq) ++{ ++ assert("zam-747", fq->atom != NULL); ++ assert("zam-902", list_empty_careful(ATOM_FQ_LIST(fq))); ++ mark_fq_ready(fq); ++ assert("vs-1245", fq->owner == current); ++ ON_DEBUG(fq->owner = NULL); ++} ++ ++void reiser4_fq_put(flush_queue_t * fq) ++{ ++ txn_atom *atom; ++ ++ spin_lock(&(fq->guard)); ++ atom = atom_locked_by_fq_nolock(fq); ++ ++ assert("zam-746", atom != NULL); ++ ++ reiser4_fq_put_nolock(fq); ++ reiser4_atom_send_event(atom); ++ ++ spin_unlock(&(fq->guard)); ++ spin_unlock_atom(atom); ++} ++ ++/* A part of atom object initialization related to the embedded flush queue ++ list head */ ++ ++void init_atom_fq_parts(txn_atom *atom) ++{ ++ INIT_LIST_HEAD(&atom->flush_queues); ++} ++ ++#if REISER4_DEBUG ++ ++void reiser4_check_fq(const txn_atom *atom) ++{ ++ /* check number of nodes on all atom's flush queues */ ++ flush_queue_t *fq; ++ int count; ++ struct list_head *pos; ++ ++ count = 0; ++ list_for_each_entry(fq, &atom->flush_queues, alink) { ++ spin_lock(&(fq->guard)); ++ /* calculate number of jnodes on fq' list of prepped jnodes */ ++ list_for_each(pos, ATOM_FQ_LIST(fq)) ++ count++; ++ spin_unlock(&(fq->guard)); ++ } ++ if (count != atom->fq) ++ warning("", "fq counter %d, real %d\n", atom->fq, count); ++ ++} ++ ++#endif ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * scroll-step: 1 ++ * End: ++ */ +diff --git a/fs/reiser4/forward.h b/fs/reiser4/forward.h +new file mode 100644 +index 0000000..8536833 +--- /dev/null ++++ b/fs/reiser4/forward.h +@@ -0,0 
+1,256 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Forward declarations. Thank you Kernighan. */ ++ ++#if !defined( __REISER4_FORWARD_H__ ) ++#define __REISER4_FORWARD_H__ ++ ++#include ++#include ++ ++typedef struct zlock zlock; ++typedef struct lock_stack lock_stack; ++typedef struct lock_handle lock_handle; ++typedef struct znode znode; ++typedef struct flow flow_t; ++typedef struct coord coord_t; ++typedef struct tree_access_pointer tap_t; ++typedef struct item_coord item_coord; ++typedef struct shift_params shift_params; ++typedef struct reiser4_object_create_data reiser4_object_create_data; ++typedef union reiser4_plugin reiser4_plugin; ++typedef __u16 reiser4_plugin_id; ++typedef __u64 reiser4_plugin_groups; ++typedef struct item_plugin item_plugin; ++typedef struct jnode_plugin jnode_plugin; ++typedef struct reiser4_item_data reiser4_item_data; ++typedef union reiser4_key reiser4_key; ++typedef struct reiser4_tree reiser4_tree; ++typedef struct carry_cut_data carry_cut_data; ++typedef struct carry_kill_data carry_kill_data; ++typedef struct carry_tree_op carry_tree_op; ++typedef struct carry_tree_node carry_tree_node; ++typedef struct carry_plugin_info carry_plugin_info; ++typedef struct reiser4_journal reiser4_journal; ++typedef struct txn_atom txn_atom; ++typedef struct txn_handle txn_handle; ++typedef struct txn_mgr txn_mgr; ++typedef struct reiser4_dir_entry_desc reiser4_dir_entry_desc; ++typedef struct reiser4_context reiser4_context; ++typedef struct carry_level carry_level; ++typedef struct blocknr_set_entry blocknr_set_entry; ++/* super_block->s_fs_info points to this */ ++typedef struct reiser4_super_info_data reiser4_super_info_data; ++/* next two objects are fields of reiser4_super_info_data */ ++typedef struct reiser4_oid_allocator reiser4_oid_allocator; ++typedef struct reiser4_space_allocator reiser4_space_allocator; ++ ++typedef struct flush_scan flush_scan; ++typedef struct 
flush_position flush_pos_t; ++ ++typedef unsigned short pos_in_node_t; ++#define MAX_POS_IN_NODE 65535 ++ ++typedef struct jnode jnode; ++typedef struct reiser4_blocknr_hint reiser4_blocknr_hint; ++ ++typedef struct uf_coord uf_coord_t; ++typedef struct hint hint_t; ++ ++typedef struct ktxnmgrd_context ktxnmgrd_context; ++ ++typedef struct reiser4_xattr_plugin reiser4_xattr_plugin; ++ ++struct inode; ++struct page; ++struct file; ++struct dentry; ++struct super_block; ++ ++/* return values of coord_by_key(). cbk == coord_by_key */ ++typedef enum { ++ CBK_COORD_FOUND = 0, ++ CBK_COORD_NOTFOUND = -ENOENT, ++} lookup_result; ++ ++/* results of lookup with directory file */ ++typedef enum { ++ FILE_NAME_FOUND = 0, ++ FILE_NAME_NOTFOUND = -ENOENT, ++ FILE_IO_ERROR = -EIO, /* FIXME: it seems silly to have special OOM, IO_ERROR return codes for each search. */ ++ FILE_OOM = -ENOMEM /* FIXME: it seems silly to have special OOM, IO_ERROR return codes for each search. */ ++} file_lookup_result; ++ ++/* behaviors of lookup. If coord we are looking for is actually in a tree, ++ both coincide. */ ++typedef enum { ++ /* search exactly for the coord with key given */ ++ FIND_EXACT, ++ /* search for coord with the maximal key not greater than one ++ given */ ++ FIND_MAX_NOT_MORE_THAN /*LEFT_SLANT_BIAS */ ++} lookup_bias; ++ ++typedef enum { ++ /* number of leaf level of the tree ++ The fake root has (tree_level=0). */ ++ LEAF_LEVEL = 1, ++ ++ /* number of level one above leaf level of the tree. ++ ++ It is supposed that internal tree used by reiser4 to store file ++ system data and meta data will have height 2 initially (when ++ created by mkfs). ++ */ ++ TWIG_LEVEL = 2, ++} tree_level; ++ ++/* The "real" maximum ztree height is the 0-origin size of any per-level ++ array, since the zero'th level is not used. */ ++#define REAL_MAX_ZTREE_HEIGHT (REISER4_MAX_ZTREE_HEIGHT-LEAF_LEVEL) ++ ++/* enumeration of possible mutual position of item and coord. 
This enum is ++ return type of ->is_in_item() item plugin method which see. */ ++typedef enum { ++ /* coord is on the left of an item */ ++ IP_ON_THE_LEFT, ++ /* coord is inside item */ ++ IP_INSIDE, ++ /* coord is inside item, but to the right of the rightmost unit of ++ this item */ ++ IP_RIGHT_EDGE, ++ /* coord is on the right of an item */ ++ IP_ON_THE_RIGHT ++} interposition; ++ ++/* type of lock to acquire on znode before returning it to caller */ ++typedef enum { ++ ZNODE_NO_LOCK = 0, ++ ZNODE_READ_LOCK = 1, ++ ZNODE_WRITE_LOCK = 2, ++} znode_lock_mode; ++ ++/* type of lock request */ ++typedef enum { ++ ZNODE_LOCK_LOPRI = 0, ++ ZNODE_LOCK_HIPRI = (1 << 0), ++ ++ /* By setting the ZNODE_LOCK_NONBLOCK flag in a lock request the call to longterm_lock_znode will not sleep ++ waiting for the lock to become available. If the lock is unavailable, reiser4_znode_lock will immediately ++ return the value -E_REPEAT. */ ++ ZNODE_LOCK_NONBLOCK = (1 << 1), ++ /* An option for longterm_lock_znode which prevents atom fusion */ ++ ZNODE_LOCK_DONT_FUSE = (1 << 2) ++} znode_lock_request; ++ ++typedef enum { READ_OP = 0, WRITE_OP = 1 } rw_op; ++ ++/* used to specify direction of shift. 
These must be -1 and 1 */ ++typedef enum { ++ SHIFT_LEFT = 1, ++ SHIFT_RIGHT = -1 ++} shift_direction; ++ ++typedef enum { ++ LEFT_SIDE, ++ RIGHT_SIDE ++} sideof; ++ ++#define round_up( value, order ) \ ++ ( ( typeof( value ) )( ( ( long ) ( value ) + ( order ) - 1U ) & \ ++ ~( ( order ) - 1 ) ) ) ++ ++/* values returned by squalloc_right_neighbor and its auxiliary functions */ ++typedef enum { ++ /* unit of internal item is moved */ ++ SUBTREE_MOVED = 0, ++ /* nothing else can be squeezed into left neighbor */ ++ SQUEEZE_TARGET_FULL = 1, ++ /* all content of node is squeezed into its left neighbor */ ++ SQUEEZE_SOURCE_EMPTY = 2, ++ /* one more item is copied (this is only returned by ++ allocate_and_copy_extent to squalloc_twig)) */ ++ SQUEEZE_CONTINUE = 3 ++} squeeze_result; ++ ++/* Do not change items ids. If you do - there will be format change */ ++typedef enum { ++ STATIC_STAT_DATA_ID = 0x0, ++ SIMPLE_DIR_ENTRY_ID = 0x1, ++ COMPOUND_DIR_ID = 0x2, ++ NODE_POINTER_ID = 0x3, ++ EXTENT_POINTER_ID = 0x5, ++ FORMATTING_ID = 0x6, ++ CTAIL_ID = 0x7, ++ BLACK_BOX_ID = 0x8, ++ LAST_ITEM_ID = 0x9 ++} item_id; ++ ++/* Flags passed to jnode_flush() to allow it to distinguish default settings based on ++ whether commit() was called or VM memory pressure was applied. */ ++typedef enum { ++ /* submit flush queue to disk at jnode_flush completion */ ++ JNODE_FLUSH_WRITE_BLOCKS = 1, ++ ++ /* flush is called for commit */ ++ JNODE_FLUSH_COMMIT = 2, ++ /* not implemented */ ++ JNODE_FLUSH_MEMORY_FORMATTED = 4, ++ ++ /* not implemented */ ++ JNODE_FLUSH_MEMORY_UNFORMATTED = 8, ++} jnode_flush_flags; ++ ++/* Flags to insert/paste carry operations. Currently they only used in ++ flushing code, but in future, they can be used to optimize for repetitive ++ accesses. 
*/ ++typedef enum { ++ /* carry is not allowed to shift data to the left when trying to find ++ free space */ ++ COPI_DONT_SHIFT_LEFT = (1 << 0), ++ /* carry is not allowed to shift data to the right when trying to find ++ free space */ ++ COPI_DONT_SHIFT_RIGHT = (1 << 1), ++ /* carry is not allowed to allocate new node(s) when trying to find ++ free space */ ++ COPI_DONT_ALLOCATE = (1 << 2), ++ /* try to load left neighbor if its not in a cache */ ++ COPI_LOAD_LEFT = (1 << 3), ++ /* try to load right neighbor if its not in a cache */ ++ COPI_LOAD_RIGHT = (1 << 4), ++ /* shift insertion point to the left neighbor */ ++ COPI_GO_LEFT = (1 << 5), ++ /* shift insertion point to the right neighbor */ ++ COPI_GO_RIGHT = (1 << 6), ++ /* try to step back into original node if insertion into new node ++ fails after shifting data there. */ ++ COPI_STEP_BACK = (1 << 7) ++} cop_insert_flag; ++ ++typedef enum { ++ SAFE_UNLINK, /* safe-link for unlink */ ++ SAFE_TRUNCATE /* safe-link for truncate */ ++} reiser4_safe_link_t; ++ ++/* this is to show on which list of atom jnode is */ ++typedef enum { ++ NOT_CAPTURED, ++ DIRTY_LIST, ++ CLEAN_LIST, ++ FQ_LIST, ++ WB_LIST, ++ OVRWR_LIST ++} atom_list; ++ ++/* __REISER4_FORWARD_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/fsdata.c b/fs/reiser4/fsdata.c +new file mode 100644 +index 0000000..47da01c +--- /dev/null ++++ b/fs/reiser4/fsdata.c +@@ -0,0 +1,804 @@ ++/* Copyright 2001, 2002, 2003, 2004, 2005 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++#include "fsdata.h" ++#include "inode.h" ++ ++ ++/* cache or dir_cursors */ ++static struct kmem_cache *d_cursor_cache; ++static struct shrinker *d_cursor_shrinker; ++ ++/* list of unused cursors */ ++static LIST_HEAD(cursor_cache); ++ ++/* number of cursors in list of ununsed cursors */ ++static unsigned long d_cursor_unused = 0; ++ ++/* spinlock protecting manipulations with dir_cursor's hash table and lists */ ++DEFINE_SPINLOCK(d_lock); ++ ++static reiser4_file_fsdata *create_fsdata(struct file *file); ++static int file_is_stateless(struct file *file); ++static void free_fsdata(reiser4_file_fsdata *fsdata); ++static void kill_cursor(dir_cursor *); ++ ++/** ++ * d_cursor_shrink - shrink callback for cache of dir_cursor-s ++ * @nr: number of objects to free ++ * @mask: GFP mask ++ * ++ * Shrinks d_cursor_cache. Scan LRU list of unused cursors, freeing requested ++ * number. Return number of still freeable cursors. ++ */ ++static int d_cursor_shrink(int nr, gfp_t mask) ++{ ++ if (nr != 0) { ++ dir_cursor *scan; ++ int killed; ++ ++ killed = 0; ++ spin_lock(&d_lock); ++ while (!list_empty(&cursor_cache)) { ++ scan = list_entry(cursor_cache.next, dir_cursor, alist); ++ assert("nikita-3567", scan->ref == 0); ++ kill_cursor(scan); ++ ++killed; ++ --nr; ++ if (nr == 0) ++ break; ++ } ++ spin_unlock(&d_lock); ++ } ++ return d_cursor_unused; ++} ++ ++/** ++ * reiser4_init_d_cursor - create d_cursor cache ++ * ++ * Initializes slab cache of d_cursors. It is part of reiser4 module ++ * initialization. 
++ */ ++int reiser4_init_d_cursor(void) ++{ ++ d_cursor_cache = kmem_cache_create("d_cursor", sizeof(dir_cursor), 0, ++ SLAB_HWCACHE_ALIGN, NULL, NULL); ++ if (d_cursor_cache == NULL) ++ return RETERR(-ENOMEM); ++ ++ /* ++ * actually, d_cursors are "priceless", because there is no way to ++ * recover information stored in them. On the other hand, we don't ++ * want to consume all kernel memory by them. As a compromise, just ++ * assign higher "seeks" value to d_cursor cache, so that it will be ++ * shrunk only if system is really tight on memory. ++ */ ++ d_cursor_shrinker = set_shrinker(DEFAULT_SEEKS << 3, ++ d_cursor_shrink); ++ if (d_cursor_shrinker == NULL) { ++ destroy_reiser4_cache(&d_cursor_cache); ++ d_cursor_cache = NULL; ++ return RETERR(-ENOMEM); ++ } ++ return 0; ++} ++ ++/** ++ * reiser4_done_d_cursor - delete d_cursor cache and d_cursor shrinker ++ * ++ * This is called on reiser4 module unloading or system shutdown. ++ */ ++void reiser4_done_d_cursor(void) ++{ ++ BUG_ON(d_cursor_shrinker == NULL); ++ remove_shrinker(d_cursor_shrinker); ++ d_cursor_shrinker = NULL; ++ ++ destroy_reiser4_cache(&d_cursor_cache); ++} ++ ++#define D_CURSOR_TABLE_SIZE (256) ++ ++static inline unsigned long ++d_cursor_hash(d_cursor_hash_table *table, const d_cursor_key *key) ++{ ++ assert("nikita-3555", IS_POW(D_CURSOR_TABLE_SIZE)); ++ return (key->oid + key->cid) & (D_CURSOR_TABLE_SIZE - 1); ++} ++ ++static inline int d_cursor_eq(const d_cursor_key *k1, const d_cursor_key *k2) ++{ ++ return k1->cid == k2->cid && k1->oid == k2->oid; ++} ++ ++/* ++ * define functions to manipulate reiser4 super block's hash table of ++ * dir_cursors ++ */ ++#define KMALLOC(size) kmalloc((size), reiser4_ctx_gfp_mask_get()) ++#define KFREE(ptr, size) kfree(ptr) ++TYPE_SAFE_HASH_DEFINE(d_cursor, ++ dir_cursor, ++ d_cursor_key, key, hash, d_cursor_hash, d_cursor_eq); ++#undef KFREE ++#undef KMALLOC ++ ++/** ++ * reiser4_init_super_d_info - initialize per-super-block d_cursor resources ++ * 
@super: super block to initialize ++ * ++ * Initializes per-super-block d_cursor's hash table and radix tree. It is part ++ * of mount. ++ */ ++int reiser4_init_super_d_info(struct super_block *super) ++{ ++ d_cursor_info *p; ++ ++ p = &get_super_private(super)->d_info; ++ ++ INIT_RADIX_TREE(&p->tree, reiser4_ctx_gfp_mask_get()); ++ return d_cursor_hash_init(&p->table, D_CURSOR_TABLE_SIZE); ++} ++ ++/** ++ * reiser4_done_super_d_info - release per-super-block d_cursor resources ++ * @super: super block being umounted ++ * ++ * It is called on umount. Kills all directory cursors attached to suoer block. ++ */ ++void reiser4_done_super_d_info(struct super_block *super) ++{ ++ d_cursor_info *d_info; ++ dir_cursor *cursor, *next; ++ ++ d_info = &get_super_private(super)->d_info; ++ for_all_in_htable(&d_info->table, d_cursor, cursor, next) ++ kill_cursor(cursor); ++ ++ BUG_ON(d_info->tree.rnode != NULL); ++ d_cursor_hash_done(&d_info->table); ++} ++ ++/** ++ * kill_cursor - free dir_cursor and reiser4_file_fsdata attached to it ++ * @cursor: cursor to free ++ * ++ * Removes reiser4_file_fsdata attached to @cursor from readdir list of ++ * reiser4_inode, frees that reiser4_file_fsdata. Removes @cursor from from ++ * indices, hash table, list of unused cursors and frees it. ++ */ ++static void kill_cursor(dir_cursor *cursor) ++{ ++ unsigned long index; ++ ++ assert("nikita-3566", cursor->ref == 0); ++ assert("nikita-3572", cursor->fsdata != NULL); ++ ++ index = (unsigned long)cursor->key.oid; ++ list_del_init(&cursor->fsdata->dir.linkage); ++ free_fsdata(cursor->fsdata); ++ cursor->fsdata = NULL; ++ ++ if (list_empty_careful(&cursor->list)) ++ /* this is last cursor for a file. Kill radix-tree entry */ ++ radix_tree_delete(&cursor->info->tree, index); ++ else { ++ void **slot; ++ ++ /* ++ * there are other cursors for the same oid. 
++ */ ++ ++ /* ++ * if radix tree point to the cursor being removed, re-target ++ * radix tree slot to the next cursor in the (non-empty as was ++ * checked above) element of the circular list of all cursors ++ * for this oid. ++ */ ++ slot = radix_tree_lookup_slot(&cursor->info->tree, index); ++ assert("nikita-3571", *slot != NULL); ++ if (*slot == cursor) ++ *slot = list_entry(cursor->list.next, dir_cursor, list); ++ /* remove cursor from circular list */ ++ list_del_init(&cursor->list); ++ } ++ /* remove cursor from the list of unused cursors */ ++ list_del_init(&cursor->alist); ++ /* remove cursor from the hash table */ ++ d_cursor_hash_remove(&cursor->info->table, cursor); ++ /* and free it */ ++ kmem_cache_free(d_cursor_cache, cursor); ++ --d_cursor_unused; ++} ++ ++/* possible actions that can be performed on all cursors for the given file */ ++enum cursor_action { ++ /* ++ * load all detached state: this is called when stat-data is loaded ++ * from the disk to recover information about all pending readdirs ++ */ ++ CURSOR_LOAD, ++ /* ++ * detach all state from inode, leaving it in the cache. This is called ++ * when inode is removed form the memory by memory pressure ++ */ ++ CURSOR_DISPOSE, ++ /* ++ * detach cursors from the inode, and free them. This is called when ++ * inode is destroyed ++ */ ++ CURSOR_KILL ++}; ++ ++/* ++ * return d_cursor data for the file system @inode is in. ++ */ ++static inline d_cursor_info *d_info(struct inode *inode) ++{ ++ return &get_super_private(inode->i_sb)->d_info; ++} ++ ++/* ++ * lookup d_cursor in the per-super-block radix tree. ++ */ ++static inline dir_cursor *lookup(d_cursor_info * info, unsigned long index) ++{ ++ return (dir_cursor *) radix_tree_lookup(&info->tree, index); ++} ++ ++/* ++ * attach @cursor to the radix tree. There may be multiple cursors for the ++ * same oid, they are chained into circular list. 
++ */ ++static void bind_cursor(dir_cursor * cursor, unsigned long index) ++{ ++ dir_cursor *head; ++ ++ head = lookup(cursor->info, index); ++ if (head == NULL) { ++ /* this is the first cursor for this index */ ++ INIT_LIST_HEAD(&cursor->list); ++ radix_tree_insert(&cursor->info->tree, index, cursor); ++ } else { ++ /* some cursor already exists. Chain ours */ ++ list_add(&cursor->list, &head->list); ++ } ++} ++ ++/* ++ * detach fsdata (if detachable) from file descriptor, and put cursor on the ++ * "unused" list. Called when file descriptor is not longer in active use. ++ */ ++static void clean_fsdata(struct file *file) ++{ ++ dir_cursor *cursor; ++ reiser4_file_fsdata *fsdata; ++ ++ assert("nikita-3570", file_is_stateless(file)); ++ ++ fsdata = (reiser4_file_fsdata *) file->private_data; ++ if (fsdata != NULL) { ++ cursor = fsdata->cursor; ++ if (cursor != NULL) { ++ spin_lock(&d_lock); ++ --cursor->ref; ++ if (cursor->ref == 0) { ++ list_add_tail(&cursor->alist, &cursor_cache); ++ ++d_cursor_unused; ++ } ++ spin_unlock(&d_lock); ++ file->private_data = NULL; ++ } ++ } ++} ++ ++/* ++ * global counter used to generate "client ids". These ids are encoded into ++ * high bits of fpos. ++ */ ++static __u32 cid_counter = 0; ++#define CID_SHIFT (20) ++#define CID_MASK (0xfffffull) ++ ++static void free_file_fsdata_nolock(struct file *); ++ ++/** ++ * insert_cursor - allocate file_fsdata, insert cursor to tree and hash table ++ * @cursor: ++ * @file: ++ * @inode: ++ * ++ * Allocates reiser4_file_fsdata, attaches it to @cursor, inserts cursor to ++ * reiser4 super block's hash table and radix tree. ++ add detachable readdir ++ * state to the @f ++ */ ++static int insert_cursor(dir_cursor *cursor, struct file *file, ++ struct inode *inode) ++{ ++ int result; ++ reiser4_file_fsdata *fsdata; ++ ++ memset(cursor, 0, sizeof *cursor); ++ ++ /* this is either first call to readdir, or rewind. Anyway, create new ++ * cursor. 
*/ ++ fsdata = create_fsdata(NULL); ++ if (fsdata != NULL) { ++ result = radix_tree_preload(reiser4_ctx_gfp_mask_get()); ++ if (result == 0) { ++ d_cursor_info *info; ++ oid_t oid; ++ ++ info = d_info(inode); ++ oid = get_inode_oid(inode); ++ /* cid occupies higher 12 bits of f->f_pos. Don't ++ * allow it to become negative: this confuses ++ * nfsd_readdir() */ ++ cursor->key.cid = (++cid_counter) & 0x7ff; ++ cursor->key.oid = oid; ++ cursor->fsdata = fsdata; ++ cursor->info = info; ++ cursor->ref = 1; ++ ++ spin_lock_inode(inode); ++ /* install cursor as @f's private_data, discarding old ++ * one if necessary */ ++#if REISER4_DEBUG ++ if (file->private_data) ++ warning("", "file has fsdata already"); ++#endif ++ clean_fsdata(file); ++ free_file_fsdata_nolock(file); ++ file->private_data = fsdata; ++ fsdata->cursor = cursor; ++ spin_unlock_inode(inode); ++ spin_lock(&d_lock); ++ /* insert cursor into hash table */ ++ d_cursor_hash_insert(&info->table, cursor); ++ /* and chain it into radix-tree */ ++ bind_cursor(cursor, (unsigned long)oid); ++ spin_unlock(&d_lock); ++ radix_tree_preload_end(); ++ file->f_pos = ((__u64) cursor->key.cid) << CID_SHIFT; ++ } ++ } else ++ result = RETERR(-ENOMEM); ++ return result; ++} ++ ++/** ++ * process_cursors - do action on each cursor attached to inode ++ * @inode: ++ * @act: action to do ++ * ++ * Finds all cursors of @inode in reiser4's super block radix tree of cursors ++ * and performs action specified by @act on each of cursors. 
++ */ ++static void process_cursors(struct inode *inode, enum cursor_action act) ++{ ++ oid_t oid; ++ dir_cursor *start; ++ struct list_head *head; ++ reiser4_context *ctx; ++ d_cursor_info *info; ++ ++ /* this can be called by ++ * ++ * kswapd->...->prune_icache->..reiser4_destroy_inode ++ * ++ * without reiser4_context ++ */ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) { ++ warning("vs-23", "failed to init context"); ++ return; ++ } ++ ++ assert("nikita-3558", inode != NULL); ++ ++ info = d_info(inode); ++ oid = get_inode_oid(inode); ++ spin_lock_inode(inode); ++ head = get_readdir_list(inode); ++ spin_lock(&d_lock); ++ /* find any cursor for this oid: reference to it is hanging of radix ++ * tree */ ++ start = lookup(info, (unsigned long)oid); ++ if (start != NULL) { ++ dir_cursor *scan; ++ reiser4_file_fsdata *fsdata; ++ ++ /* process circular list of cursors for this oid */ ++ scan = start; ++ do { ++ dir_cursor *next; ++ ++ next = list_entry(scan->list.next, dir_cursor, list); ++ fsdata = scan->fsdata; ++ assert("nikita-3557", fsdata != NULL); ++ if (scan->key.oid == oid) { ++ switch (act) { ++ case CURSOR_DISPOSE: ++ list_del_init(&fsdata->dir.linkage); ++ break; ++ case CURSOR_LOAD: ++ list_add(&fsdata->dir.linkage, head); ++ break; ++ case CURSOR_KILL: ++ kill_cursor(scan); ++ break; ++ } ++ } ++ if (scan == next) ++ /* last cursor was just killed */ ++ break; ++ scan = next; ++ } while (scan != start); ++ } ++ spin_unlock(&d_lock); ++ /* check that we killed 'em all */ ++ assert("nikita-3568", ++ ergo(act == CURSOR_KILL, ++ list_empty_careful(get_readdir_list(inode)))); ++ assert("nikita-3569", ++ ergo(act == CURSOR_KILL, lookup(info, oid) == NULL)); ++ spin_unlock_inode(inode); ++ reiser4_exit_context(ctx); ++} ++ ++/** ++ * reiser4_dispose_cursors - removes cursors from inode's list ++ * @inode: inode to dispose cursors of ++ * ++ * For each of cursors corresponding to @inode - removes reiser4_file_fsdata ++ * attached to cursor from 
inode's readdir list. This is called when inode is ++ * removed from the memory by memory pressure. ++ */ ++void reiser4_dispose_cursors(struct inode *inode) ++{ ++ process_cursors(inode, CURSOR_DISPOSE); ++} ++ ++/** ++ * reiser4_load_cursors - attach cursors to inode ++ * @inode: inode to load cursors to ++ * ++ * For each of cursors corresponding to @inode - attaches reiser4_file_fsdata ++ * attached to cursor to inode's readdir list. This is done when inode is ++ * loaded into memory. ++ */ ++void reiser4_load_cursors(struct inode *inode) ++{ ++ process_cursors(inode, CURSOR_LOAD); ++} ++ ++/** ++ * reiser4_kill_cursors - kill all inode cursors ++ * @inode: inode to kill cursors of ++ * ++ * Frees all cursors for this inode. This is called when inode is destroyed. ++ */ ++void reiser4_kill_cursors(struct inode *inode) ++{ ++ process_cursors(inode, CURSOR_KILL); ++} ++ ++/** ++ * file_is_stateless - ++ * @file: ++ * ++ * true, if file descriptor @f is created by NFS server by "demand" to serve ++ * one file system operation. This means that there may be "detached state" ++ * for underlying inode. ++ */ ++static int file_is_stateless(struct file *file) ++{ ++ return reiser4_get_dentry_fsdata(file->f_dentry)->stateless; ++} ++ ++/** ++ * reiser4_get_dir_fpos - ++ * @dir: ++ * ++ * Calculates ->fpos from user-supplied cookie. Normally it is dir->f_pos, but ++ * in the case of stateless directory operation (readdir-over-nfs), client id ++ * was encoded in the high bits of cookie and should me masked off. ++ */ ++loff_t reiser4_get_dir_fpos(struct file *dir) ++{ ++ if (file_is_stateless(dir)) ++ return dir->f_pos & CID_MASK; ++ else ++ return dir->f_pos; ++} ++ ++/** ++ * reiser4_attach_fsdata - try to attach fsdata ++ * @file: ++ * @inode: ++ * ++ * Finds or creates cursor for readdir-over-nfs. 
++ */ ++int reiser4_attach_fsdata(struct file *file, struct inode *inode) ++{ ++ loff_t pos; ++ int result; ++ dir_cursor *cursor; ++ ++ /* ++ * we are serialized by inode->i_mutex ++ */ ++ if (!file_is_stateless(file)) ++ return 0; ++ ++ pos = file->f_pos; ++ result = 0; ++ if (pos == 0) { ++ /* ++ * first call to readdir (or rewind to the beginning of ++ * directory) ++ */ ++ cursor = kmem_cache_alloc(d_cursor_cache, ++ reiser4_ctx_gfp_mask_get()); ++ if (cursor != NULL) ++ result = insert_cursor(cursor, file, inode); ++ else ++ result = RETERR(-ENOMEM); ++ } else { ++ /* try to find existing cursor */ ++ d_cursor_key key; ++ ++ key.cid = pos >> CID_SHIFT; ++ key.oid = get_inode_oid(inode); ++ spin_lock(&d_lock); ++ cursor = d_cursor_hash_find(&d_info(inode)->table, &key); ++ if (cursor != NULL) { ++ /* cursor was found */ ++ if (cursor->ref == 0) { ++ /* move it from unused list */ ++ list_del_init(&cursor->alist); ++ --d_cursor_unused; ++ } ++ ++cursor->ref; ++ } ++ spin_unlock(&d_lock); ++ if (cursor != NULL) { ++ spin_lock_inode(inode); ++ assert("nikita-3556", cursor->fsdata->back == NULL); ++ clean_fsdata(file); ++ free_file_fsdata_nolock(file); ++ file->private_data = cursor->fsdata; ++ spin_unlock_inode(inode); ++ } ++ } ++ return result; ++} ++ ++/** ++ * reiser4_detach_fsdata - ??? ++ * @file: ++ * ++ * detach fsdata, if necessary ++ */ ++void reiser4_detach_fsdata(struct file *file) ++{ ++ struct inode *inode; ++ ++ if (!file_is_stateless(file)) ++ return; ++ ++ inode = file->f_dentry->d_inode; ++ spin_lock_inode(inode); ++ clean_fsdata(file); ++ spin_unlock_inode(inode); ++} ++ ++/* slab for reiser4_dentry_fsdata */ ++static struct kmem_cache *dentry_fsdata_cache; ++ ++/** ++ * reiser4_init_dentry_fsdata - create cache of dentry_fsdata ++ * ++ * Initializes slab cache of structures attached to denty->d_fsdata. It is ++ * part of reiser4 module initialization. 
++ */ ++int reiser4_init_dentry_fsdata(void) ++{ ++ dentry_fsdata_cache = kmem_cache_create("dentry_fsdata", ++ sizeof(reiser4_dentry_fsdata), ++ 0, ++ SLAB_HWCACHE_ALIGN | ++ SLAB_RECLAIM_ACCOUNT, NULL, ++ NULL); ++ if (dentry_fsdata_cache == NULL) ++ return RETERR(-ENOMEM); ++ return 0; ++} ++ ++/** ++ * reiser4_done_dentry_fsdata - delete cache of dentry_fsdata ++ * ++ * This is called on reiser4 module unloading or system shutdown. ++ */ ++void reiser4_done_dentry_fsdata(void) ++{ ++ destroy_reiser4_cache(&dentry_fsdata_cache); ++} ++ ++/** ++ * reiser4_get_dentry_fsdata - get fs-specific dentry data ++ * @dentry: queried dentry ++ * ++ * Allocates if necessary and returns per-dentry data that we attach to each ++ * dentry. ++ */ ++reiser4_dentry_fsdata *reiser4_get_dentry_fsdata(struct dentry *dentry) ++{ ++ assert("nikita-1365", dentry != NULL); ++ ++ if (dentry->d_fsdata == NULL) { ++ dentry->d_fsdata = kmem_cache_alloc(dentry_fsdata_cache, ++ reiser4_ctx_gfp_mask_get()); ++ if (dentry->d_fsdata == NULL) ++ return ERR_PTR(RETERR(-ENOMEM)); ++ memset(dentry->d_fsdata, 0, sizeof(reiser4_dentry_fsdata)); ++ } ++ return dentry->d_fsdata; ++} ++ ++/** ++ * reiser4_free_dentry_fsdata - detach and free dentry_fsdata ++ * @dentry: dentry to free fsdata of ++ * ++ * Detaches and frees fs-specific dentry data ++ */ ++void reiser4_free_dentry_fsdata(struct dentry *dentry) ++{ ++ if (dentry->d_fsdata != NULL) { ++ kmem_cache_free(dentry_fsdata_cache, dentry->d_fsdata); ++ dentry->d_fsdata = NULL; ++ } ++} ++ ++/* slab for reiser4_file_fsdata */ ++static struct kmem_cache *file_fsdata_cache; ++ ++/** ++ * reiser4_init_file_fsdata - create cache of reiser4_file_fsdata ++ * ++ * Initializes slab cache of structures attached to file->private_data. It is ++ * part of reiser4 module initialization. 
++ */ ++int reiser4_init_file_fsdata(void) ++{ ++ file_fsdata_cache = kmem_cache_create("file_fsdata", ++ sizeof(reiser4_file_fsdata), ++ 0, ++ SLAB_HWCACHE_ALIGN | ++ SLAB_RECLAIM_ACCOUNT, NULL, NULL); ++ if (file_fsdata_cache == NULL) ++ return RETERR(-ENOMEM); ++ return 0; ++} ++ ++/** ++ * reiser4_done_file_fsdata - delete cache of reiser4_file_fsdata ++ * ++ * This is called on reiser4 module unloading or system shutdown. ++ */ ++void reiser4_done_file_fsdata(void) ++{ ++ destroy_reiser4_cache(&file_fsdata_cache); ++} ++ ++/** ++ * create_fsdata - allocate and initialize reiser4_file_fsdata ++ * @file: what to create file_fsdata for, may be NULL ++ * ++ * Allocates and initializes reiser4_file_fsdata structure. ++ */ ++static reiser4_file_fsdata *create_fsdata(struct file *file) ++{ ++ reiser4_file_fsdata *fsdata; ++ ++ fsdata = kmem_cache_alloc(file_fsdata_cache, ++ reiser4_ctx_gfp_mask_get()); ++ if (fsdata != NULL) { ++ memset(fsdata, 0, sizeof *fsdata); ++ fsdata->ra1.max_window_size = VM_MAX_READAHEAD * 1024; ++ fsdata->back = file; ++ INIT_LIST_HEAD(&fsdata->dir.linkage); ++ } ++ return fsdata; ++} ++ ++/** ++ * free_fsdata - free reiser4_file_fsdata ++ * @fsdata: object to free ++ * ++ * Dual to create_fsdata(). Free reiser4_file_fsdata. ++ */ ++static void free_fsdata(reiser4_file_fsdata *fsdata) ++{ ++ BUG_ON(fsdata == NULL); ++ kmem_cache_free(file_fsdata_cache, fsdata); ++} ++ ++/** ++ * reiser4_get_file_fsdata - get fs-specific file data ++ * @file: queried file ++ * ++ * Returns fs-specific data of @file. If it is NULL, allocates it and attaches ++ * to @file. 
++ */ ++reiser4_file_fsdata *reiser4_get_file_fsdata(struct file *file) ++{ ++ assert("nikita-1603", file != NULL); ++ ++ if (file->private_data == NULL) { ++ reiser4_file_fsdata *fsdata; ++ struct inode *inode; ++ ++ fsdata = create_fsdata(file); ++ if (fsdata == NULL) ++ return ERR_PTR(RETERR(-ENOMEM)); ++ ++ inode = file->f_dentry->d_inode; ++ spin_lock_inode(inode); ++ if (file->private_data == NULL) { ++ file->private_data = fsdata; ++ fsdata = NULL; ++ } ++ spin_unlock_inode(inode); ++ if (fsdata != NULL) ++ /* other thread initialized ->fsdata */ ++ kmem_cache_free(file_fsdata_cache, fsdata); ++ } ++ assert("nikita-2665", file->private_data != NULL); ++ return file->private_data; ++} ++ ++/** ++ * free_file_fsdata_nolock - detach and free reiser4_file_fsdata ++ * @file: ++ * ++ * Detaches reiser4_file_fsdata from @file, removes reiser4_file_fsdata from ++ * readdir list, frees if it is not linked to d_cursor object. ++ */ ++static void free_file_fsdata_nolock(struct file *file) ++{ ++ reiser4_file_fsdata *fsdata; ++ ++ assert("", spin_inode_is_locked(file->f_dentry->d_inode)); ++ fsdata = file->private_data; ++ if (fsdata != NULL) { ++ list_del_init(&fsdata->dir.linkage); ++ if (fsdata->cursor == NULL) ++ free_fsdata(fsdata); ++ } ++ file->private_data = NULL; ++} ++ ++/** ++ * reiser4_free_file_fsdata - detach from struct file and free reiser4_file_fsdata ++ * @file: ++ * ++ * Spinlocks inode and calls free_file_fsdata_nolock to do the work. 
++ */ ++void reiser4_free_file_fsdata(struct file *file) ++{ ++ spin_lock_inode(file->f_dentry->d_inode); ++ free_file_fsdata_nolock(file); ++ spin_unlock_inode(file->f_dentry->d_inode); ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * End: ++ */ +diff --git a/fs/reiser4/fsdata.h b/fs/reiser4/fsdata.h +new file mode 100644 +index 0000000..49e8ebf +--- /dev/null ++++ b/fs/reiser4/fsdata.h +@@ -0,0 +1,207 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++#if !defined( __REISER4_FSDATA_H__ ) ++#define __REISER4_FSDATA_H__ ++ ++#include "debug.h" ++#include "kassign.h" ++#include "seal.h" ++#include "type_safe_hash.h" ++#include "plugin/file/file.h" ++#include "readahead.h" ++ ++/* ++ * comment about reiser4_dentry_fsdata ++ * ++ * ++ */ ++ ++/* ++ * locking: fields of per file descriptor readdir_pos and ->f_pos are ++ * protected by ->i_mutex on inode. Under this lock following invariant ++ * holds: ++ * ++ * file descriptor is "looking" at the entry_no-th directory entry from ++ * the beginning of directory. This entry has key dir_entry_key and is ++ * pos-th entry with duplicate-key sequence. ++ * ++ */ ++ ++/* logical position within directory */ ++typedef struct { ++ /* key of directory entry (actually, part of a key sufficient to ++ identify directory entry) */ ++ de_id dir_entry_key; ++ /* ordinal number of directory entry among all entries with the same ++ key. (Starting from 0.) 
*/ ++ unsigned pos; ++} dir_pos; ++ ++typedef struct { ++ /* f_pos corresponding to this readdir position */ ++ __u64 fpos; ++ /* logical position within directory */ ++ dir_pos position; ++ /* logical number of directory entry within ++ directory */ ++ __u64 entry_no; ++} readdir_pos; ++ ++/* ++ * this is used to speed up lookups for directory entry: on initial call to ++ * ->lookup() seal and coord of directory entry (if found, that is) are stored ++ * in struct dentry and reused later to avoid tree traversals. ++ */ ++typedef struct de_location { ++ /* seal covering directory entry */ ++ seal_t entry_seal; ++ /* coord of directory entry */ ++ coord_t entry_coord; ++ /* ordinal number of directory entry among all entries with the same ++ key. (Starting from 0.) */ ++ int pos; ++} de_location; ++ ++/** ++ * reiser4_dentry_fsdata - reiser4-specific data attached to dentries ++ * ++ * This is allocated dynamically and released in d_op->d_release() ++ * ++ * Currently it only contains cached location (hint) of directory entry, but ++ * it is expected that other information will be accumulated here. ++ */ ++typedef struct reiser4_dentry_fsdata { ++ /* ++ * here will go fields filled by ->lookup() to speedup next ++ * create/unlink, like blocknr of znode with stat-data, or key of ++ * stat-data. ++ */ ++ de_location dec; ++ int stateless; /* created through reiser4_decode_fh, needs special ++ * treatment in readdir. 
*/ ++} reiser4_dentry_fsdata; ++ ++extern int reiser4_init_dentry_fsdata(void); ++extern void reiser4_done_dentry_fsdata(void); ++extern reiser4_dentry_fsdata *reiser4_get_dentry_fsdata(struct dentry *); ++extern void reiser4_free_dentry_fsdata(struct dentry *dentry); ++ ++/** ++ * reiser4_file_fsdata - reiser4-specific data attached to file->private_data ++ * ++ * This is allocated dynamically and released in inode->i_fop->release ++ */ ++typedef struct reiser4_file_fsdata { ++ /* ++ * pointer back to the struct file which this reiser4_file_fsdata is ++ * part of ++ */ ++ struct file *back; ++ /* detached cursor for stateless readdir. */ ++ struct dir_cursor *cursor; ++ /* ++ * We need both directory and regular file parts here, because there ++ * are file system objects that are files and directories. ++ */ ++ struct { ++ /* ++ * position in directory. It is updated each time directory is ++ * modified ++ */ ++ readdir_pos readdir; ++ /* head of this list is reiser4_inode->lists.readdir_list */ ++ struct list_head linkage; ++ } dir; ++ /* hints to speed up operations with regular files: read and write. */ ++ struct { ++ hint_t hint; ++ } reg; ++ struct reiser4_file_ra_state ra1; ++ ++} reiser4_file_fsdata; ++ ++extern int reiser4_init_file_fsdata(void); ++extern void reiser4_done_file_fsdata(void); ++extern reiser4_file_fsdata *reiser4_get_file_fsdata(struct file *); ++extern void reiser4_free_file_fsdata(struct file *); ++ ++/* ++ * d_cursor is reiser4_file_fsdata not attached to struct file. d_cursors are ++ * used to address problem reiser4 has with readdir accesses via NFS. See ++ * plugin/file_ops_readdir.c for more details. 
++ */ ++typedef struct { ++ __u16 cid; ++ __u64 oid; ++} d_cursor_key; ++ ++/* ++ * define structures d_cursor_hash_table d_cursor_hash_link which are used to ++ * maintain hash table of dir_cursor-s in reiser4's super block ++ */ ++typedef struct dir_cursor dir_cursor; ++TYPE_SAFE_HASH_DECLARE(d_cursor, dir_cursor); ++ ++typedef struct d_cursor_info d_cursor_info; ++ ++struct dir_cursor { ++ int ref; ++ reiser4_file_fsdata *fsdata; ++ ++ /* link to reiser4 super block hash table of cursors */ ++ d_cursor_hash_link hash; ++ ++ /* ++ * this is to link cursors to reiser4 super block's radix tree of ++ * cursors if there are more than one cursor of the same objectid ++ */ ++ struct list_head list; ++ d_cursor_key key; ++ d_cursor_info *info; ++ /* list of unused cursors */ ++ struct list_head alist; ++}; ++ ++extern int reiser4_init_d_cursor(void); ++extern void reiser4_done_d_cursor(void); ++ ++extern int reiser4_init_super_d_info(struct super_block *); ++extern void reiser4_done_super_d_info(struct super_block *); ++ ++extern loff_t reiser4_get_dir_fpos(struct file *); ++extern int reiser4_attach_fsdata(struct file *, struct inode *); ++extern void reiser4_detach_fsdata(struct file *); ++ ++/* these are needed for "stateless" readdir. See plugin/file_ops_readdir.c for ++ more details */ ++void reiser4_dispose_cursors(struct inode *inode); ++void reiser4_load_cursors(struct inode *inode); ++void reiser4_kill_cursors(struct inode *inode); ++void reiser4_adjust_dir_file(struct inode *dir, const struct dentry *de, ++ int offset, int adj); ++ ++/* ++ * this structure is embedded to reise4_super_info_data. It maintains d_cursors ++ * (detached readdir state). See plugin/file_ops_readdir.c for more details. 
++ */ ++struct d_cursor_info { ++ d_cursor_hash_table table; ++ struct radix_tree_root tree; ++}; ++ ++/* spinlock protecting readdir cursors */ ++extern spinlock_t d_lock; ++ ++/* __REISER4_FSDATA_H__ */ ++#endif ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 120 ++ * End: ++ */ +diff --git a/fs/reiser4/init_super.c b/fs/reiser4/init_super.c +new file mode 100644 +index 0000000..3513d5f +--- /dev/null ++++ b/fs/reiser4/init_super.c +@@ -0,0 +1,750 @@ ++/* Copyright by Hans Reiser, 2003 */ ++ ++#include "super.h" ++#include "inode.h" ++#include "plugin/plugin_set.h" ++ ++#include ++ ++/** ++ * init_fs_info - allocate reiser4 specific super block ++ * @super: super block of filesystem ++ * ++ * Allocates and initialize reiser4_super_info_data, attaches it to ++ * super->s_fs_info, initializes structures maintaining d_cursor-s. ++ */ ++int reiser4_init_fs_info(struct super_block *super) ++{ ++ reiser4_super_info_data *sbinfo; ++ ++ sbinfo = kmalloc(sizeof(reiser4_super_info_data), ++ reiser4_ctx_gfp_mask_get()); ++ if (!sbinfo) ++ return RETERR(-ENOMEM); ++ ++ super->s_fs_info = sbinfo; ++ super->s_op = NULL; ++ memset(sbinfo, 0, sizeof(*sbinfo)); ++ ++ ON_DEBUG(INIT_LIST_HEAD(&sbinfo->all_jnodes)); ++ ON_DEBUG(spin_lock_init(&sbinfo->all_guard)); ++ ++ mutex_init(&sbinfo->delete_mutex); ++ spin_lock_init(&(sbinfo->guard)); ++ ++ /* initialize per-super-block d_cursor resources */ ++ reiser4_init_super_d_info(super); ++ ++ return 0; ++} ++ ++/** ++ * reiser4_done_fs_info - free reiser4 specific super block ++ * @super: super block of filesystem ++ * ++ * Performs some sanity checks, releases structures maintaining d_cursor-s, ++ * frees reiser4_super_info_data. 
++ */ ++void reiser4_done_fs_info(struct super_block *super) ++{ ++ assert("zam-990", super->s_fs_info != NULL); ++ ++ /* release per-super-block d_cursor resources */ ++ reiser4_done_super_d_info(super); ++ ++ /* make sure that there are not jnodes already */ ++ assert("", list_empty(&get_super_private(super)->all_jnodes)); ++ assert("", get_current_context()->trans->atom == NULL); ++ reiser4_check_block_counters(super); ++ kfree(super->s_fs_info); ++ super->s_fs_info = NULL; ++} ++ ++/* type of option parseable by parse_option() */ ++typedef enum { ++ /* value of option is arbitrary string */ ++ OPT_STRING, ++ ++ /* ++ * option specifies bit in a bitmask. When option is set - bit in ++ * sbinfo->fs_flags is set. Examples are bsdgroups, 32bittimes, mtflush, ++ * dont_load_bitmap, atomic_write. ++ */ ++ OPT_BIT, ++ ++ /* ++ * value of option should conform to sprintf() format. Examples are ++ * tmgr.atom_max_size=N, tmgr.atom_max_age=N ++ */ ++ OPT_FORMAT, ++ ++ /* ++ * option can take one of predefined values. Example is onerror=panic or ++ * onerror=remount-ro ++ */ ++ OPT_ONEOF, ++} opt_type_t; ++ ++typedef struct opt_bitmask_bit { ++ const char *bit_name; ++ int bit_nr; ++} opt_bitmask_bit; ++ ++/* description of option parseable by parse_option() */ ++typedef struct opt_desc { ++ /* option name. ++ ++ parsed portion of string has a form "name=value". 
++ */ ++ const char *name; ++ /* type of option */ ++ opt_type_t type; ++ union { ++ /* where to store value of string option (type == OPT_STRING) */ ++ char **string; ++ /* description of bits for bit option (type == OPT_BIT) */ ++ struct { ++ int nr; ++ void *addr; ++ } bit; ++ /* description of format and targets for format option (type ++ == OPT_FORMAT) */ ++ struct { ++ const char *format; ++ int nr_args; ++ void *arg1; ++ void *arg2; ++ void *arg3; ++ void *arg4; ++ } f; ++ struct { ++ int *result; ++ const char *list[10]; ++ } oneof; ++ struct { ++ void *addr; ++ int nr_bits; ++ opt_bitmask_bit *bits; ++ } bitmask; ++ } u; ++} opt_desc_t; ++ ++/** ++ * parse_option - parse one option ++ * @opt_strin: starting point of parsing ++ * @opt: option description ++ * ++ * foo=bar, ++ * ^ ^ ^ ++ * | | +-- replaced to '\0' ++ * | +-- val_start ++ * +-- opt_string ++ * Figures out option type and handles option correspondingly. ++ */ ++static int parse_option(char *opt_string, opt_desc_t *opt) ++{ ++ char *val_start; ++ int result; ++ const char *err_msg; ++ ++ /* NOTE-NIKITA think about using lib/cmdline.c functions here. 
*/ ++ ++ val_start = strchr(opt_string, '='); ++ if (val_start != NULL) { ++ *val_start = '\0'; ++ ++val_start; ++ } ++ ++ err_msg = NULL; ++ result = 0; ++ switch (opt->type) { ++ case OPT_STRING: ++ if (val_start == NULL) { ++ err_msg = "String arg missing"; ++ result = RETERR(-EINVAL); ++ } else ++ *opt->u.string = val_start; ++ break; ++ case OPT_BIT: ++ if (val_start != NULL) ++ err_msg = "Value ignored"; ++ else ++ set_bit(opt->u.bit.nr, opt->u.bit.addr); ++ break; ++ case OPT_FORMAT: ++ if (val_start == NULL) { ++ err_msg = "Formatted arg missing"; ++ result = RETERR(-EINVAL); ++ break; ++ } ++ if (sscanf(val_start, opt->u.f.format, ++ opt->u.f.arg1, opt->u.f.arg2, opt->u.f.arg3, ++ opt->u.f.arg4) != opt->u.f.nr_args) { ++ err_msg = "Wrong conversion"; ++ result = RETERR(-EINVAL); ++ } ++ break; ++ case OPT_ONEOF: ++ { ++ int i = 0; ++ ++ if (val_start == NULL) { ++ err_msg = "Value is missing"; ++ result = RETERR(-EINVAL); ++ break; ++ } ++ err_msg = "Wrong option value"; ++ result = RETERR(-EINVAL); ++ while (opt->u.oneof.list[i]) { ++ if (!strcmp(opt->u.oneof.list[i], val_start)) { ++ result = 0; ++ err_msg = NULL; ++ *opt->u.oneof.result = i; ++ break; ++ } ++ i++; ++ } ++ break; ++ } ++ default: ++ wrong_return_value("nikita-2100", "opt -> type"); ++ break; ++ } ++ if (err_msg != NULL) { ++ warning("nikita-2496", "%s when parsing option \"%s%s%s\"", ++ err_msg, opt->name, val_start ? "=" : "", ++ val_start ? : ""); ++ } ++ return result; ++} ++ ++/** ++ * parse_options - parse reiser4 mount options ++ * @opt_string: starting point ++ * @opts: array of option description ++ * @nr_opts: number of elements in @opts ++ * ++ * Parses comma separated list of reiser4 mount options. 
++ */ ++static int parse_options(char *opt_string, opt_desc_t *opts, int nr_opts) ++{ ++ int result; ++ ++ result = 0; ++ while ((result == 0) && opt_string && *opt_string) { ++ int j; ++ char *next; ++ ++ next = strchr(opt_string, ','); ++ if (next != NULL) { ++ *next = '\0'; ++ ++next; ++ } ++ for (j = 0; j < nr_opts; ++j) { ++ if (!strncmp(opt_string, opts[j].name, ++ strlen(opts[j].name))) { ++ result = parse_option(opt_string, &opts[j]); ++ break; ++ } ++ } ++ if (j == nr_opts) { ++ warning("nikita-2307", "Unrecognized option: \"%s\"", ++ opt_string); ++ /* traditionally, -EINVAL is returned on wrong mount ++ option */ ++ result = RETERR(-EINVAL); ++ } ++ opt_string = next; ++ } ++ return result; ++} ++ ++#define NUM_OPT( label, fmt, addr ) \ ++ { \ ++ .name = ( label ), \ ++ .type = OPT_FORMAT, \ ++ .u = { \ ++ .f = { \ ++ .format = ( fmt ), \ ++ .nr_args = 1, \ ++ .arg1 = ( addr ), \ ++ .arg2 = NULL, \ ++ .arg3 = NULL, \ ++ .arg4 = NULL \ ++ } \ ++ } \ ++ } ++ ++#define SB_FIELD_OPT( field, fmt ) NUM_OPT( #field, fmt, &sbinfo -> field ) ++ ++#define BIT_OPT(label, bitnr) \ ++ { \ ++ .name = label, \ ++ .type = OPT_BIT, \ ++ .u = { \ ++ .bit = { \ ++ .nr = bitnr, \ ++ .addr = &sbinfo->fs_flags \ ++ } \ ++ } \ ++ } ++ ++#define MAX_NR_OPTIONS (30) ++ ++/** ++ * reiser4_init_super_data - initialize reiser4 private super block ++ * @super: super block to initialize ++ * @opt_string: list of reiser4 mount options ++ * ++ * Sets various reiser4 parameters to default values. Parses mount options and ++ * overwrites default settings. 
++ */ ++int reiser4_init_super_data(struct super_block *super, char *opt_string) ++{ ++ int result; ++ opt_desc_t *opts, *p; ++ reiser4_super_info_data *sbinfo = get_super_private(super); ++ ++ /* initialize super, export, dentry operations */ ++ sbinfo->ops.super = reiser4_super_operations; ++ sbinfo->ops.export = reiser4_export_operations; ++ sbinfo->ops.dentry = reiser4_dentry_operations; ++ super->s_op = &sbinfo->ops.super; ++ super->s_export_op = &sbinfo->ops.export; ++ ++ /* initialize transaction manager parameters to default values */ ++ sbinfo->tmgr.atom_max_size = totalram_pages / 4; ++ sbinfo->tmgr.atom_max_age = REISER4_ATOM_MAX_AGE / HZ; ++ sbinfo->tmgr.atom_min_size = 256; ++ sbinfo->tmgr.atom_max_flushers = ATOM_MAX_FLUSHERS; ++ ++ /* initialize cbk cache parameter */ ++ sbinfo->tree.cbk_cache.nr_slots = CBK_CACHE_SLOTS; ++ ++ /* initialize flush parameters */ ++ sbinfo->flush.relocate_threshold = FLUSH_RELOCATE_THRESHOLD; ++ sbinfo->flush.relocate_distance = FLUSH_RELOCATE_DISTANCE; ++ sbinfo->flush.written_threshold = FLUSH_WRITTEN_THRESHOLD; ++ sbinfo->flush.scan_maxnodes = FLUSH_SCAN_MAXNODES; ++ ++ sbinfo->optimal_io_size = REISER4_OPTIMAL_IO_SIZE; ++ ++ /* preliminary tree initializations */ ++ sbinfo->tree.super = super; ++ sbinfo->tree.carry.new_node_flags = REISER4_NEW_NODE_FLAGS; ++ sbinfo->tree.carry.new_extent_flags = REISER4_NEW_EXTENT_FLAGS; ++ sbinfo->tree.carry.paste_flags = REISER4_PASTE_FLAGS; ++ sbinfo->tree.carry.insert_flags = REISER4_INSERT_FLAGS; ++ rwlock_init(&(sbinfo->tree.tree_lock)); ++ spin_lock_init(&(sbinfo->tree.epoch_lock)); ++ ++ /* initialize default readahead params */ ++ sbinfo->ra_params.max = num_physpages / 4; ++ sbinfo->ra_params.flags = 0; ++ ++ /* allocate memory for structure describing reiser4 mount options */ ++ opts = kmalloc(sizeof(opt_desc_t) * MAX_NR_OPTIONS, ++ reiser4_ctx_gfp_mask_get()); ++ if (opts == NULL) ++ return RETERR(-ENOMEM); ++ ++ /* initialize structure describing reiser4 mount options 
*/ ++ p = opts; ++ ++#if REISER4_DEBUG ++# define OPT_ARRAY_CHECK if ((p) > (opts) + MAX_NR_OPTIONS) { \ ++ warning ("zam-1046", "opt array is overloaded"); break; \ ++ } ++#else ++# define OPT_ARRAY_CHECK noop ++#endif ++ ++#define PUSH_OPT(...) \ ++do { \ ++ opt_desc_t o = __VA_ARGS__; \ ++ OPT_ARRAY_CHECK; \ ++ *p ++ = o; \ ++} while (0) ++ ++#define PUSH_SB_FIELD_OPT(field, format) PUSH_OPT(SB_FIELD_OPT(field, format)) ++#define PUSH_BIT_OPT(name, bit) PUSH_OPT(BIT_OPT(name, bit)) ++ ++ /* ++ * tmgr.atom_max_size=N ++ * Atoms containing more than N blocks will be forced to commit. N is ++ * decimal. ++ */ ++ PUSH_SB_FIELD_OPT(tmgr.atom_max_size, "%u"); ++ /* ++ * tmgr.atom_max_age=N ++ * Atoms older than N seconds will be forced to commit. N is decimal. ++ */ ++ PUSH_SB_FIELD_OPT(tmgr.atom_max_age, "%u"); ++ /* ++ * tmgr.atom_min_size=N ++ * In committing an atom to free dirty pages, force the atom less than ++ * N in size to fuse with another one. ++ */ ++ PUSH_SB_FIELD_OPT(tmgr.atom_min_size, "%u"); ++ /* ++ * tmgr.atom_max_flushers=N ++ * limit of concurrent flushers for one atom. 0 means no limit. ++ */ ++ PUSH_SB_FIELD_OPT(tmgr.atom_max_flushers, "%u"); ++ /* ++ * tree.cbk_cache_slots=N ++ * Number of slots in the cbk cache. ++ */ ++ PUSH_SB_FIELD_OPT(tree.cbk_cache.nr_slots, "%u"); ++ /* ++ * If flush finds more than FLUSH_RELOCATE_THRESHOLD adjacent dirty ++ * leaf-level blocks it will force them to be relocated. ++ */ ++ PUSH_SB_FIELD_OPT(flush.relocate_threshold, "%u"); ++ /* ++ * If flush finds can find a block allocation closer than at most ++ * FLUSH_RELOCATE_DISTANCE from the preceder it will relocate to that ++ * position. ++ */ ++ PUSH_SB_FIELD_OPT(flush.relocate_distance, "%u"); ++ /* ++ * If we have written this much or more blocks before encountering busy ++ * jnode in flush list - abort flushing hoping that next time we get ++ * called this jnode will be clean already, and we will save some ++ * seeks. 
++ */ ++ PUSH_SB_FIELD_OPT(flush.written_threshold, "%u"); ++ /* The maximum number of nodes to scan left on a level during flush. */ ++ PUSH_SB_FIELD_OPT(flush.scan_maxnodes, "%u"); ++ /* preferred IO size */ ++ PUSH_SB_FIELD_OPT(optimal_io_size, "%u"); ++ /* carry flags used for insertion of new nodes */ ++ PUSH_SB_FIELD_OPT(tree.carry.new_node_flags, "%u"); ++ /* carry flags used for insertion of new extents */ ++ PUSH_SB_FIELD_OPT(tree.carry.new_extent_flags, "%u"); ++ /* carry flags used for paste operations */ ++ PUSH_SB_FIELD_OPT(tree.carry.paste_flags, "%u"); ++ /* carry flags used for insert operations */ ++ PUSH_SB_FIELD_OPT(tree.carry.insert_flags, "%u"); ++ ++#ifdef CONFIG_REISER4_BADBLOCKS ++ /* ++ * Alternative master superblock location in case if it's original ++ * location is not writeable/accessable. This is offset in BYTES. ++ */ ++ PUSH_SB_FIELD_OPT(altsuper, "%lu"); ++#endif ++ ++ /* turn on BSD-style gid assignment */ ++ PUSH_BIT_OPT("bsdgroups", REISER4_BSD_GID); ++ /* turn on 32 bit times */ ++ PUSH_BIT_OPT("32bittimes", REISER4_32_BIT_TIMES); ++ /* ++ * Don't load all bitmap blocks at mount time, it is useful for ++ * machines with tiny RAM and large disks. ++ */ ++ PUSH_BIT_OPT("dont_load_bitmap", REISER4_DONT_LOAD_BITMAP); ++ /* disable transaction commits during write() */ ++ PUSH_BIT_OPT("atomic_write", REISER4_ATOMIC_WRITE); ++ /* disable use of write barriers in the reiser4 log writer. 
*/ ++ PUSH_BIT_OPT("no_write_barrier", REISER4_NO_WRITE_BARRIER); ++ ++ PUSH_OPT( ++ { ++ /* ++ * tree traversal readahead parameters: ++ * -o readahead:MAXNUM:FLAGS ++ * MAXNUM - max number fo nodes to request readahead for: -1UL ++ * will set it to max_sane_readahead() ++ * FLAGS - combination of bits: RA_ADJCENT_ONLY, RA_ALL_LEVELS, ++ * CONTINUE_ON_PRESENT ++ */ ++ .name = "readahead", ++ .type = OPT_FORMAT, ++ .u = { ++ .f = { ++ .format = "%u:%u", ++ .nr_args = 2, ++ .arg1 = &sbinfo->ra_params.max, ++ .arg2 = &sbinfo->ra_params.flags, ++ .arg3 = NULL, ++ .arg4 = NULL ++ } ++ } ++ } ++ ); ++ ++ /* What to do in case of fs error */ ++ PUSH_OPT( ++ { ++ .name = "onerror", ++ .type = OPT_ONEOF, ++ .u = { ++ .oneof = { ++ .result = &sbinfo->onerror, ++ .list = { ++ "panic", "remount-ro", NULL ++ }, ++ } ++ } ++ } ++ ); ++ ++ /* modify default settings to values set by mount options */ ++ result = parse_options(opt_string, opts, p - opts); ++ kfree(opts); ++ if (result != 0) ++ return result; ++ ++ /* correct settings to sanity values */ ++ sbinfo->tmgr.atom_max_age *= HZ; ++ if (sbinfo->tmgr.atom_max_age <= 0) ++ /* overflow */ ++ sbinfo->tmgr.atom_max_age = REISER4_ATOM_MAX_AGE; ++ ++ /* round optimal io size up to 512 bytes */ ++ sbinfo->optimal_io_size >>= VFS_BLKSIZE_BITS; ++ sbinfo->optimal_io_size <<= VFS_BLKSIZE_BITS; ++ if (sbinfo->optimal_io_size == 0) { ++ warning("nikita-2497", "optimal_io_size is too small"); ++ return RETERR(-EINVAL); ++ } ++ return result; ++} ++ ++/** ++ * reiser4_init_read_super - read reiser4 master super block ++ * @super: super block to fill ++ * @silent: if 0 - print warnings ++ * ++ * Reads reiser4 master super block either from predefined location or from ++ * location specified by altsuper mount option, initializes disk format plugin. 
++ */ ++int reiser4_init_read_super(struct super_block *super, int silent) ++{ ++ struct buffer_head *super_bh; ++ struct reiser4_master_sb *master_sb; ++ reiser4_super_info_data *sbinfo = get_super_private(super); ++ unsigned long blocksize; ++ ++ read_super_block: ++#ifdef CONFIG_REISER4_BADBLOCKS ++ if (sbinfo->altsuper) ++ /* ++ * read reiser4 master super block at position specified by ++ * mount option ++ */ ++ super_bh = sb_bread(super, ++ (sector_t)(sbinfo->altsuper / super->s_blocksize)); ++ else ++#endif ++ /* read reiser4 master super block at 16-th 4096 block */ ++ super_bh = sb_bread(super, ++ (sector_t)(REISER4_MAGIC_OFFSET / super->s_blocksize)); ++ if (!super_bh) ++ return RETERR(-EIO); ++ ++ master_sb = (struct reiser4_master_sb *)super_bh->b_data; ++ /* check reiser4 magic string */ ++ if (!strncmp(master_sb->magic, REISER4_SUPER_MAGIC_STRING, ++ sizeof(REISER4_SUPER_MAGIC_STRING))) { ++ /* reiser4 master super block contains filesystem blocksize */ ++ blocksize = le16_to_cpu(get_unaligned(&master_sb->blocksize)); ++ ++ if (blocksize != PAGE_CACHE_SIZE) { ++ /* ++ * currenly reiser4's blocksize must be equal to ++ * pagesize ++ */ ++ if (!silent) ++ warning("nikita-2609", ++ "%s: wrong block size %ld\n", super->s_id, ++ blocksize); ++ brelse(super_bh); ++ return RETERR(-EINVAL); ++ } ++ if (blocksize != super->s_blocksize) { ++ /* ++ * filesystem uses different blocksize. 
Reread master ++ * super block with correct blocksize ++ */ ++ brelse(super_bh); ++ if (!sb_set_blocksize(super, (int)blocksize)) ++ return RETERR(-EINVAL); ++ goto read_super_block; ++ } ++ ++ sbinfo->df_plug = ++ disk_format_plugin_by_id( ++ le16_to_cpu(get_unaligned(&master_sb->disk_plugin_id))); ++ if (sbinfo->df_plug == NULL) { ++ if (!silent) ++ warning("nikita-26091", ++ "%s: unknown disk format plugin %d\n", ++ super->s_id, ++ le16_to_cpu(get_unaligned(&master_sb->disk_plugin_id))); ++ brelse(super_bh); ++ return RETERR(-EINVAL); ++ } ++ sbinfo->diskmap_block = le64_to_cpu(get_unaligned(&master_sb->diskmap)); ++ brelse(super_bh); ++ return 0; ++ } ++ ++ /* there is no reiser4 on the device */ ++ if (!silent) ++ warning("nikita-2608", ++ "%s: wrong master super block magic", super->s_id); ++ brelse(super_bh); ++ return RETERR(-EINVAL); ++} ++ ++static struct { ++ reiser4_plugin_type type; ++ reiser4_plugin_id id; ++} default_plugins[PSET_LAST] = { ++ [PSET_FILE] = { ++ .type = REISER4_FILE_PLUGIN_TYPE, ++ .id = UNIX_FILE_PLUGIN_ID ++ }, ++ [PSET_DIR] = { ++ .type = REISER4_DIR_PLUGIN_TYPE, ++ .id = HASHED_DIR_PLUGIN_ID ++ }, ++ [PSET_HASH] = { ++ .type = REISER4_HASH_PLUGIN_TYPE, ++ .id = R5_HASH_ID ++ }, ++ [PSET_FIBRATION] = { ++ .type = REISER4_FIBRATION_PLUGIN_TYPE, ++ .id = FIBRATION_DOT_O ++ }, ++ [PSET_PERM] = { ++ .type = REISER4_PERM_PLUGIN_TYPE, ++ .id = NULL_PERM_ID ++ }, ++ [PSET_FORMATTING] = { ++ .type = REISER4_FORMATTING_PLUGIN_TYPE, ++ .id = SMALL_FILE_FORMATTING_ID ++ }, ++ [PSET_SD] = { ++ .type = REISER4_ITEM_PLUGIN_TYPE, ++ .id = STATIC_STAT_DATA_ID ++ }, ++ [PSET_DIR_ITEM] = { ++ .type = REISER4_ITEM_PLUGIN_TYPE, ++ .id = COMPOUND_DIR_ID ++ }, ++ [PSET_CIPHER] = { ++ .type = REISER4_CIPHER_PLUGIN_TYPE, ++ .id = NONE_CIPHER_ID ++ }, ++ [PSET_DIGEST] = { ++ .type = REISER4_DIGEST_PLUGIN_TYPE, ++ .id = SHA256_32_DIGEST_ID ++ }, ++ [PSET_COMPRESSION] = { ++ .type = REISER4_COMPRESSION_PLUGIN_TYPE, ++ .id = LZO1_COMPRESSION_ID ++ }, ++ 
[PSET_COMPRESSION_MODE] = { ++ .type = REISER4_COMPRESSION_MODE_PLUGIN_TYPE, ++ .id = CONVX_COMPRESSION_MODE_ID ++ }, ++ [PSET_CLUSTER] = { ++ .type = REISER4_CLUSTER_PLUGIN_TYPE, ++ .id = CLUSTER_64K_ID ++ }, ++ [PSET_CREATE] = { ++ .type = REISER4_FILE_PLUGIN_TYPE, ++ .id = UNIX_FILE_PLUGIN_ID ++ } ++}; ++ ++/* access to default plugin table */ ++reiser4_plugin *get_default_plugin(pset_member memb) ++{ ++ return plugin_by_id(default_plugins[memb].type, ++ default_plugins[memb].id); ++} ++ ++/** ++ * reiser4_init_root_inode - obtain inode of root directory ++ * @super: super block of filesystem ++ * ++ * Obtains inode of root directory (reading it from disk), initializes plugin ++ * set it was not initialized. ++ */ ++int reiser4_init_root_inode(struct super_block *super) ++{ ++ reiser4_super_info_data *sbinfo = get_super_private(super); ++ struct inode *inode; ++ int result = 0; ++ ++ inode = reiser4_iget(super, sbinfo->df_plug->root_dir_key(super), 0); ++ if (IS_ERR(inode)) ++ return RETERR(PTR_ERR(inode)); ++ ++ super->s_root = d_alloc_root(inode); ++ if (!super->s_root) { ++ iput(inode); ++ return RETERR(-ENOMEM); ++ } ++ ++ super->s_root->d_op = &sbinfo->ops.dentry; ++ ++ if (!is_inode_loaded(inode)) { ++ pset_member memb; ++ plugin_set *pset; ++ ++ pset = reiser4_inode_data(inode)->pset; ++ for (memb = 0; memb < PSET_LAST; ++memb) { ++ ++ if (aset_get(pset, memb) != NULL) ++ continue; ++ ++ result = grab_plugin_pset(inode, NULL, memb); ++ if (result != 0) ++ break; ++ ++ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN); ++ } ++ ++ if (result == 0) { ++ if (REISER4_DEBUG) { ++ for (memb = 0; memb < PSET_LAST; ++memb) ++ assert("nikita-3500", ++ aset_get(pset, memb) != NULL); ++ } ++ } else ++ warning("nikita-3448", "Cannot set plugins of root: %i", ++ result); ++ reiser4_iget_complete(inode); ++ ++ /* As the default pset kept in the root dir may has been changed ++ (length is unknown), call update_sd. 
*/ ++ if (!reiser4_inode_get_flag(inode, REISER4_SDLEN_KNOWN)) { ++ result = reiser4_grab_space( ++ inode_file_plugin(inode)->estimate.update(inode), ++ BA_CAN_COMMIT); ++ ++ if (result == 0) ++ result = reiser4_update_sd(inode); ++ ++ all_grabbed2free(); ++ } ++ } ++ ++ super->s_maxbytes = MAX_LFS_FILESIZE; ++ return result; ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * End: ++ */ +diff --git a/fs/reiser4/inode.c b/fs/reiser4/inode.c +new file mode 100644 +index 0000000..2429ac1 +--- /dev/null ++++ b/fs/reiser4/inode.c +@@ -0,0 +1,709 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Inode specific operations. */ ++ ++#include "forward.h" ++#include "debug.h" ++#include "key.h" ++#include "kassign.h" ++#include "coord.h" ++#include "seal.h" ++#include "dscale.h" ++#include "plugin/item/item.h" ++#include "plugin/security/perm.h" ++#include "plugin/plugin.h" ++#include "plugin/object.h" ++#include "znode.h" ++#include "vfs_ops.h" ++#include "inode.h" ++#include "super.h" ++#include "reiser4.h" ++ ++#include /* for struct super_block, address_space */ ++ ++/* return reiser4 internal tree which inode belongs to */ ++/* Audited by: green(2002.06.17) */ ++reiser4_tree *reiser4_tree_by_inode(const struct inode *inode /* inode queried */ ) ++{ ++ assert("nikita-256", inode != NULL); ++ assert("nikita-257", inode->i_sb != NULL); ++ return reiser4_get_tree(inode->i_sb); ++} ++ ++/* return reiser4-specific inode flags */ ++static inline unsigned long *inode_flags(const struct inode *const inode) ++{ ++ assert("nikita-2842", inode != NULL); ++ return &reiser4_inode_data(inode)->flags; ++} ++ ++/* set reiser4-specific flag @f in @inode */ ++void reiser4_inode_set_flag(struct inode *inode, reiser4_file_plugin_flags f) ++{ ++ assert("nikita-2248", inode != NULL); ++ set_bit((int)f, inode_flags(inode)); ++} ++ ++/* clear 
reiser4-specific flag @f in @inode */ ++void reiser4_inode_clr_flag(struct inode *inode, reiser4_file_plugin_flags f) ++{ ++ assert("nikita-2250", inode != NULL); ++ clear_bit((int)f, inode_flags(inode)); ++} ++ ++/* true if reiser4-specific flag @f is set in @inode */ ++int reiser4_inode_get_flag(const struct inode *inode, ++ reiser4_file_plugin_flags f) ++{ ++ assert("nikita-2251", inode != NULL); ++ return test_bit((int)f, inode_flags(inode)); ++} ++ ++/* convert oid to inode number */ ++ino_t oid_to_ino(oid_t oid) ++{ ++ return (ino_t) oid; ++} ++ ++/* convert oid to user visible inode number */ ++ino_t oid_to_uino(oid_t oid) ++{ ++ /* reiser4 object is uniquely identified by oid which is 64 bit ++ quantity. Kernel in-memory inode is indexed (in the hash table) by ++ 32 bit i_ino field, but this is not a problem, because there is a ++ way to further distinguish inodes with identical inode numbers ++ (find_actor supplied to iget()). ++ ++ But user space expects unique 32 bit inode number. Obviously this ++ is impossible. Work-around is to somehow hash oid into user visible ++ inode number. ++ */ ++ oid_t max_ino = (ino_t) ~ 0; ++ ++ if (REISER4_INO_IS_OID || (oid <= max_ino)) ++ return oid; ++ else ++ /* this is remotely similar to algorithm used to find next pid ++ to use for process: after wrap-around start from some ++ offset rather than from 0. Idea is that there are some long ++ living objects with which we don't want to collide. ++ */ ++ return REISER4_UINO_SHIFT + ((oid - max_ino) & (max_ino >> 1)); ++} ++ ++/* check that "inode" is on reiser4 file-system */ ++int is_reiser4_inode(const struct inode *inode /* inode queried */ ) ++{ ++ return inode != NULL && is_reiser4_super(inode->i_sb); ++} ++ ++/* Maximal length of a name that can be stored in directory @inode. ++ ++ This is used in check during file creation and lookup. 
*/ ++int reiser4_max_filename_len(const struct inode *inode /* inode queried */ ) ++{ ++ assert("nikita-287", is_reiser4_inode(inode)); ++ assert("nikita-1710", inode_dir_item_plugin(inode)); ++ if (inode_dir_item_plugin(inode)->s.dir.max_name_len) ++ return inode_dir_item_plugin(inode)->s.dir.max_name_len(inode); ++ else ++ return 255; ++} ++ ++#if REISER4_USE_COLLISION_LIMIT ++/* Maximal number of hash collisions for this directory. */ ++int max_hash_collisions(const struct inode *dir /* inode queried */ ) ++{ ++ assert("nikita-1711", dir != NULL); ++ return reiser4_inode_data(dir)->plugin.max_collisions; ++} ++#endif /* REISER4_USE_COLLISION_LIMIT */ ++ ++/* Install file, inode, and address_space operation on @inode, depending on ++ its mode. */ ++int setup_inode_ops(struct inode *inode /* inode to intialize */ , ++ reiser4_object_create_data * data /* parameters to create ++ * object */ ) ++{ ++ reiser4_super_info_data *sinfo; ++ file_plugin *fplug; ++ dir_plugin *dplug; ++ ++ fplug = inode_file_plugin(inode); ++ dplug = inode_dir_plugin(inode); ++ ++ sinfo = get_super_private(inode->i_sb); ++ ++ switch (inode->i_mode & S_IFMT) { ++ case S_IFSOCK: ++ case S_IFBLK: ++ case S_IFCHR: ++ case S_IFIFO: ++ { ++ dev_t rdev; /* to keep gcc happy */ ++ ++ assert("vs-46", fplug != NULL); ++ /* ugly hack with rdev */ ++ if (data == NULL) { ++ rdev = inode->i_rdev; ++ inode->i_rdev = 0; ++ } else ++ rdev = data->rdev; ++ inode->i_blocks = 0; ++ assert("vs-42", fplug->h.id == SPECIAL_FILE_PLUGIN_ID); ++ inode->i_op = &file_plugins[fplug->h.id].inode_ops; ++ /* initialize inode->i_fop and inode->i_rdev for block and char ++ devices */ ++ init_special_inode(inode, inode->i_mode, rdev); ++ /* all address space operations are null */ ++ inode->i_mapping->a_ops = ++ &file_plugins[fplug->h.id].as_ops; ++ break; ++ } ++ case S_IFLNK: ++ assert("vs-46", fplug != NULL); ++ assert("vs-42", fplug->h.id == SYMLINK_FILE_PLUGIN_ID); ++ inode->i_op = &file_plugins[fplug->h.id].inode_ops; 
++ inode->i_fop = NULL; ++ /* all address space operations are null */ ++ inode->i_mapping->a_ops = &file_plugins[fplug->h.id].as_ops; ++ break; ++ case S_IFDIR: ++ assert("vs-46", dplug != NULL); ++ assert("vs-43", (dplug->h.id == HASHED_DIR_PLUGIN_ID || ++ dplug->h.id == SEEKABLE_HASHED_DIR_PLUGIN_ID)); ++ inode->i_op = &dir_plugins[dplug->h.id].inode_ops; ++ inode->i_fop = &dir_plugins[dplug->h.id].file_ops; ++ inode->i_mapping->a_ops = &dir_plugins[dplug->h.id].as_ops; ++ break; ++ case S_IFREG: ++ assert("vs-46", fplug != NULL); ++ assert("vs-43", (fplug->h.id == UNIX_FILE_PLUGIN_ID || ++ fplug->h.id == CRYPTCOMPRESS_FILE_PLUGIN_ID)); ++ inode->i_op = &file_plugins[fplug->h.id].inode_ops; ++ inode->i_fop = &file_plugins[fplug->h.id].file_ops; ++ inode->i_mapping->a_ops = &file_plugins[fplug->h.id].as_ops; ++ break; ++ default: ++ warning("nikita-291", "wrong file mode: %o for %llu", ++ inode->i_mode, ++ (unsigned long long)get_inode_oid(inode)); ++ reiser4_make_bad_inode(inode); ++ return RETERR(-EINVAL); ++ } ++ return 0; ++} ++ ++/* Initialize inode from disk data. Called with inode locked. ++ Return inode locked. 
*/ ++static int init_inode(struct inode *inode /* inode to intialise */ , ++ coord_t * coord /* coord of stat data */ ) ++{ ++ int result; ++ item_plugin *iplug; ++ void *body; ++ int length; ++ reiser4_inode *state; ++ ++ assert("nikita-292", coord != NULL); ++ assert("nikita-293", inode != NULL); ++ ++ coord_clear_iplug(coord); ++ result = zload(coord->node); ++ if (result) ++ return result; ++ iplug = item_plugin_by_coord(coord); ++ body = item_body_by_coord(coord); ++ length = item_length_by_coord(coord); ++ ++ assert("nikita-295", iplug != NULL); ++ assert("nikita-296", body != NULL); ++ assert("nikita-297", length > 0); ++ ++ /* inode is under I_LOCK now */ ++ ++ state = reiser4_inode_data(inode); ++ /* call stat-data plugin method to load sd content into inode */ ++ result = iplug->s.sd.init_inode(inode, body, length); ++ set_plugin(&state->pset, PSET_SD, item_plugin_to_plugin(iplug)); ++ if (result == 0) { ++ result = setup_inode_ops(inode, NULL); ++ if (result == 0 && inode->i_sb->s_root && ++ inode->i_sb->s_root->d_inode) ++ result = finish_pset(inode); ++ } ++ zrelse(coord->node); ++ return result; ++} ++ ++/* read `inode' from the disk. This is what was previously in ++ reiserfs_read_inode2(). ++ ++ Must be called with inode locked. Return inode still locked. 
++*/ ++static int read_inode(struct inode *inode /* inode to read from disk */ , ++ const reiser4_key * key /* key of stat data */ , ++ int silent) ++{ ++ int result; ++ lock_handle lh; ++ reiser4_inode *info; ++ coord_t coord; ++ ++ assert("nikita-298", inode != NULL); ++ assert("nikita-1945", !is_inode_loaded(inode)); ++ ++ info = reiser4_inode_data(inode); ++ assert("nikita-300", info->locality_id != 0); ++ ++ coord_init_zero(&coord); ++ init_lh(&lh); ++ /* locate stat-data in a tree and return znode locked */ ++ result = lookup_sd(inode, ZNODE_READ_LOCK, &coord, &lh, key, silent); ++ assert("nikita-301", !is_inode_loaded(inode)); ++ if (result == 0) { ++ /* use stat-data plugin to load sd into inode. */ ++ result = init_inode(inode, &coord); ++ if (result == 0) { ++ /* initialize stat-data seal */ ++ spin_lock_inode(inode); ++ reiser4_seal_init(&info->sd_seal, &coord, key); ++ info->sd_coord = coord; ++ spin_unlock_inode(inode); ++ ++ /* call file plugin's method to initialize plugin ++ * specific part of inode */ ++ if (inode_file_plugin(inode)->init_inode_data) ++ inode_file_plugin(inode)->init_inode_data(inode, ++ NULL, ++ 0); ++ /* load detached directory cursors for stateless ++ * directory readers (NFS). */ ++ reiser4_load_cursors(inode); ++ ++ /* Check the opened inode for consistency. */ ++ result = ++ get_super_private(inode->i_sb)->df_plug-> ++ check_open(inode); ++ } ++ } ++ /* lookup_sd() doesn't release coord because we want znode ++ stay read-locked while stat-data fields are accessed in ++ init_inode() */ ++ done_lh(&lh); ++ ++ if (result != 0) ++ reiser4_make_bad_inode(inode); ++ return result; ++} ++ ++/* initialise new reiser4 inode being inserted into hash table. 
*/ ++static int init_locked_inode(struct inode *inode /* new inode */ , ++ void *opaque /* key of stat data passed to the ++ * iget5_locked as cookie */ ) ++{ ++ reiser4_key *key; ++ ++ assert("nikita-1995", inode != NULL); ++ assert("nikita-1996", opaque != NULL); ++ key = opaque; ++ set_inode_oid(inode, get_key_objectid(key)); ++ reiser4_inode_data(inode)->locality_id = get_key_locality(key); ++ return 0; ++} ++ ++/* reiser4_inode_find_actor() - "find actor" supplied by reiser4 to iget5_locked(). ++ ++ This function is called by iget5_locked() to distinguish reiser4 inodes ++ having the same inode numbers. Such inodes can only exist due to some error ++ condition. One of them should be bad. Inodes with identical inode numbers ++ (objectids) are distinguished by their packing locality. ++ ++*/ ++static int reiser4_inode_find_actor(struct inode *inode /* inode from hash table to ++ * check */ , ++ void *opaque /* "cookie" passed to ++ * iget5_locked(). This is stat data ++ * key */ ) ++{ ++ reiser4_key *key; ++ ++ key = opaque; ++ return ++ /* oid is unique, so first term is enough, actually. */ ++ get_inode_oid(inode) == get_key_objectid(key) && ++ /* ++ * also, locality should be checked, but locality is stored in ++ * the reiser4-specific part of the inode, and actor can be ++ * called against arbitrary inode that happened to be in this ++ * hash chain. Hence we first have to check that this is ++ * reiser4 inode at least. is_reiser4_inode() is probably too ++ * early to call, as inode may have ->i_op not yet ++ * initialised. ++ */ ++ is_reiser4_super(inode->i_sb) && ++ /* ++ * usually objectid is unique, but pseudo files use counter to ++ * generate objectid. All pseudo files are placed into special ++ * (otherwise unused) locality. 
++ */ ++ reiser4_inode_data(inode)->locality_id == get_key_locality(key); ++} ++ ++/* hook for kmem_cache_create */ ++void loading_init_once(reiser4_inode * info) ++{ ++ mutex_init(&info->loading); ++} ++ ++/* for reiser4_alloc_inode */ ++void loading_alloc(reiser4_inode * info) ++{ ++ assert("vs-1717", !mutex_is_locked(&info->loading)); ++} ++ ++/* for reiser4_destroy */ ++void loading_destroy(reiser4_inode * info) ++{ ++ assert("vs-1717a", !mutex_is_locked(&info->loading)); ++} ++ ++static void loading_begin(reiser4_inode * info) ++{ ++ mutex_lock(&info->loading); ++} ++ ++static void loading_end(reiser4_inode * info) ++{ ++ mutex_unlock(&info->loading); ++} ++ ++/** ++ * reiser4_iget - obtain inode via iget5_locked, read from disk if necessary ++ * @super: super block of filesystem ++ * @key: key of inode's stat-data ++ * @silent: ++ * ++ * This is our helper function a la iget(). This is be called by ++ * lookup_common() and reiser4_read_super(). Return inode locked or error ++ * encountered. ++ */ ++struct inode *reiser4_iget(struct super_block *super, const reiser4_key *key, ++ int silent) ++{ ++ struct inode *inode; ++ int result; ++ reiser4_inode *info; ++ ++ assert("nikita-302", super != NULL); ++ assert("nikita-303", key != NULL); ++ ++ result = 0; ++ ++ /* call iget(). Our ->read_inode() is dummy, so this will either ++ find inode in cache or return uninitialised inode */ ++ inode = iget5_locked(super, ++ (unsigned long)get_key_objectid(key), ++ reiser4_inode_find_actor, ++ init_locked_inode, (reiser4_key *) key); ++ if (inode == NULL) ++ return ERR_PTR(RETERR(-ENOMEM)); ++ if (is_bad_inode(inode)) { ++ warning("nikita-304", "Bad inode found"); ++ reiser4_print_key("key", key); ++ iput(inode); ++ return ERR_PTR(RETERR(-EIO)); ++ } ++ ++ info = reiser4_inode_data(inode); ++ ++ /* Reiser4 inode state bit REISER4_LOADED is used to distinguish fully ++ loaded and initialized inode from just allocated inode. 
If ++ REISER4_LOADED bit is not set, reiser4_iget() completes loading under ++ info->loading. The place in reiser4 which uses not initialized inode ++ is the reiser4 repacker, see repacker-related functions in ++ plugin/item/extent.c */ ++ if (!is_inode_loaded(inode)) { ++ loading_begin(info); ++ if (!is_inode_loaded(inode)) { ++ /* locking: iget5_locked returns locked inode */ ++ assert("nikita-1941", !is_inode_loaded(inode)); ++ assert("nikita-1949", ++ reiser4_inode_find_actor(inode, ++ (reiser4_key *) key)); ++ /* now, inode has objectid as ->i_ino and locality in ++ reiser4-specific part. This is enough for ++ read_inode() to read stat data from the disk */ ++ result = read_inode(inode, key, silent); ++ } else ++ loading_end(info); ++ } ++ ++ if (inode->i_state & I_NEW) ++ unlock_new_inode(inode); ++ ++ if (is_bad_inode(inode)) { ++ assert("vs-1717", result != 0); ++ loading_end(info); ++ iput(inode); ++ inode = ERR_PTR(result); ++ } else if (REISER4_DEBUG) { ++ reiser4_key found_key; ++ ++ assert("vs-1717", result == 0); ++ build_sd_key(inode, &found_key); ++ if (!keyeq(&found_key, key)) { ++ warning("nikita-305", "Wrong key in sd"); ++ reiser4_print_key("sought for", key); ++ reiser4_print_key("found", &found_key); ++ } ++ if (inode->i_nlink == 0) { ++ warning("nikita-3559", "Unlinked inode found: %llu\n", ++ (unsigned long long)get_inode_oid(inode)); ++ } ++ } ++ return inode; ++} ++ ++/* reiser4_iget() may return not fully initialized inode, this function should ++ * be called after one completes reiser4 inode initializing. 
*/ ++void reiser4_iget_complete(struct inode *inode) ++{ ++ assert("zam-988", is_reiser4_inode(inode)); ++ ++ if (!is_inode_loaded(inode)) { ++ reiser4_inode_set_flag(inode, REISER4_LOADED); ++ loading_end(reiser4_inode_data(inode)); ++ } ++} ++ ++void reiser4_make_bad_inode(struct inode *inode) ++{ ++ assert("nikita-1934", inode != NULL); ++ ++ /* clear LOADED bit */ ++ reiser4_inode_clr_flag(inode, REISER4_LOADED); ++ make_bad_inode(inode); ++ return; ++} ++ ++file_plugin *inode_file_plugin(const struct inode * inode) ++{ ++ assert("nikita-1997", inode != NULL); ++ return reiser4_inode_data(inode)->pset->file; ++} ++ ++dir_plugin *inode_dir_plugin(const struct inode * inode) ++{ ++ assert("nikita-1998", inode != NULL); ++ return reiser4_inode_data(inode)->pset->dir; ++} ++ ++formatting_plugin *inode_formatting_plugin(const struct inode * inode) ++{ ++ assert("nikita-2000", inode != NULL); ++ return reiser4_inode_data(inode)->pset->formatting; ++} ++ ++hash_plugin *inode_hash_plugin(const struct inode * inode) ++{ ++ assert("nikita-2001", inode != NULL); ++ return reiser4_inode_data(inode)->pset->hash; ++} ++ ++fibration_plugin *inode_fibration_plugin(const struct inode * inode) ++{ ++ assert("nikita-2001", inode != NULL); ++ return reiser4_inode_data(inode)->pset->fibration; ++} ++ ++cipher_plugin *inode_cipher_plugin(const struct inode * inode) ++{ ++ assert("edward-36", inode != NULL); ++ return reiser4_inode_data(inode)->pset->cipher; ++} ++ ++compression_plugin *inode_compression_plugin(const struct inode * inode) ++{ ++ assert("edward-37", inode != NULL); ++ return reiser4_inode_data(inode)->pset->compression; ++} ++ ++compression_mode_plugin *inode_compression_mode_plugin(const struct inode * ++ inode) ++{ ++ assert("edward-1330", inode != NULL); ++ return reiser4_inode_data(inode)->pset->compression_mode; ++} ++ ++cluster_plugin *inode_cluster_plugin(const struct inode * inode) ++{ ++ assert("edward-1328", inode != NULL); ++ return 
reiser4_inode_data(inode)->pset->cluster; ++} ++ ++file_plugin *inode_create_plugin(const struct inode * inode) ++{ ++ assert("edward-1329", inode != NULL); ++ return reiser4_inode_data(inode)->pset->create; ++} ++ ++digest_plugin *inode_digest_plugin(const struct inode * inode) ++{ ++ assert("edward-86", inode != NULL); ++ return reiser4_inode_data(inode)->pset->digest; ++} ++ ++item_plugin *inode_sd_plugin(const struct inode * inode) ++{ ++ assert("vs-534", inode != NULL); ++ return reiser4_inode_data(inode)->pset->sd; ++} ++ ++item_plugin *inode_dir_item_plugin(const struct inode * inode) ++{ ++ assert("vs-534", inode != NULL); ++ return reiser4_inode_data(inode)->pset->dir_item; ++} ++ ++file_plugin *child_create_plugin(const struct inode * inode) ++{ ++ assert("edward-1329", inode != NULL); ++ return reiser4_inode_data(inode)->hset->create; ++} ++ ++void inode_set_extension(struct inode *inode, sd_ext_bits ext) ++{ ++ reiser4_inode *state; ++ ++ assert("nikita-2716", inode != NULL); ++ assert("nikita-2717", ext < LAST_SD_EXTENSION); ++ assert("nikita-3491", spin_inode_is_locked(inode)); ++ ++ state = reiser4_inode_data(inode); ++ state->extmask |= 1 << ext; ++ /* force re-calculation of stat-data length on next call to ++ update_sd(). */ ++ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN); ++} ++ ++void inode_clr_extension(struct inode *inode, sd_ext_bits ext) ++{ ++ reiser4_inode *state; ++ ++ assert("vpf-1926", inode != NULL); ++ assert("vpf-1927", ext < LAST_SD_EXTENSION); ++ assert("vpf-1928", spin_inode_is_locked(inode)); ++ ++ state = reiser4_inode_data(inode); ++ state->extmask &= ~(1 << ext); ++ /* force re-calculation of stat-data length on next call to ++ update_sd(). 
*/ ++ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN); ++} ++ ++void inode_check_scale_nolock(struct inode *inode, __u64 old, __u64 new) ++{ ++ assert("edward-1287", inode != NULL); ++ if (!dscale_fit(old, new)) ++ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN); ++ return; ++} ++ ++void inode_check_scale(struct inode *inode, __u64 old, __u64 new) ++{ ++ assert("nikita-2875", inode != NULL); ++ spin_lock_inode(inode); ++ inode_check_scale_nolock(inode, old, new); ++ spin_unlock_inode(inode); ++} ++ ++/* ++ * initialize ->ordering field of inode. This field defines how file stat-data ++ * and body is ordered within a tree with respect to other objects within the ++ * same parent directory. ++ */ ++void ++init_inode_ordering(struct inode *inode, ++ reiser4_object_create_data * crd, int create) ++{ ++ reiser4_key key; ++ ++ if (create) { ++ struct inode *parent; ++ ++ parent = crd->parent; ++ assert("nikita-3224", inode_dir_plugin(parent) != NULL); ++ inode_dir_plugin(parent)->build_entry_key(parent, ++ &crd->dentry->d_name, ++ &key); ++ } else { ++ coord_t *coord; ++ ++ coord = &reiser4_inode_data(inode)->sd_coord; ++ coord_clear_iplug(coord); ++ /* safe to use ->sd_coord, because node is under long term ++ * lock */ ++ WITH_DATA(coord->node, item_key_by_coord(coord, &key)); ++ } ++ ++ set_inode_ordering(inode, get_key_ordering(&key)); ++} ++ ++znode *inode_get_vroot(struct inode *inode) ++{ ++ reiser4_block_nr blk; ++ znode *result; ++ ++ spin_lock_inode(inode); ++ blk = reiser4_inode_data(inode)->vroot; ++ spin_unlock_inode(inode); ++ if (!disk_addr_eq(&UBER_TREE_ADDR, &blk)) ++ result = zlook(reiser4_tree_by_inode(inode), &blk); ++ else ++ result = NULL; ++ return result; ++} ++ ++void inode_set_vroot(struct inode *inode, znode *vroot) ++{ ++ spin_lock_inode(inode); ++ reiser4_inode_data(inode)->vroot = *znode_get_block(vroot); ++ spin_unlock_inode(inode); ++} ++ ++#if REISER4_DEBUG ++ ++void reiser4_inode_invariant(const struct inode *inode) ++{ ++ 
assert("nikita-3077", spin_inode_is_locked(inode)); ++} ++ ++int inode_has_no_jnodes(reiser4_inode * r4_inode) ++{ ++ return jnode_tree_by_reiser4_inode(r4_inode)->rnode == NULL && ++ r4_inode->nr_jnodes == 0; ++} ++ ++#endif ++ ++/* true if directory is empty (only contains dot and dotdot) */ ++/* FIXME: shouldn't it be dir plugin method? */ ++int is_dir_empty(const struct inode *dir) ++{ ++ assert("nikita-1976", dir != NULL); ++ ++ /* rely on our method to maintain directory i_size being equal to the ++ number of entries. */ ++ return dir->i_size <= 2 ? 0 : RETERR(-ENOTEMPTY); ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/inode.h b/fs/reiser4/inode.h +new file mode 100644 +index 0000000..2cc1d82 +--- /dev/null ++++ b/fs/reiser4/inode.h +@@ -0,0 +1,438 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Inode functions. */ ++ ++#if !defined( __REISER4_INODE_H__ ) ++#define __REISER4_INODE_H__ ++ ++#include "forward.h" ++#include "debug.h" ++#include "key.h" ++#include "seal.h" ++#include "plugin/plugin.h" ++#include "plugin/file/cryptcompress.h" ++#include "plugin/file/file.h" ++#include "plugin/dir/dir.h" ++#include "plugin/plugin_set.h" ++#include "plugin/security/perm.h" ++#include "vfs_ops.h" ++#include "jnode.h" ++#include "fsdata.h" ++ ++#include /* for __u?? , ino_t */ ++#include /* for struct super_block, struct ++ * rw_semaphore, etc */ ++#include ++#include ++ ++/* reiser4-specific inode flags. They are "transient" and are not ++ supposed to be stored on disk. Used to trace "state" of ++ inode ++*/ ++typedef enum { ++ /* this is light-weight inode, inheriting some state from its ++ parent */ ++ REISER4_LIGHT_WEIGHT = 0, ++ /* stat data wasn't yet created */ ++ REISER4_NO_SD = 1, ++ /* internal immutable flag. 
Currently is only used ++ to avoid race condition during file creation. ++ See comment in create_object(). */ ++ REISER4_IMMUTABLE = 2, ++ /* inode was read from storage */ ++ REISER4_LOADED = 3, ++ /* this bit is set for symlinks. inode->i_private points to target ++ name of symlink. */ ++ REISER4_GENERIC_PTR_USED = 4, ++ /* set if size of stat-data item for this inode is known. If this is ++ * set we can avoid recalculating size of stat-data on each update. */ ++ REISER4_SDLEN_KNOWN = 5, ++ /* reiser4_inode->crypt points to the crypto stat */ ++ REISER4_CRYPTO_STAT_LOADED = 6, ++ /* cryptcompress_inode_data points to the secret key */ ++ REISER4_SECRET_KEY_INSTALLED = 7, ++ /* File (possibly) has pages corresponding to the tail items, that ++ * were created by ->readpage. It is set by mmap_unix_file() and ++ * sendfile_unix_file(). This bit is inspected by write_unix_file and ++ * kill-hook of tail items. It is never cleared once set. This bit is ++ * modified and inspected under i_mutex. */ ++ REISER4_HAS_MMAP = 8, ++ REISER4_PART_MIXED = 9, ++ REISER4_PART_IN_CONV = 10, ++ /* This flag indicates that file plugin conversion is in progress */ ++ REISER4_FILE_CONV_IN_PROGRESS = 11 ++} reiser4_file_plugin_flags; ++ ++/* state associated with each inode. ++ reiser4 inode. ++ ++ NOTE-NIKITA In 2.5 kernels it is not necessary that all file-system inodes ++ be of the same size. File-system allocates inodes by itself through ++ s_op->allocate_inode() method. So, it is possible to adjust size of inode ++ at the time of its creation. 
++ ++ Invariants involving parts of this data-type: ++ ++ [inode->eflushed] ++ ++*/ ++ ++typedef struct reiser4_inode reiser4_inode; ++/* return pointer to reiser4-specific part of inode */ ++static inline reiser4_inode *reiser4_inode_data(const struct inode *inode ++ /* inode queried */ ); ++ ++#if BITS_PER_LONG == 64 ++ ++#define REISER4_INO_IS_OID (1) ++typedef struct {; ++} oid_hi_t; ++ ++/* BITS_PER_LONG == 64 */ ++#else ++ ++#define REISER4_INO_IS_OID (0) ++typedef __u32 oid_hi_t; ++ ++/* BITS_PER_LONG == 64 */ ++#endif ++ ++struct reiser4_inode { ++ /* spin lock protecting fields of this structure. */ ++ spinlock_t guard; ++ /* main plugin set that control the file ++ (see comments in plugin/plugin_set.c) */ ++ plugin_set *pset; ++ /* plugin set for inheritance ++ (see comments in plugin/plugin_set.c) */ ++ plugin_set *hset; ++ /* high 32 bits of object id */ ++ oid_hi_t oid_hi; ++ /* seal for stat-data */ ++ seal_t sd_seal; ++ /* locality id for this file */ ++ oid_t locality_id; ++#if REISER4_LARGE_KEY ++ __u64 ordering; ++#endif ++ /* coord of stat-data in sealed node */ ++ coord_t sd_coord; ++ /* bit-mask of stat-data extentions used by this file */ ++ __u64 extmask; ++ /* bitmask of non-default plugins for this inode */ ++ __u16 plugin_mask; ++ /* bitmask of set heir plugins for this inode. */ ++ __u16 heir_mask; ++ union { ++ struct list_head readdir_list; ++ struct list_head not_used; ++ } lists; ++ /* per-inode flags. Filled by values of reiser4_file_plugin_flags */ ++ unsigned long flags; ++ union { ++ /* fields specific to unix_file plugin */ ++ unix_file_info_t unix_file_info; ++ /* fields specific to cryptcompress plugin */ ++ cryptcompress_info_t cryptcompress_info; ++ } file_plugin_data; ++ ++ /* this semaphore is to serialize readers and writers of @pset->file ++ * when file plugin conversion is enabled ++ */ ++ struct rw_semaphore conv_sem; ++ ++ /* tree of jnodes. 
Phantom jnodes (ones not attched to any atom) are ++ tagged in that tree by EFLUSH_TAG_ANONYMOUS */ ++ struct radix_tree_root jnodes_tree; ++#if REISER4_DEBUG ++ /* number of unformatted node jnodes of this file in jnode hash table */ ++ unsigned long nr_jnodes; ++#endif ++ ++ /* block number of virtual root for this object. See comment above ++ * fs/reiser4/search.c:handle_vroot() */ ++ reiser4_block_nr vroot; ++ struct mutex loading; ++}; ++ ++void loading_init_once(reiser4_inode *); ++void loading_alloc(reiser4_inode *); ++void loading_destroy(reiser4_inode *); ++ ++typedef struct reiser4_inode_object { ++ /* private part */ ++ reiser4_inode p; ++ /* generic fields not specific to reiser4, but used by VFS */ ++ struct inode vfs_inode; ++} reiser4_inode_object; ++ ++/* return pointer to the reiser4 specific portion of @inode */ ++static inline reiser4_inode *reiser4_inode_data(const struct inode *inode ++ /* inode queried */ ) ++{ ++ assert("nikita-254", inode != NULL); ++ return &container_of(inode, reiser4_inode_object, vfs_inode)->p; ++} ++ ++static inline struct inode *inode_by_reiser4_inode(const reiser4_inode * ++ r4_inode /* inode queried */ ++ ) ++{ ++ return &container_of(r4_inode, reiser4_inode_object, p)->vfs_inode; ++} ++ ++/* ++ * reiser4 inodes are identified by 64bit object-id (oid_t), but in struct ++ * inode ->i_ino field is of type ino_t (long) that can be either 32 or 64 ++ * bits. ++ * ++ * If ->i_ino is 32 bits we store remaining 32 bits in reiser4 specific part ++ * of inode, otherwise whole oid is stored in i_ino. ++ * ++ * Wrappers below ([sg]et_inode_oid()) are used to hide this difference. 
++ */ ++ ++#define OID_HI_SHIFT (sizeof(ino_t) * 8) ++ ++#if REISER4_INO_IS_OID ++ ++static inline oid_t get_inode_oid(const struct inode *inode) ++{ ++ return inode->i_ino; ++} ++ ++static inline void set_inode_oid(struct inode *inode, oid_t oid) ++{ ++ inode->i_ino = oid; ++} ++ ++/* REISER4_INO_IS_OID */ ++#else ++ ++static inline oid_t get_inode_oid(const struct inode *inode) ++{ ++ return ++ ((__u64) reiser4_inode_data(inode)->oid_hi << OID_HI_SHIFT) | ++ inode->i_ino; ++} ++ ++static inline void set_inode_oid(struct inode *inode, oid_t oid) ++{ ++ assert("nikita-2519", inode != NULL); ++ inode->i_ino = (ino_t) (oid); ++ reiser4_inode_data(inode)->oid_hi = (oid) >> OID_HI_SHIFT; ++ assert("nikita-2521", get_inode_oid(inode) == (oid)); ++} ++ ++/* REISER4_INO_IS_OID */ ++#endif ++ ++static inline oid_t get_inode_locality(const struct inode *inode) ++{ ++ return reiser4_inode_data(inode)->locality_id; ++} ++ ++#if REISER4_LARGE_KEY ++static inline __u64 get_inode_ordering(const struct inode *inode) ++{ ++ return reiser4_inode_data(inode)->ordering; ++} ++ ++static inline void set_inode_ordering(const struct inode *inode, __u64 ordering) ++{ ++ reiser4_inode_data(inode)->ordering = ordering; ++} ++ ++#else ++ ++#define get_inode_ordering(inode) (0) ++#define set_inode_ordering(inode, val) noop ++ ++#endif ++ ++/* return inode in which @uf_info is embedded */ ++static inline struct inode *unix_file_info_to_inode(const unix_file_info_t * ++ uf_info) ++{ ++ return &container_of(uf_info, reiser4_inode_object, ++ p.file_plugin_data.unix_file_info)->vfs_inode; ++} ++ ++extern ino_t oid_to_ino(oid_t oid) __attribute__ ((const)); ++extern ino_t oid_to_uino(oid_t oid) __attribute__ ((const)); ++ ++extern reiser4_tree *reiser4_tree_by_inode(const struct inode *inode); ++ ++#if REISER4_DEBUG ++extern void reiser4_inode_invariant(const struct inode *inode); ++extern int inode_has_no_jnodes(reiser4_inode *); ++#else ++#define reiser4_inode_invariant(inode) noop ++#endif ++ 
++static inline int spin_inode_is_locked(const struct inode *inode) ++{ ++ assert_spin_locked(&reiser4_inode_data(inode)->guard); ++ return 1; ++} ++ ++/** ++ * spin_lock_inode - lock reiser4_inode' embedded spinlock ++ * @inode: inode to lock ++ * ++ * In debug mode it checks that lower priority locks are not held and ++ * increments reiser4_context's lock counters on which lock ordering checking ++ * is based. ++ */ ++static inline void spin_lock_inode(struct inode *inode) ++{ ++ assert("", LOCK_CNT_NIL(spin_locked)); ++ /* check lock ordering */ ++ assert_spin_not_locked(&d_lock); ++ ++ spin_lock(&reiser4_inode_data(inode)->guard); ++ ++ LOCK_CNT_INC(spin_locked_inode); ++ LOCK_CNT_INC(spin_locked); ++ ++ reiser4_inode_invariant(inode); ++} ++ ++/** ++ * spin_unlock_inode - unlock reiser4_inode' embedded spinlock ++ * @inode: inode to unlock ++ * ++ * In debug mode it checks that spinlock is held and decrements ++ * reiser4_context's lock counters on which lock ordering checking is based. 
++ */ ++static inline void spin_unlock_inode(struct inode *inode) ++{ ++ assert_spin_locked(&reiser4_inode_data(inode)->guard); ++ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_inode)); ++ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); ++ ++ reiser4_inode_invariant(inode); ++ ++ LOCK_CNT_DEC(spin_locked_inode); ++ LOCK_CNT_DEC(spin_locked); ++ ++ spin_unlock(&reiser4_inode_data(inode)->guard); ++} ++ ++extern znode *inode_get_vroot(struct inode *inode); ++extern void inode_set_vroot(struct inode *inode, znode * vroot); ++ ++extern int reiser4_max_filename_len(const struct inode *inode); ++extern int max_hash_collisions(const struct inode *dir); ++extern void reiser4_unlock_inode(struct inode *inode); ++extern int is_reiser4_inode(const struct inode *inode); ++extern int setup_inode_ops(struct inode *inode, reiser4_object_create_data *); ++extern struct inode *reiser4_iget(struct super_block *super, ++ const reiser4_key * key, int silent); ++extern void reiser4_iget_complete(struct inode *inode); ++extern void reiser4_inode_set_flag(struct inode *inode, reiser4_file_plugin_flags f); ++extern void reiser4_inode_clr_flag(struct inode *inode, reiser4_file_plugin_flags f); ++extern int reiser4_inode_get_flag(const struct inode *inode, ++ reiser4_file_plugin_flags f); ++ ++/* has inode been initialized? 
*/ ++static inline int ++is_inode_loaded(const struct inode *inode /* inode queried */ ) ++{ ++ assert("nikita-1120", inode != NULL); ++ return reiser4_inode_get_flag(inode, REISER4_LOADED); ++} ++ ++extern file_plugin *inode_file_plugin(const struct inode *inode); ++extern dir_plugin *inode_dir_plugin(const struct inode *inode); ++extern formatting_plugin *inode_formatting_plugin(const struct inode *inode); ++extern hash_plugin *inode_hash_plugin(const struct inode *inode); ++extern fibration_plugin *inode_fibration_plugin(const struct inode *inode); ++extern cipher_plugin *inode_cipher_plugin(const struct inode *inode); ++extern digest_plugin *inode_digest_plugin(const struct inode *inode); ++extern compression_plugin *inode_compression_plugin(const struct inode *inode); ++extern compression_mode_plugin *inode_compression_mode_plugin(const struct inode ++ *inode); ++extern cluster_plugin *inode_cluster_plugin(const struct inode *inode); ++extern file_plugin *inode_create_plugin(const struct inode *inode); ++extern item_plugin *inode_sd_plugin(const struct inode *inode); ++extern item_plugin *inode_dir_item_plugin(const struct inode *inode); ++extern file_plugin *child_create_plugin(const struct inode *inode); ++ ++extern void reiser4_make_bad_inode(struct inode *inode); ++ ++extern void inode_set_extension(struct inode *inode, sd_ext_bits ext); ++extern void inode_clr_extension(struct inode *inode, sd_ext_bits ext); ++extern void inode_check_scale(struct inode *inode, __u64 old, __u64 new); ++extern void inode_check_scale_nolock(struct inode * inode, __u64 old, __u64 new); ++ ++/* ++ * update field @field in inode @i to contain value @value. 
++ */ ++#define INODE_SET_FIELD(i, field, value) \ ++({ \ ++ struct inode *__i; \ ++ typeof(value) __v; \ ++ \ ++ __i = (i); \ ++ __v = (value); \ ++ inode_check_scale(__i, __i->field, __v); \ ++ __i->field = __v; \ ++}) ++ ++#define INODE_INC_FIELD(i, field) \ ++({ \ ++ struct inode *__i; \ ++ \ ++ __i = (i); \ ++ inode_check_scale(__i, __i->field, __i->field + 1); \ ++ ++ __i->field; \ ++}) ++ ++#define INODE_DEC_FIELD(i, field) \ ++({ \ ++ struct inode *__i; \ ++ \ ++ __i = (i); \ ++ inode_check_scale(__i, __i->field, __i->field - 1); \ ++ -- __i->field; \ ++}) ++ ++/* See comment before reiser4_readdir_common() for description. */ ++static inline struct list_head *get_readdir_list(const struct inode *inode) ++{ ++ return &reiser4_inode_data(inode)->lists.readdir_list; ++} ++ ++extern void init_inode_ordering(struct inode *inode, ++ reiser4_object_create_data * crd, int create); ++ ++static inline struct radix_tree_root *jnode_tree_by_inode(struct inode *inode) ++{ ++ return &reiser4_inode_data(inode)->jnodes_tree; ++} ++ ++static inline struct radix_tree_root *jnode_tree_by_reiser4_inode(reiser4_inode ++ * r4_inode) ++{ ++ return &r4_inode->jnodes_tree; ++} ++ ++#if REISER4_DEBUG ++extern void print_inode(const char *prefix, const struct inode *i); ++#endif ++ ++int is_dir_empty(const struct inode *); ++ ++/* __REISER4_INODE_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/ioctl.h b/fs/reiser4/ioctl.h +new file mode 100644 +index 0000000..4d57737 +--- /dev/null ++++ b/fs/reiser4/ioctl.h +@@ -0,0 +1,41 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++#if !defined( __REISER4_IOCTL_H__ ) ++#define __REISER4_IOCTL_H__ ++ ++#include ++ ++/* ++ * ioctl(2) command used to "unpack" reiser4 file, that is, convert it into ++ * extents and fix in this state. 
This is used by applications that rely on ++ * ++ * . files being block aligned, and ++ * ++ * . files never migrating on disk ++ * ++ * for example, boot loaders (LILO) need this. ++ * ++ * This ioctl should be used as ++ * ++ * result = ioctl(fd, REISER4_IOC_UNPACK); ++ * ++ * File behind fd descriptor will be converted to the extents (if necessary), ++ * and its stat-data will be updated so that it will never be converted back ++ * into tails again. ++ */ ++#define REISER4_IOC_UNPACK _IOW(0xCD,1,long) ++ ++/* __REISER4_IOCTL_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/jnode.c b/fs/reiser4/jnode.c +new file mode 100644 +index 0000000..1d16d41 +--- /dev/null ++++ b/fs/reiser4/jnode.c +@@ -0,0 +1,1925 @@ ++/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++/* Jnode manipulation functions. */ ++/* Jnode is entity used to track blocks with data and meta-data in reiser4. ++ ++ In particular, jnodes are used to track transactional information ++ associated with each block. Each znode contains jnode as ->zjnode field. ++ ++ Jnode stands for either Josh or Journal node. ++*/ ++ ++/* ++ * Taxonomy. ++ * ++ * Jnode represents block containing data or meta-data. There are jnodes ++ * for: ++ * ++ * unformatted blocks (jnodes proper). There are plans, however to ++ * have a handle per extent unit rather than per each unformatted ++ * block, because there are so many of them. ++ * ++ * For bitmaps. Each bitmap is actually represented by two jnodes--one ++ * for working and another for "commit" data, together forming bnode. ++ * ++ * For io-heads. These are used by log writer. ++ * ++ * For formatted nodes (znode). See comment at the top of znode.c for ++ * details specific to the formatted nodes (znodes). ++ * ++ * Node data. 
++ * ++ * Jnode provides access to the data of node it represents. Data are ++ * stored in a page. Page is kept in a page cache. This means, that jnodes ++ * are highly interconnected with page cache and VM internals. ++ * ++ * jnode has a pointer to page (->pg) containing its data. Pointer to data ++ * themselves is cached in ->data field to avoid frequent calls to ++ * page_address(). ++ * ++ * jnode and page are attached to each other by jnode_attach_page(). This ++ * function places pointer to jnode in set_page_private(), sets PG_private ++ * flag and increments page counter. ++ * ++ * Opposite operation is performed by page_clear_jnode(). ++ * ++ * jnode->pg is protected by jnode spin lock, and page->private is ++ * protected by page lock. See comment at the top of page_cache.c for ++ * more. ++ * ++ * page can be detached from jnode for two reasons: ++ * ++ * . jnode is removed from a tree (file is truncated, of formatted ++ * node is removed by balancing). ++ * ++ * . during memory pressure, VM calls ->releasepage() method ++ * (reiser4_releasepage()) to evict page from memory. ++ * ++ * (there, of course, is also umount, but this is special case we are not ++ * concerned with here). ++ * ++ * To protect jnode page from eviction, one calls jload() function that ++ * "pins" page in memory (loading it if necessary), increments ++ * jnode->d_count, and kmap()s page. Page is unpinned through call to ++ * jrelse(). ++ * ++ * Jnode life cycle. ++ * ++ * jnode is created, placed in hash table, and, optionally, in per-inode ++ * radix tree. Page can be attached to jnode, pinned, released, etc. ++ * ++ * When jnode is captured into atom its reference counter is ++ * increased. While being part of an atom, jnode can be "early ++ * flushed". This means that as part of flush procedure, jnode is placed ++ * into "relocate set", and its page is submitted to the disk. After io ++ * completes, page can be detached, then loaded again, re-dirtied, etc. 
++ * ++ * Thread acquired reference to jnode by calling jref() and releases it by ++ * jput(). When last reference is removed, jnode is still retained in ++ * memory (cached) if it has page attached, _unless_ it is scheduled for ++ * destruction (has JNODE_HEARD_BANSHEE bit set). ++ * ++ * Tree read-write lock was used as "existential" lock for jnodes. That is, ++ * jnode->x_count could be changed from 0 to 1 only under tree write lock, ++ * that is, tree lock protected unreferenced jnodes stored in the hash ++ * table, from recycling. ++ * ++ * This resulted in high contention on tree lock, because jref()/jput() is ++ * frequent operation. To ameliorate this problem, RCU is used: when jput() ++ * is just about to release last reference on jnode it sets JNODE_RIP bit ++ * on it, and then proceed with jnode destruction (removing jnode from hash ++ * table, cbk_cache, detaching page, etc.). All places that change jnode ++ * reference counter from 0 to 1 (jlookup(), zlook(), zget(), and ++ * cbk_cache_scan_slots()) check for JNODE_RIP bit (this is done by ++ * jnode_rip_check() function), and pretend that nothing was found in hash ++ * table if bit is set. ++ * ++ * jput defers actual return of jnode into slab cache to some later time ++ * (by call_rcu()), this guarantees that other threads can safely continue ++ * working with JNODE_RIP-ped jnode. 
++ * ++ */ ++ ++#include "reiser4.h" ++#include "debug.h" ++#include "dformat.h" ++#include "jnode.h" ++#include "plugin/plugin_header.h" ++#include "plugin/plugin.h" ++#include "txnmgr.h" ++/*#include "jnode.h"*/ ++#include "znode.h" ++#include "tree.h" ++#include "tree_walk.h" ++#include "super.h" ++#include "inode.h" ++#include "page_cache.h" ++ ++#include /* UML needs this for PAGE_OFFSET */ ++#include ++#include ++#include ++#include ++#include /* for struct address_space */ ++#include /* for inode_lock */ ++ ++static struct kmem_cache *_jnode_slab = NULL; ++ ++static void jnode_set_type(jnode * node, jnode_type type); ++static int jdelete(jnode * node); ++static int jnode_try_drop(jnode * node); ++ ++#if REISER4_DEBUG ++static int jnode_invariant(const jnode * node, int tlocked, int jlocked); ++#endif ++ ++/* true if valid page is attached to jnode */ ++static inline int jnode_is_parsed(jnode * node) ++{ ++ return JF_ISSET(node, JNODE_PARSED); ++} ++ ++/* hash table support */ ++ ++/* compare two jnode keys for equality. Used by hash-table macros */ ++static inline int jnode_key_eq(const jnode_key_t * k1, const jnode_key_t * k2) ++{ ++ assert("nikita-2350", k1 != NULL); ++ assert("nikita-2351", k2 != NULL); ++ ++ return (k1->index == k2->index && k1->objectid == k2->objectid); ++} ++ ++/* Hash jnode by its key (inode plus offset). Used by hash-table macros */ ++static inline __u32 ++jnode_key_hashfn(j_hash_table * table, const jnode_key_t * key) ++{ ++ assert("nikita-2352", key != NULL); ++ assert("nikita-3346", IS_POW(table->_buckets)); ++ ++ /* yes, this is remarkable simply (where not stupid) hash function. 
*/ ++ return (key->objectid + key->index) & (table->_buckets - 1); ++} ++ ++/* The hash table definition */ ++#define KMALLOC(size) reiser4_vmalloc(size) ++#define KFREE(ptr, size) vfree(ptr) ++TYPE_SAFE_HASH_DEFINE(j, jnode, jnode_key_t, key.j, link.j, jnode_key_hashfn, ++ jnode_key_eq); ++#undef KFREE ++#undef KMALLOC ++ ++/* call this to initialise jnode hash table */ ++int jnodes_tree_init(reiser4_tree * tree /* tree to initialise jnodes for */ ) ++{ ++ assert("nikita-2359", tree != NULL); ++ return j_hash_init(&tree->jhash_table, 16384); ++} ++ ++/* call this to destroy jnode hash table. This is called during umount. */ ++int jnodes_tree_done(reiser4_tree * tree /* tree to destroy jnodes for */ ) ++{ ++ j_hash_table *jtable; ++ jnode *node; ++ jnode *next; ++ ++ assert("nikita-2360", tree != NULL); ++ ++ /* ++ * Scan hash table and free all jnodes. ++ */ ++ jtable = &tree->jhash_table; ++ if (jtable->_table) { ++ for_all_in_htable(jtable, j, node, next) { ++ assert("nikita-2361", !atomic_read(&node->x_count)); ++ jdrop(node); ++ } ++ ++ j_hash_done(&tree->jhash_table); ++ } ++ return 0; ++} ++ ++/** ++ * init_jnodes - create jnode cache ++ * ++ * Initializes slab cache jnodes. It is part of reiser4 module initialization. ++ */ ++int init_jnodes(void) ++{ ++ assert("umka-168", _jnode_slab == NULL); ++ ++ _jnode_slab = kmem_cache_create("jnode", sizeof(jnode), 0, ++ SLAB_HWCACHE_ALIGN | ++ SLAB_RECLAIM_ACCOUNT, NULL, NULL); ++ if (_jnode_slab == NULL) ++ return RETERR(-ENOMEM); ++ ++ return 0; ++} ++ ++/** ++ * done_znodes - delete znode cache ++ * ++ * This is called on reiser4 module unloading or system shutdown. ++ */ ++void done_jnodes(void) ++{ ++ destroy_reiser4_cache(&_jnode_slab); ++} ++ ++/* Initialize a jnode. 
*/ ++void jnode_init(jnode * node, reiser4_tree * tree, jnode_type type) ++{ ++ assert("umka-175", node != NULL); ++ ++ memset(node, 0, sizeof(jnode)); ++ ON_DEBUG(node->magic = JMAGIC); ++ jnode_set_type(node, type); ++ atomic_set(&node->d_count, 0); ++ atomic_set(&node->x_count, 0); ++ spin_lock_init(&node->guard); ++ spin_lock_init(&node->load); ++ node->atom = NULL; ++ node->tree = tree; ++ INIT_LIST_HEAD(&node->capture_link); ++ ++ ASSIGN_NODE_LIST(node, NOT_CAPTURED); ++ ++ INIT_RCU_HEAD(&node->rcu); ++ ++#if REISER4_DEBUG ++ { ++ reiser4_super_info_data *sbinfo; ++ ++ sbinfo = get_super_private(tree->super); ++ spin_lock_irq(&sbinfo->all_guard); ++ list_add(&node->jnodes, &sbinfo->all_jnodes); ++ spin_unlock_irq(&sbinfo->all_guard); ++ } ++#endif ++} ++ ++#if REISER4_DEBUG ++/* ++ * Remove jnode from ->all_jnodes list. ++ */ ++static void jnode_done(jnode * node, reiser4_tree * tree) ++{ ++ reiser4_super_info_data *sbinfo; ++ ++ sbinfo = get_super_private(tree->super); ++ ++ spin_lock_irq(&sbinfo->all_guard); ++ assert("nikita-2422", !list_empty(&node->jnodes)); ++ list_del_init(&node->jnodes); ++ spin_unlock_irq(&sbinfo->all_guard); ++} ++#endif ++ ++/* return already existing jnode of page */ ++jnode *jnode_by_page(struct page *pg) ++{ ++ assert("nikita-2066", pg != NULL); ++ assert("nikita-2400", PageLocked(pg)); ++ assert("nikita-2068", PagePrivate(pg)); ++ assert("nikita-2067", jprivate(pg) != NULL); ++ return jprivate(pg); ++} ++ ++/* exported functions to allocate/free jnode objects outside this file */ ++jnode *jalloc(void) ++{ ++ jnode *jal = kmem_cache_alloc(_jnode_slab, reiser4_ctx_gfp_mask_get()); ++ return jal; ++} ++ ++/* return jnode back to the slab allocator */ ++inline void jfree(jnode * node) ++{ ++ assert("zam-449", node != NULL); ++ ++ assert("nikita-2663", (list_empty_careful(&node->capture_link) && ++ NODE_LIST(node) == NOT_CAPTURED)); ++ assert("nikita-3222", list_empty(&node->jnodes)); ++ assert("nikita-3221", jnode_page(node) == 
NULL); ++ ++ /* not yet phash_jnode_destroy(node); */ ++ ++ kmem_cache_free(_jnode_slab, node); ++} ++ ++/* ++ * This function is supplied as RCU callback. It actually frees jnode when ++ * last reference to it is gone. ++ */ ++static void jnode_free_actor(struct rcu_head *head) ++{ ++ jnode *node; ++ jnode_type jtype; ++ ++ node = container_of(head, jnode, rcu); ++ jtype = jnode_get_type(node); ++ ++ ON_DEBUG(jnode_done(node, jnode_get_tree(node))); ++ ++ switch (jtype) { ++ case JNODE_IO_HEAD: ++ case JNODE_BITMAP: ++ case JNODE_UNFORMATTED_BLOCK: ++ jfree(node); ++ break; ++ case JNODE_FORMATTED_BLOCK: ++ zfree(JZNODE(node)); ++ break; ++ case JNODE_INODE: ++ default: ++ wrong_return_value("nikita-3197", "Wrong jnode type"); ++ } ++} ++ ++/* ++ * Free a jnode. Post a callback to be executed later through RCU when all ++ * references to @node are released. ++ */ ++static inline void jnode_free(jnode * node, jnode_type jtype) ++{ ++ if (jtype != JNODE_INODE) { ++ /*assert("nikita-3219", list_empty(&node->rcu.list)); */ ++ call_rcu(&node->rcu, jnode_free_actor); ++ } else ++ jnode_list_remove(node); ++} ++ ++/* allocate new unformatted jnode */ ++static jnode *jnew_unformatted(void) ++{ ++ jnode *jal; ++ ++ jal = jalloc(); ++ if (jal == NULL) ++ return NULL; ++ ++ jnode_init(jal, current_tree, JNODE_UNFORMATTED_BLOCK); ++ jal->key.j.mapping = NULL; ++ jal->key.j.index = (unsigned long)-1; ++ jal->key.j.objectid = 0; ++ return jal; ++} ++ ++/* look for jnode with given mapping and offset within hash table */ ++jnode *jlookup(reiser4_tree * tree, oid_t objectid, unsigned long index) ++{ ++ jnode_key_t jkey; ++ jnode *node; ++ ++ assert("nikita-2353", tree != NULL); ++ ++ jkey.objectid = objectid; ++ jkey.index = index; ++ ++ /* ++ * hash table is _not_ protected by any lock during lookups. All we ++ * have to do is to disable preemption to keep RCU happy. 
++ */ ++ ++ rcu_read_lock(); ++ node = j_hash_find(&tree->jhash_table, &jkey); ++ if (node != NULL) { ++ /* protect @node from recycling */ ++ jref(node); ++ assert("nikita-2955", jnode_invariant(node, 0, 0)); ++ node = jnode_rip_check(tree, node); ++ } ++ rcu_read_unlock(); ++ return node; ++} ++ ++/* per inode radix tree of jnodes is protected by tree's read write spin lock */ ++static jnode *jfind_nolock(struct address_space *mapping, unsigned long index) ++{ ++ assert("vs-1694", mapping->host != NULL); ++ ++ return radix_tree_lookup(jnode_tree_by_inode(mapping->host), index); ++} ++ ++jnode *jfind(struct address_space * mapping, unsigned long index) ++{ ++ reiser4_tree *tree; ++ jnode *node; ++ ++ assert("vs-1694", mapping->host != NULL); ++ tree = reiser4_tree_by_inode(mapping->host); ++ ++ read_lock_tree(tree); ++ node = jfind_nolock(mapping, index); ++ if (node != NULL) ++ jref(node); ++ read_unlock_tree(tree); ++ return node; ++} ++ ++static void inode_attach_jnode(jnode * node) ++{ ++ struct inode *inode; ++ reiser4_inode *info; ++ struct radix_tree_root *rtree; ++ ++ assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock)); ++ assert("zam-1043", node->key.j.mapping != NULL); ++ inode = node->key.j.mapping->host; ++ info = reiser4_inode_data(inode); ++ rtree = jnode_tree_by_reiser4_inode(info); ++ if (rtree->rnode == NULL) { ++ /* prevent inode from being pruned when it has jnodes attached ++ to it */ ++ write_lock_irq(&inode->i_data.tree_lock); ++ inode->i_data.nrpages++; ++ write_unlock_irq(&inode->i_data.tree_lock); ++ } ++ assert("zam-1049", equi(rtree->rnode != NULL, info->nr_jnodes != 0)); ++ check_me("zam-1045", ++ !radix_tree_insert(rtree, node->key.j.index, node)); ++ ON_DEBUG(info->nr_jnodes++); ++} ++ ++static void inode_detach_jnode(jnode * node) ++{ ++ struct inode *inode; ++ reiser4_inode *info; ++ struct radix_tree_root *rtree; ++ ++ assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock)); ++ assert("zam-1044", node->key.j.mapping != 
NULL); ++ inode = node->key.j.mapping->host; ++ info = reiser4_inode_data(inode); ++ rtree = jnode_tree_by_reiser4_inode(info); ++ ++ assert("zam-1051", info->nr_jnodes != 0); ++ assert("zam-1052", rtree->rnode != NULL); ++ ON_DEBUG(info->nr_jnodes--); ++ ++ /* delete jnode from inode's radix tree of jnodes */ ++ check_me("zam-1046", radix_tree_delete(rtree, node->key.j.index)); ++ if (rtree->rnode == NULL) { ++ /* inode can be pruned now */ ++ write_lock_irq(&inode->i_data.tree_lock); ++ inode->i_data.nrpages--; ++ write_unlock_irq(&inode->i_data.tree_lock); ++ } ++} ++ ++/* put jnode into hash table (where they can be found by flush who does not know ++ mapping) and to inode's tree of jnodes (where they can be found (hopefully ++ faster) in places where mapping is known). Currently it is used by ++ fs/reiser4/plugin/item/extent_file_ops.c:index_extent_jnode when new jnode is ++ created */ ++static void ++hash_unformatted_jnode(jnode * node, struct address_space *mapping, ++ unsigned long index) ++{ ++ j_hash_table *jtable; ++ ++ assert("vs-1446", jnode_is_unformatted(node)); ++ assert("vs-1442", node->key.j.mapping == 0); ++ assert("vs-1443", node->key.j.objectid == 0); ++ assert("vs-1444", node->key.j.index == (unsigned long)-1); ++ assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock)); ++ ++ node->key.j.mapping = mapping; ++ node->key.j.objectid = get_inode_oid(mapping->host); ++ node->key.j.index = index; ++ ++ jtable = &jnode_get_tree(node)->jhash_table; ++ ++ /* race with some other thread inserting jnode into the hash table is ++ * impossible, because we keep the page lock. */ ++ /* ++ * following assertion no longer holds because of RCU: it is possible ++ * jnode is in the hash table, but with JNODE_RIP bit set. 
++ */ ++ /* assert("nikita-3211", j_hash_find(jtable, &node->key.j) == NULL); */ ++ j_hash_insert_rcu(jtable, node); ++ inode_attach_jnode(node); ++} ++ ++static void unhash_unformatted_node_nolock(jnode * node) ++{ ++ assert("vs-1683", node->key.j.mapping != NULL); ++ assert("vs-1684", ++ node->key.j.objectid == ++ get_inode_oid(node->key.j.mapping->host)); ++ ++ /* remove jnode from hash-table */ ++ j_hash_remove_rcu(&node->tree->jhash_table, node); ++ inode_detach_jnode(node); ++ node->key.j.mapping = NULL; ++ node->key.j.index = (unsigned long)-1; ++ node->key.j.objectid = 0; ++ ++} ++ ++/* remove jnode from hash table and from inode's tree of jnodes. This is used in ++ reiser4_invalidatepage and in kill_hook_extent -> truncate_inode_jnodes -> ++ reiser4_uncapture_jnode */ ++void unhash_unformatted_jnode(jnode * node) ++{ ++ assert("vs-1445", jnode_is_unformatted(node)); ++ ++ write_lock_tree(node->tree); ++ unhash_unformatted_node_nolock(node); ++ write_unlock_tree(node->tree); ++} ++ ++/* ++ * search hash table for a jnode with given oid and index. If not found, ++ * allocate new jnode, insert it, and also insert into radix tree for the ++ * given inode/mapping. 
++ */ ++static jnode *find_get_jnode(reiser4_tree * tree, ++ struct address_space *mapping, ++ oid_t oid, unsigned long index) ++{ ++ jnode *result; ++ jnode *shadow; ++ int preload; ++ ++ result = jnew_unformatted(); ++ ++ if (unlikely(result == NULL)) ++ return ERR_PTR(RETERR(-ENOMEM)); ++ ++ preload = radix_tree_preload(reiser4_ctx_gfp_mask_get()); ++ if (preload != 0) ++ return ERR_PTR(preload); ++ ++ write_lock_tree(tree); ++ shadow = jfind_nolock(mapping, index); ++ if (likely(shadow == NULL)) { ++ /* add new jnode to hash table and inode's radix tree of jnodes */ ++ jref(result); ++ hash_unformatted_jnode(result, mapping, index); ++ } else { ++ /* jnode is found in inode's radix tree of jnodes */ ++ jref(shadow); ++ jnode_free(result, JNODE_UNFORMATTED_BLOCK); ++ assert("vs-1498", shadow->key.j.mapping == mapping); ++ result = shadow; ++ } ++ write_unlock_tree(tree); ++ ++ assert("nikita-2955", ++ ergo(result != NULL, jnode_invariant(result, 0, 0))); ++ radix_tree_preload_end(); ++ return result; ++} ++ ++/* jget() (a la zget() but for unformatted nodes). Returns (and possibly ++ creates) jnode corresponding to page @pg. jnode is attached to page and ++ inserted into jnode hash-table. */ ++static jnode *do_jget(reiser4_tree * tree, struct page *pg) ++{ ++ /* ++ * There are two ways to create jnode: starting with pre-existing page ++ * and without page. ++ * ++ * When page already exists, jnode is created ++ * (jnode_of_page()->do_jget()) under page lock. This is done in ++ * ->writepage(), or when capturing anonymous page dirtied through ++ * mmap. ++ * ++ * Jnode without page is created by index_extent_jnode(). 
++ * ++ */ ++ ++ jnode *result; ++ oid_t oid = get_inode_oid(pg->mapping->host); ++ ++ assert("umka-176", pg != NULL); ++ assert("nikita-2394", PageLocked(pg)); ++ ++ result = jprivate(pg); ++ if (likely(result != NULL)) ++ return jref(result); ++ ++ tree = reiser4_tree_by_page(pg); ++ ++ /* check hash-table first */ ++ result = jfind(pg->mapping, pg->index); ++ if (unlikely(result != NULL)) { ++ spin_lock_jnode(result); ++ jnode_attach_page(result, pg); ++ spin_unlock_jnode(result); ++ result->key.j.mapping = pg->mapping; ++ return result; ++ } ++ ++ /* since page is locked, jnode should be allocated with GFP_NOFS flag */ ++ reiser4_ctx_gfp_mask_force(GFP_NOFS); ++ result = find_get_jnode(tree, pg->mapping, oid, pg->index); ++ if (unlikely(IS_ERR(result))) ++ return result; ++ /* attach jnode to page */ ++ spin_lock_jnode(result); ++ jnode_attach_page(result, pg); ++ spin_unlock_jnode(result); ++ return result; ++} ++ ++/* ++ * return jnode for @pg, creating it if necessary. ++ */ ++jnode *jnode_of_page(struct page * pg) ++{ ++ jnode *result; ++ ++ assert("umka-176", pg != NULL); ++ assert("nikita-2394", PageLocked(pg)); ++ ++ result = do_jget(reiser4_tree_by_page(pg), pg); ++ ++ if (REISER4_DEBUG && !IS_ERR(result)) { ++ assert("nikita-3210", result == jprivate(pg)); ++ assert("nikita-2046", jnode_page(jprivate(pg)) == pg); ++ if (jnode_is_unformatted(jprivate(pg))) { ++ assert("nikita-2364", ++ jprivate(pg)->key.j.index == pg->index); ++ assert("nikita-2367", ++ jprivate(pg)->key.j.mapping == pg->mapping); ++ assert("nikita-2365", ++ jprivate(pg)->key.j.objectid == ++ get_inode_oid(pg->mapping->host)); ++ assert("vs-1200", ++ jprivate(pg)->key.j.objectid == ++ pg->mapping->host->i_ino); ++ assert("nikita-2356", ++ jnode_is_unformatted(jnode_by_page(pg))); ++ } ++ assert("nikita-2956", jnode_invariant(jprivate(pg), 0, 0)); ++ } ++ return result; ++} ++ ++/* attach page to jnode: set ->pg pointer in jnode, and ->private one in the ++ * page.*/ ++void 
jnode_attach_page(jnode * node, struct page *pg) ++{ ++ assert("nikita-2060", node != NULL); ++ assert("nikita-2061", pg != NULL); ++ ++ assert("nikita-2050", jprivate(pg) == 0ul); ++ assert("nikita-2393", !PagePrivate(pg)); ++ assert("vs-1741", node->pg == NULL); ++ ++ assert("nikita-2396", PageLocked(pg)); ++ assert_spin_locked(&(node->guard)); ++ ++ page_cache_get(pg); ++ set_page_private(pg, (unsigned long)node); ++ node->pg = pg; ++ SetPagePrivate(pg); ++} ++ ++/* Dual to jnode_attach_page: break a binding between page and jnode */ ++void page_clear_jnode(struct page *page, jnode * node) ++{ ++ assert("nikita-2424", page != NULL); ++ assert("nikita-2425", PageLocked(page)); ++ assert("nikita-2426", node != NULL); ++ assert_spin_locked(&(node->guard)); ++ assert("nikita-2428", PagePrivate(page)); ++ ++ assert("nikita-3551", !PageWriteback(page)); ++ ++ JF_CLR(node, JNODE_PARSED); ++ set_page_private(page, 0ul); ++ ClearPagePrivate(page); ++ node->pg = NULL; ++ page_cache_release(page); ++} ++ ++#if 0 ++/* it is only used in one place to handle error */ ++void ++page_detach_jnode(struct page *page, struct address_space *mapping, ++ unsigned long index) ++{ ++ assert("nikita-2395", page != NULL); ++ ++ lock_page(page); ++ if ((page->mapping == mapping) && (page->index == index) ++ && PagePrivate(page)) { ++ jnode *node; ++ ++ node = jprivate(page); ++ spin_lock_jnode(node); ++ page_clear_jnode(page, node); ++ spin_unlock_jnode(node); ++ } ++ unlock_page(page); ++} ++#endif /* 0 */ ++ ++/* return @node page locked. ++ ++ Locking ordering requires that one first takes page lock and afterwards ++ spin lock on node attached to this page. Sometimes it is necessary to go in ++ the opposite direction. This is done through standard trylock-and-release ++ loop. 
++*/ ++static struct page *jnode_lock_page(jnode * node) ++{ ++ struct page *page; ++ ++ assert("nikita-2052", node != NULL); ++ assert("nikita-2401", LOCK_CNT_NIL(spin_locked_jnode)); ++ ++ while (1) { ++ ++ spin_lock_jnode(node); ++ page = jnode_page(node); ++ if (page == NULL) { ++ break; ++ } ++ ++ /* no need to page_cache_get( page ) here, because page cannot ++ be evicted from memory without detaching it from jnode and ++ this requires spin lock on jnode that we already hold. ++ */ ++ if (!TestSetPageLocked(page)) { ++ /* We won a lock on jnode page, proceed. */ ++ break; ++ } ++ ++ /* Page is locked by someone else. */ ++ page_cache_get(page); ++ spin_unlock_jnode(node); ++ wait_on_page_locked(page); ++ /* it is possible that page was detached from jnode and ++ returned to the free pool, or re-assigned while we were ++ waiting on locked bit. This will be rechecked on the next ++ loop iteration. ++ */ ++ page_cache_release(page); ++ ++ /* try again */ ++ } ++ return page; ++} ++ ++/* ++ * is JNODE_PARSED bit is not set, call ->parse() method of jnode, to verify ++ * validness of jnode content. ++ */ ++static inline int jparse(jnode * node) ++{ ++ int result; ++ ++ assert("nikita-2466", node != NULL); ++ ++ spin_lock_jnode(node); ++ if (likely(!jnode_is_parsed(node))) { ++ result = jnode_ops(node)->parse(node); ++ if (likely(result == 0)) ++ JF_SET(node, JNODE_PARSED); ++ } else ++ result = 0; ++ spin_unlock_jnode(node); ++ return result; ++} ++ ++/* Lock a page attached to jnode, create and attach page to jnode if it had no ++ * one. 
*/ ++static struct page *jnode_get_page_locked(jnode * node, gfp_t gfp_flags) ++{ ++ struct page *page; ++ ++ spin_lock_jnode(node); ++ page = jnode_page(node); ++ ++ if (page == NULL) { ++ spin_unlock_jnode(node); ++ page = find_or_create_page(jnode_get_mapping(node), ++ jnode_get_index(node), gfp_flags); ++ if (page == NULL) ++ return ERR_PTR(RETERR(-ENOMEM)); ++ } else { ++ if (!TestSetPageLocked(page)) { ++ spin_unlock_jnode(node); ++ return page; ++ } ++ page_cache_get(page); ++ spin_unlock_jnode(node); ++ lock_page(page); ++ assert("nikita-3134", page->mapping == jnode_get_mapping(node)); ++ } ++ ++ spin_lock_jnode(node); ++ if (!jnode_page(node)) ++ jnode_attach_page(node, page); ++ spin_unlock_jnode(node); ++ ++ page_cache_release(page); ++ assert("zam-894", jnode_page(node) == page); ++ return page; ++} ++ ++/* Start read operation for jnode's page if page is not up-to-date. */ ++static int jnode_start_read(jnode * node, struct page *page) ++{ ++ assert("zam-893", PageLocked(page)); ++ ++ if (PageUptodate(page)) { ++ unlock_page(page); ++ return 0; ++ } ++ return reiser4_page_io(page, node, READ, reiser4_ctx_gfp_mask_get()); ++} ++ ++#if REISER4_DEBUG ++static void check_jload(jnode * node, struct page *page) ++{ ++ if (jnode_is_znode(node)) { ++ node40_header *nh; ++ znode *z; ++ ++ z = JZNODE(node); ++ if (znode_is_any_locked(z)) { ++ nh = (node40_header *) kmap(page); ++ /* this only works for node40-only file systems. For ++ * debugging. */ ++ assert("nikita-3253", ++ z->nr_items == le16_to_cpu(get_unaligned(&nh->nr_items))); ++ kunmap(page); ++ } ++ assert("nikita-3565", znode_invariant(z)); ++ } ++} ++#else ++#define check_jload(node, page) noop ++#endif ++ ++/* prefetch jnode to speed up next call to jload. Call this when you are going ++ * to call jload() shortly. This will bring appropriate portion of jnode into ++ * CPU cache. 
*/ ++void jload_prefetch(jnode * node) ++{ ++ prefetchw(&node->x_count); ++} ++ ++/* load jnode's data into memory */ ++int jload_gfp(jnode * node /* node to load */ , ++ gfp_t gfp_flags /* allocation flags */ , ++ int do_kmap /* true if page should be kmapped */ ) ++{ ++ struct page *page; ++ int result = 0; ++ int parsed; ++ ++ assert("nikita-3010", reiser4_schedulable()); ++ ++ prefetchw(&node->pg); ++ ++ /* taking d-reference implies taking x-reference. */ ++ jref(node); ++ ++ /* ++ * acquiring d-reference to @jnode and check for JNODE_PARSED bit ++ * should be atomic, otherwise there is a race against ++ * reiser4_releasepage(). ++ */ ++ spin_lock(&(node->load)); ++ add_d_ref(node); ++ parsed = jnode_is_parsed(node); ++ spin_unlock(&(node->load)); ++ ++ if (unlikely(!parsed)) { ++ page = jnode_get_page_locked(node, gfp_flags); ++ if (unlikely(IS_ERR(page))) { ++ result = PTR_ERR(page); ++ goto failed; ++ } ++ ++ result = jnode_start_read(node, page); ++ if (unlikely(result != 0)) ++ goto failed; ++ ++ wait_on_page_locked(page); ++ if (unlikely(!PageUptodate(page))) { ++ result = RETERR(-EIO); ++ goto failed; ++ } ++ ++ if (do_kmap) ++ node->data = kmap(page); ++ ++ result = jparse(node); ++ if (unlikely(result != 0)) { ++ if (do_kmap) ++ kunmap(page); ++ goto failed; ++ } ++ check_jload(node, page); ++ } else { ++ page = jnode_page(node); ++ check_jload(node, page); ++ if (do_kmap) ++ node->data = kmap(page); ++ } ++ ++ if (!is_writeout_mode()) ++ /* We do not mark pages active if jload is called as a part of ++ * jnode_flush() or reiser4_write_logs(). Both jnode_flush() ++ * and write_logs() add no value to cached data, there is no ++ * sense to mark pages as active when they go to disk, it just ++ * confuses vm scanning routines because clean page could be ++ * moved out from inactive list as a result of this ++ * mark_page_accessed() call. 
*/ ++ mark_page_accessed(page); ++ ++ return 0; ++ ++ failed: ++ jrelse_tail(node); ++ return result; ++ ++} ++ ++/* start asynchronous reading for given jnode's page. */ ++int jstartio(jnode * node) ++{ ++ struct page *page; ++ ++ page = jnode_get_page_locked(node, reiser4_ctx_gfp_mask_get()); ++ if (IS_ERR(page)) ++ return PTR_ERR(page); ++ ++ return jnode_start_read(node, page); ++} ++ ++/* Initialize a node by calling appropriate plugin instead of reading ++ * node from disk as in jload(). */ ++int jinit_new(jnode * node, gfp_t gfp_flags) ++{ ++ struct page *page; ++ int result; ++ ++ jref(node); ++ add_d_ref(node); ++ ++ page = jnode_get_page_locked(node, gfp_flags); ++ if (IS_ERR(page)) { ++ result = PTR_ERR(page); ++ goto failed; ++ } ++ ++ SetPageUptodate(page); ++ unlock_page(page); ++ ++ node->data = kmap(page); ++ ++ if (!jnode_is_parsed(node)) { ++ jnode_plugin *jplug = jnode_ops(node); ++ spin_lock_jnode(node); ++ result = jplug->init(node); ++ spin_unlock_jnode(node); ++ if (result) { ++ kunmap(page); ++ goto failed; ++ } ++ JF_SET(node, JNODE_PARSED); ++ } ++ ++ return 0; ++ ++ failed: ++ jrelse(node); ++ return result; ++} ++ ++/* release a reference to jnode acquired by jload(), decrement ->d_count */ ++void jrelse_tail(jnode * node /* jnode to release references to */ ) ++{ ++ assert("nikita-489", atomic_read(&node->d_count) > 0); ++ atomic_dec(&node->d_count); ++ if (jnode_is_unformatted(node) || jnode_is_znode(node)) ++ LOCK_CNT_DEC(d_refs); ++ /* release reference acquired in jload_gfp() or jinit_new() */ ++ jput(node); ++} ++ ++/* drop reference to node data. When last reference is dropped, data are ++ unloaded. 
*/ ++void jrelse(jnode * node /* jnode to release references to */ ) ++{ ++ struct page *page; ++ ++ assert("nikita-487", node != NULL); ++ assert_spin_not_locked(&(node->guard)); ++ ++ page = jnode_page(node); ++ if (likely(page != NULL)) { ++ /* ++ * it is safe not to lock jnode here, because at this point ++ * @node->d_count is greater than zero (if jrelse() is used ++ * correctly, that is). JNODE_PARSED may be not set yet, if, ++ * for example, we got here as a result of error handling path ++ * in jload(). Anyway, page cannot be detached by ++ * reiser4_releasepage(). truncate will invalidate page ++ * regardless, but this should not be a problem. ++ */ ++ kunmap(page); ++ } ++ jrelse_tail(node); ++} ++ ++/* called from jput() to wait for io completion */ ++static void jnode_finish_io(jnode * node) ++{ ++ struct page *page; ++ ++ assert("nikita-2922", node != NULL); ++ ++ spin_lock_jnode(node); ++ page = jnode_page(node); ++ if (page != NULL) { ++ page_cache_get(page); ++ spin_unlock_jnode(node); ++ wait_on_page_writeback(page); ++ page_cache_release(page); ++ } else ++ spin_unlock_jnode(node); ++} ++ ++/* ++ * This is called by jput() when last reference to jnode is released. This is ++ * separate function, because we want fast path of jput() to be inline and, ++ * therefore, small. ++ */ ++void jput_final(jnode * node) ++{ ++ int r_i_p; ++ ++ /* A fast check for keeping node in cache. We always keep node in cache ++ * if its page is present and node was not marked for deletion */ ++ if (jnode_page(node) != NULL && !JF_ISSET(node, JNODE_HEARD_BANSHEE)) { ++ rcu_read_unlock(); ++ return; ++ } ++ assert("edward-1432", node->page_count == 0); ++ ++ r_i_p = !JF_TEST_AND_SET(node, JNODE_RIP); ++ /* ++ * if r_i_p is true, we were first to set JNODE_RIP on this node. In ++ * this case it is safe to access node after unlock. 
++ */ ++ rcu_read_unlock(); ++ if (r_i_p) { ++ jnode_finish_io(node); ++ if (JF_ISSET(node, JNODE_HEARD_BANSHEE)) ++ /* node is removed from the tree. */ ++ jdelete(node); ++ else ++ jnode_try_drop(node); ++ } ++ /* if !r_i_p some other thread is already killing it */ ++} ++ ++int jwait_io(jnode * node, int rw) ++{ ++ struct page *page; ++ int result; ++ ++ assert("zam-447", node != NULL); ++ assert("zam-448", jnode_page(node) != NULL); ++ ++ page = jnode_page(node); ++ ++ result = 0; ++ if (rw == READ) { ++ wait_on_page_locked(page); ++ } else { ++ assert("nikita-2227", rw == WRITE); ++ wait_on_page_writeback(page); ++ } ++ if (PageError(page)) ++ result = RETERR(-EIO); ++ ++ return result; ++} ++ ++/* ++ * jnode types and plugins. ++ * ++ * jnode by itself is a "base type". There are several different jnode ++ * flavors, called "jnode types" (see jnode_type for a list). Sometimes code ++ * has to do different things based on jnode type. In the standard reiser4 way ++ * this is done by having jnode plugin (see fs/reiser4/plugin.h:jnode_plugin). ++ * ++ * Functions below deal with jnode types and define methods of jnode plugin. ++ * ++ */ ++ ++/* set jnode type. This is done during jnode initialization. */ ++static void jnode_set_type(jnode * node, jnode_type type) ++{ ++ static unsigned long type_to_mask[] = { ++ [JNODE_UNFORMATTED_BLOCK] = 1, ++ [JNODE_FORMATTED_BLOCK] = 0, ++ [JNODE_BITMAP] = 2, ++ [JNODE_IO_HEAD] = 6, ++ [JNODE_INODE] = 4 ++ }; ++ ++ assert("zam-647", type < LAST_JNODE_TYPE); ++ assert("nikita-2815", !jnode_is_loaded(node)); ++ assert("nikita-3386", node->state == 0); ++ ++ node->state |= (type_to_mask[type] << JNODE_TYPE_1); ++} ++ ++/* ->init() method of jnode plugin for jnodes that don't require plugin ++ * specific initialization. */ ++static int init_noinit(jnode * node UNUSED_ARG) ++{ ++ return 0; ++} ++ ++/* ->parse() method of jnode plugin for jnodes that don't require plugin ++ * specific pasring. 
*/ ++static int parse_noparse(jnode * node UNUSED_ARG) ++{ ++ return 0; ++} ++ ++/* ->mapping() method for unformatted jnode */ ++struct address_space *mapping_jnode(const jnode * node) ++{ ++ struct address_space *map; ++ ++ assert("nikita-2713", node != NULL); ++ ++ /* mapping is stored in jnode */ ++ ++ map = node->key.j.mapping; ++ assert("nikita-2714", map != NULL); ++ assert("nikita-2897", is_reiser4_inode(map->host)); ++ assert("nikita-2715", get_inode_oid(map->host) == node->key.j.objectid); ++ return map; ++} ++ ++/* ->index() method for unformatted jnodes */ ++unsigned long index_jnode(const jnode * node) ++{ ++ /* index is stored in jnode */ ++ return node->key.j.index; ++} ++ ++/* ->remove() method for unformatted jnodes */ ++static inline void remove_jnode(jnode * node, reiser4_tree * tree) ++{ ++ /* remove jnode from hash table and radix tree */ ++ if (node->key.j.mapping) ++ unhash_unformatted_node_nolock(node); ++} ++ ++/* ->mapping() method for znodes */ ++static struct address_space *mapping_znode(const jnode * node) ++{ ++ /* all znodes belong to fake inode */ ++ return reiser4_get_super_fake(jnode_get_tree(node)->super)->i_mapping; ++} ++ ++/* ->index() method for znodes */ ++static unsigned long index_znode(const jnode * node) ++{ ++ unsigned long addr; ++ assert("nikita-3317", (1 << znode_shift_order) < sizeof(znode)); ++ ++ /* index of znode is just its address (shifted) */ ++ addr = (unsigned long)node; ++ return (addr - PAGE_OFFSET) >> znode_shift_order; ++} ++ ++/* ->mapping() method for bitmap jnode */ ++static struct address_space *mapping_bitmap(const jnode * node) ++{ ++ /* all bitmap blocks belong to special bitmap inode */ ++ return get_super_private(jnode_get_tree(node)->super)->bitmap-> ++ i_mapping; ++} ++ ++/* ->index() method for jnodes that are indexed by address */ ++static unsigned long index_is_address(const jnode * node) ++{ ++ unsigned long ind; ++ ++ ind = (unsigned long)node; ++ return ind - PAGE_OFFSET; ++} ++ ++/* 
resolve race with jput */ ++jnode *jnode_rip_sync(reiser4_tree *tree, jnode *node) ++{ ++ /* ++ * This is used as part of RCU-based jnode handling. ++ * ++ * jlookup(), zlook(), zget(), and cbk_cache_scan_slots() have to work ++ * with unreferenced jnodes (ones with ->x_count == 0). Hash table is ++ * not protected during this, so concurrent thread may execute ++ * zget-set-HEARD_BANSHEE-zput, or somehow else cause jnode to be ++ * freed in jput_final(). To avoid such races, jput_final() sets ++ * JNODE_RIP on jnode (under tree lock). All places that work with ++ * unreferenced jnodes call this function. It checks for JNODE_RIP bit ++ * (first without taking tree lock), and if this bit is set, released ++ * reference acquired by the current thread and returns NULL. ++ * ++ * As a result, if jnode is being concurrently freed, NULL is returned ++ * and caller should pretend that jnode wasn't found in the first ++ * place. ++ * ++ * Otherwise it's safe to release "rcu-read-lock" and continue with ++ * jnode. 
++ */ ++ if (unlikely(JF_ISSET(node, JNODE_RIP))) { ++ read_lock_tree(tree); ++ if (JF_ISSET(node, JNODE_RIP)) { ++ dec_x_ref(node); ++ node = NULL; ++ } ++ read_unlock_tree(tree); ++ } ++ return node; ++} ++ ++reiser4_key *jnode_build_key(const jnode * node, reiser4_key * key) ++{ ++ struct inode *inode; ++ item_plugin *iplug; ++ loff_t off; ++ ++ assert("nikita-3092", node != NULL); ++ assert("nikita-3093", key != NULL); ++ assert("nikita-3094", jnode_is_unformatted(node)); ++ ++ off = ((loff_t) index_jnode(node)) << PAGE_CACHE_SHIFT; ++ inode = mapping_jnode(node)->host; ++ ++ if (node->parent_item_id != 0) ++ iplug = item_plugin_by_id(node->parent_item_id); ++ else ++ iplug = NULL; ++ ++ if (iplug != NULL && iplug->f.key_by_offset) ++ iplug->f.key_by_offset(inode, off, key); ++ else { ++ file_plugin *fplug; ++ ++ fplug = inode_file_plugin(inode); ++ assert("zam-1007", fplug != NULL); ++ assert("zam-1008", fplug->key_by_inode != NULL); ++ ++ fplug->key_by_inode(inode, off, key); ++ } ++ ++ return key; ++} ++ ++/* ->parse() method for formatted nodes */ ++static int parse_znode(jnode * node) ++{ ++ return zparse(JZNODE(node)); ++} ++ ++/* ->delete() method for formatted nodes */ ++static void delete_znode(jnode * node, reiser4_tree * tree) ++{ ++ znode *z; ++ ++ assert_rw_write_locked(&(tree->tree_lock)); ++ assert("vs-898", JF_ISSET(node, JNODE_HEARD_BANSHEE)); ++ ++ z = JZNODE(node); ++ assert("vs-899", z->c_count == 0); ++ ++ /* delete znode from sibling list. */ ++ sibling_list_remove(z); ++ ++ znode_remove(z, tree); ++} ++ ++/* ->remove() method for formatted nodes */ ++static int remove_znode(jnode * node, reiser4_tree * tree) ++{ ++ znode *z; ++ ++ assert_rw_write_locked(&(tree->tree_lock)); ++ z = JZNODE(node); ++ ++ if (z->c_count == 0) { ++ /* detach znode from sibling list. */ ++ sibling_list_drop(z); ++ /* this is called with tree spin-lock held, so call ++ znode_remove() directly (rather than znode_lock_remove()). 
*/ ++ znode_remove(z, tree); ++ return 0; ++ } ++ return RETERR(-EBUSY); ++} ++ ++/* ->init() method for formatted nodes */ ++static int init_znode(jnode * node) ++{ ++ znode *z; ++ ++ z = JZNODE(node); ++ /* call node plugin to do actual initialization */ ++ return z->nplug->init(z); ++} ++ ++/* ->clone() method for formatted nodes */ ++static jnode *clone_formatted(jnode * node) ++{ ++ znode *clone; ++ ++ assert("vs-1430", jnode_is_znode(node)); ++ clone = zalloc(reiser4_ctx_gfp_mask_get()); ++ if (clone == NULL) ++ return ERR_PTR(RETERR(-ENOMEM)); ++ zinit(clone, NULL, current_tree); ++ jnode_set_block(ZJNODE(clone), jnode_get_block(node)); ++ /* ZJNODE(clone)->key.z is not initialized */ ++ clone->level = JZNODE(node)->level; ++ ++ return ZJNODE(clone); ++} ++ ++/* jplug->clone for unformatted nodes */ ++static jnode *clone_unformatted(jnode * node) ++{ ++ jnode *clone; ++ ++ assert("vs-1431", jnode_is_unformatted(node)); ++ clone = jalloc(); ++ if (clone == NULL) ++ return ERR_PTR(RETERR(-ENOMEM)); ++ ++ jnode_init(clone, current_tree, JNODE_UNFORMATTED_BLOCK); ++ jnode_set_block(clone, jnode_get_block(node)); ++ ++ return clone; ++ ++} ++ ++/* ++ * Setup jnode plugin methods for various jnode types. 
++ */ ++jnode_plugin jnode_plugins[LAST_JNODE_TYPE] = { ++ [JNODE_UNFORMATTED_BLOCK] = { ++ .h = { ++ .type_id = REISER4_JNODE_PLUGIN_TYPE, ++ .id = JNODE_UNFORMATTED_BLOCK, ++ .pops = NULL, ++ .label = "unformatted", ++ .desc = "unformatted node", ++ .linkage = {NULL, NULL} ++ }, ++ .init = init_noinit, ++ .parse = parse_noparse, ++ .mapping = mapping_jnode, ++ .index = index_jnode, ++ .clone = clone_unformatted ++ }, ++ [JNODE_FORMATTED_BLOCK] = { ++ .h = { ++ .type_id = REISER4_JNODE_PLUGIN_TYPE, ++ .id = JNODE_FORMATTED_BLOCK, ++ .pops = NULL, ++ .label = "formatted", ++ .desc = "formatted tree node", ++ .linkage = {NULL, NULL} ++ }, ++ .init = init_znode, ++ .parse = parse_znode, ++ .mapping = mapping_znode, ++ .index = index_znode, ++ .clone = clone_formatted ++ }, ++ [JNODE_BITMAP] = { ++ .h = { ++ .type_id = REISER4_JNODE_PLUGIN_TYPE, ++ .id = JNODE_BITMAP, ++ .pops = NULL, ++ .label = "bitmap", ++ .desc = "bitmap node", ++ .linkage = {NULL, NULL} ++ }, ++ .init = init_noinit, ++ .parse = parse_noparse, ++ .mapping = mapping_bitmap, ++ .index = index_is_address, ++ .clone = NULL ++ }, ++ [JNODE_IO_HEAD] = { ++ .h = { ++ .type_id = REISER4_JNODE_PLUGIN_TYPE, ++ .id = JNODE_IO_HEAD, ++ .pops = NULL, ++ .label = "io head", ++ .desc = "io head", ++ .linkage = {NULL, NULL} ++ }, ++ .init = init_noinit, ++ .parse = parse_noparse, ++ .mapping = mapping_bitmap, ++ .index = index_is_address, ++ .clone = NULL ++ }, ++ [JNODE_INODE] = { ++ .h = { ++ .type_id = REISER4_JNODE_PLUGIN_TYPE, ++ .id = JNODE_INODE, ++ .pops = NULL, ++ .label = "inode", ++ .desc = "inode's builtin jnode", ++ .linkage = {NULL, NULL} ++ }, ++ .init = NULL, ++ .parse = NULL, ++ .mapping = NULL, ++ .index = NULL, ++ .clone = NULL ++ } ++}; ++ ++/* ++ * jnode destruction. ++ * ++ * Thread may use a jnode after it acquired a reference to it. References are ++ * counted in ->x_count field. Reference protects jnode from being ++ * recycled. 
This is different from protecting jnode data (that are stored in ++ * jnode page) from being evicted from memory. Data are protected by jload() ++ * and released by jrelse(). ++ * ++ * If thread already possesses a reference to the jnode it can acquire another ++ * one through jref(). Initial reference is obtained (usually) by locating ++ * jnode in some indexing structure that depends on jnode type: formatted ++ * nodes are kept in global hash table, where they are indexed by block ++ * number, and also in the cbk cache. Unformatted jnodes are also kept in hash ++ * table, which is indexed by oid and offset within file, and in per-inode ++ * radix tree. ++ * ++ * Reference to jnode is released by jput(). If last reference is released, ++ * jput_final() is called. This function determines whether jnode has to be ++ * deleted (this happens when corresponding node is removed from the file ++ * system, jnode is marked with JNODE_HEARD_BANSHEE bit in this case), or it ++ * should be just "removed" (deleted from memory). ++ * ++ * Jnode destruction is signally delicate dance because of locking and RCU. ++ */ ++ ++/* ++ * Returns true if jnode cannot be removed right now. This check is called ++ * under tree lock. If it returns true, jnode is irrevocably committed to be ++ * deleted/removed. ++ */ ++static inline int jnode_is_busy(const jnode * node, jnode_type jtype) ++{ ++ /* if other thread managed to acquire a reference to this jnode, don't ++ * free it. */ ++ if (atomic_read(&node->x_count) > 0) ++ return 1; ++ /* also, don't free znode that has children in memory */ ++ if (jtype == JNODE_FORMATTED_BLOCK && JZNODE(node)->c_count > 0) ++ return 1; ++ return 0; ++} ++ ++/* ++ * this is called as part of removing jnode. Based on jnode type, call ++ * corresponding function that removes jnode from indices and returns it back ++ * to the appropriate slab (through RCU). 
++ */ ++static inline void ++jnode_remove(jnode * node, jnode_type jtype, reiser4_tree * tree) ++{ ++ switch (jtype) { ++ case JNODE_UNFORMATTED_BLOCK: ++ remove_jnode(node, tree); ++ break; ++ case JNODE_IO_HEAD: ++ case JNODE_BITMAP: ++ break; ++ case JNODE_INODE: ++ break; ++ case JNODE_FORMATTED_BLOCK: ++ remove_znode(node, tree); ++ break; ++ default: ++ wrong_return_value("nikita-3196", "Wrong jnode type"); ++ } ++} ++ ++/* ++ * this is called as part of deleting jnode. Based on jnode type, call ++ * corresponding function that removes jnode from indices and returns it back ++ * to the appropriate slab (through RCU). ++ * ++ * This differs from jnode_remove() only for formatted nodes---for them ++ * sibling list handling is different for removal and deletion. ++ */ ++static inline void ++jnode_delete(jnode * node, jnode_type jtype, reiser4_tree * tree UNUSED_ARG) ++{ ++ switch (jtype) { ++ case JNODE_UNFORMATTED_BLOCK: ++ remove_jnode(node, tree); ++ break; ++ case JNODE_IO_HEAD: ++ case JNODE_BITMAP: ++ break; ++ case JNODE_FORMATTED_BLOCK: ++ delete_znode(node, tree); ++ break; ++ case JNODE_INODE: ++ default: ++ wrong_return_value("nikita-3195", "Wrong jnode type"); ++ } ++} ++ ++#if REISER4_DEBUG ++/* ++ * remove jnode from the debugging list of all jnodes hanging off super-block. ++ */ ++void jnode_list_remove(jnode * node) ++{ ++ reiser4_super_info_data *sbinfo; ++ ++ sbinfo = get_super_private(jnode_get_tree(node)->super); ++ ++ spin_lock_irq(&sbinfo->all_guard); ++ assert("nikita-2422", !list_empty(&node->jnodes)); ++ list_del_init(&node->jnodes); ++ spin_unlock_irq(&sbinfo->all_guard); ++} ++#endif ++ ++/* ++ * this is called by jput_final() to remove jnode when last reference to it is ++ * released. 
++ */ ++static int jnode_try_drop(jnode * node) ++{ ++ int result; ++ reiser4_tree *tree; ++ jnode_type jtype; ++ ++ assert("nikita-2491", node != NULL); ++ assert("nikita-2583", JF_ISSET(node, JNODE_RIP)); ++ ++ tree = jnode_get_tree(node); ++ jtype = jnode_get_type(node); ++ ++ spin_lock_jnode(node); ++ write_lock_tree(tree); ++ /* ++ * if jnode has a page---leave it alone. Memory pressure will ++ * eventually kill page and jnode. ++ */ ++ if (jnode_page(node) != NULL) { ++ write_unlock_tree(tree); ++ spin_unlock_jnode(node); ++ JF_CLR(node, JNODE_RIP); ++ return RETERR(-EBUSY); ++ } ++ ++ /* re-check ->x_count under tree lock. */ ++ result = jnode_is_busy(node, jtype); ++ if (result == 0) { ++ assert("nikita-2582", !JF_ISSET(node, JNODE_HEARD_BANSHEE)); ++ assert("jmacd-511/b", atomic_read(&node->d_count) == 0); ++ ++ spin_unlock_jnode(node); ++ /* no page and no references---despatch him. */ ++ jnode_remove(node, jtype, tree); ++ write_unlock_tree(tree); ++ jnode_free(node, jtype); ++ } else { ++ /* busy check failed: reference was acquired by concurrent ++ * thread. */ ++ write_unlock_tree(tree); ++ spin_unlock_jnode(node); ++ JF_CLR(node, JNODE_RIP); ++ } ++ return result; ++} ++ ++/* jdelete() -- Delete jnode from the tree and file system */ ++static int jdelete(jnode * node /* jnode to finish with */ ) ++{ ++ struct page *page; ++ int result; ++ reiser4_tree *tree; ++ jnode_type jtype; ++ ++ assert("nikita-467", node != NULL); ++ assert("nikita-2531", JF_ISSET(node, JNODE_RIP)); ++ ++ jtype = jnode_get_type(node); ++ ++ page = jnode_lock_page(node); ++ assert_spin_locked(&(node->guard)); ++ ++ tree = jnode_get_tree(node); ++ ++ write_lock_tree(tree); ++ /* re-check ->x_count under tree lock. 
*/ ++ result = jnode_is_busy(node, jtype); ++ if (likely(!result)) { ++ assert("nikita-2123", JF_ISSET(node, JNODE_HEARD_BANSHEE)); ++ assert("jmacd-511", atomic_read(&node->d_count) == 0); ++ ++ /* detach page */ ++ if (page != NULL) { ++ /* ++ * FIXME this is racy against jnode_extent_write(). ++ */ ++ page_clear_jnode(page, node); ++ } ++ spin_unlock_jnode(node); ++ /* goodbye */ ++ jnode_delete(node, jtype, tree); ++ write_unlock_tree(tree); ++ jnode_free(node, jtype); ++ /* @node is no longer valid pointer */ ++ if (page != NULL) ++ reiser4_drop_page(page); ++ } else { ++ /* busy check failed: reference was acquired by concurrent ++ * thread. */ ++ JF_CLR(node, JNODE_RIP); ++ write_unlock_tree(tree); ++ spin_unlock_jnode(node); ++ if (page != NULL) ++ unlock_page(page); ++ } ++ return result; ++} ++ ++/* drop jnode on the floor. ++ ++ Return value: ++ ++ -EBUSY: failed to drop jnode, because there are still references to it ++ ++ 0: successfully dropped jnode ++ ++*/ ++static int jdrop_in_tree(jnode * node, reiser4_tree * tree) ++{ ++ struct page *page; ++ jnode_type jtype; ++ int result; ++ ++ assert("zam-602", node != NULL); ++ assert_rw_not_read_locked(&(tree->tree_lock)); ++ assert_rw_not_write_locked(&(tree->tree_lock)); ++ assert("nikita-2403", !JF_ISSET(node, JNODE_HEARD_BANSHEE)); ++ ++ jtype = jnode_get_type(node); ++ ++ page = jnode_lock_page(node); ++ assert_spin_locked(&(node->guard)); ++ ++ write_lock_tree(tree); ++ ++ /* re-check ->x_count under tree lock. 
*/ ++ result = jnode_is_busy(node, jtype); ++ if (!result) { ++ assert("nikita-2488", page == jnode_page(node)); ++ assert("nikita-2533", atomic_read(&node->d_count) == 0); ++ if (page != NULL) { ++ assert("nikita-2126", !PageDirty(page)); ++ assert("nikita-2127", PageUptodate(page)); ++ assert("nikita-2181", PageLocked(page)); ++ page_clear_jnode(page, node); ++ } ++ spin_unlock_jnode(node); ++ jnode_remove(node, jtype, tree); ++ write_unlock_tree(tree); ++ jnode_free(node, jtype); ++ if (page != NULL) { ++ reiser4_drop_page(page); ++ } ++ } else { ++ /* busy check failed: reference was acquired by concurrent ++ * thread. */ ++ JF_CLR(node, JNODE_RIP); ++ write_unlock_tree(tree); ++ spin_unlock_jnode(node); ++ if (page != NULL) ++ unlock_page(page); ++ } ++ return result; ++} ++ ++/* This function frees jnode "if possible". In particular, [dcx]_count has to ++ be 0 (where applicable). */ ++void jdrop(jnode * node) ++{ ++ jdrop_in_tree(node, jnode_get_tree(node)); ++} ++ ++/* IO head jnode implementation; The io heads are simple j-nodes with limited ++ functionality (these j-nodes are not in any hash table) just for reading ++ from and writing to disk. 
*/ ++ ++jnode *reiser4_alloc_io_head(const reiser4_block_nr * block) ++{ ++ jnode *jal = jalloc(); ++ ++ if (jal != NULL) { ++ jnode_init(jal, current_tree, JNODE_IO_HEAD); ++ jnode_set_block(jal, block); ++ } ++ ++ jref(jal); ++ ++ return jal; ++} ++ ++void reiser4_drop_io_head(jnode * node) ++{ ++ assert("zam-648", jnode_get_type(node) == JNODE_IO_HEAD); ++ ++ jput(node); ++ jdrop(node); ++} ++ ++/* protect keep jnode data from reiser4_releasepage() */ ++void pin_jnode_data(jnode * node) ++{ ++ assert("zam-671", jnode_page(node) != NULL); ++ page_cache_get(jnode_page(node)); ++} ++ ++/* make jnode data free-able again */ ++void unpin_jnode_data(jnode * node) ++{ ++ assert("zam-672", jnode_page(node) != NULL); ++ page_cache_release(jnode_page(node)); ++} ++ ++struct address_space *jnode_get_mapping(const jnode * node) ++{ ++ assert("nikita-3162", node != NULL); ++ return jnode_ops(node)->mapping(node); ++} ++ ++#if REISER4_DEBUG ++/* debugging aid: jnode invariant */ ++int jnode_invariant_f(const jnode * node, char const **msg) ++{ ++#define _ergo(ant, con) \ ++ ((*msg) = "{" #ant "} ergo {" #con "}", ergo((ant), (con))) ++#define _check(exp) ((*msg) = #exp, (exp)) ++ ++ return _check(node != NULL) && ++ /* [jnode-queued] */ ++ /* only relocated node can be queued, except that when znode ++ * is being deleted, its JNODE_RELOC bit is cleared */ ++ _ergo(JF_ISSET(node, JNODE_FLUSH_QUEUED), ++ JF_ISSET(node, JNODE_RELOC) || ++ JF_ISSET(node, JNODE_HEARD_BANSHEE)) && ++ _check(node->jnodes.prev != NULL) && ++ _check(node->jnodes.next != NULL) && ++ /* [jnode-dirty] invariant */ ++ /* dirty inode is part of atom */ ++ _ergo(JF_ISSET(node, JNODE_DIRTY), node->atom != NULL) && ++ /* [jnode-oid] invariant */ ++ /* for unformatted node ->objectid and ->mapping fields are ++ * consistent */ ++ _ergo(jnode_is_unformatted(node) && node->key.j.mapping != NULL, ++ node->key.j.objectid == ++ get_inode_oid(node->key.j.mapping->host)) && ++ /* [jnode-atom-valid] invariant */ ++ /* 
node atom has valid state */ ++ _ergo(node->atom != NULL, node->atom->stage != ASTAGE_INVALID) && ++ /* [jnode-page-binding] invariant */ ++ /* if node points to page, it points back to node */ ++ _ergo(node->pg != NULL, jprivate(node->pg) == node) && ++ /* [jnode-refs] invariant */ ++ /* only referenced jnode can be loaded */ ++ _check(atomic_read(&node->x_count) >= atomic_read(&node->d_count)); ++ ++} ++ ++static const char *jnode_type_name(jnode_type type) ++{ ++ switch (type) { ++ case JNODE_UNFORMATTED_BLOCK: ++ return "unformatted"; ++ case JNODE_FORMATTED_BLOCK: ++ return "formatted"; ++ case JNODE_BITMAP: ++ return "bitmap"; ++ case JNODE_IO_HEAD: ++ return "io head"; ++ case JNODE_INODE: ++ return "inode"; ++ case LAST_JNODE_TYPE: ++ return "last"; ++ default:{ ++ static char unknown[30]; ++ ++ sprintf(unknown, "unknown %i", type); ++ return unknown; ++ } ++ } ++} ++ ++#define jnode_state_name( node, flag ) \ ++ ( JF_ISSET( ( node ), ( flag ) ) ? ((#flag "|")+6) : "" ) ++ ++/* debugging aid: output human readable information about @node */ ++static void info_jnode(const char *prefix /* prefix to print */ , ++ const jnode * node /* node to print */ ) ++{ ++ assert("umka-068", prefix != NULL); ++ ++ if (node == NULL) { ++ printk("%s: null\n", prefix); ++ return; ++ } ++ ++ printk ++ ("%s: %p: state: %lx: [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s], level: %i," ++ " block: %s, d_count: %d, x_count: %d, " ++ "pg: %p, atom: %p, lock: %i:%i, type: %s, ", prefix, node, ++ node->state, ++ jnode_state_name(node, JNODE_PARSED), ++ jnode_state_name(node, JNODE_HEARD_BANSHEE), ++ jnode_state_name(node, JNODE_LEFT_CONNECTED), ++ jnode_state_name(node, JNODE_RIGHT_CONNECTED), ++ jnode_state_name(node, JNODE_ORPHAN), ++ jnode_state_name(node, JNODE_CREATED), ++ jnode_state_name(node, JNODE_RELOC), ++ jnode_state_name(node, JNODE_OVRWR), ++ jnode_state_name(node, JNODE_DIRTY), ++ jnode_state_name(node, JNODE_IS_DYING), ++ jnode_state_name(node, JNODE_RIP), ++ 
jnode_state_name(node, JNODE_MISSED_IN_CAPTURE), ++ jnode_state_name(node, JNODE_WRITEBACK), ++ jnode_state_name(node, JNODE_NEW), ++ jnode_state_name(node, JNODE_DKSET), ++ jnode_state_name(node, JNODE_REPACK), ++ jnode_state_name(node, JNODE_CLUSTER_PAGE), ++ jnode_get_level(node), sprint_address(jnode_get_block(node)), ++ atomic_read(&node->d_count), atomic_read(&node->x_count), ++ jnode_page(node), node->atom, 0, 0, ++ jnode_type_name(jnode_get_type(node))); ++ if (jnode_is_unformatted(node)) { ++ printk("inode: %llu, index: %lu, ", ++ node->key.j.objectid, node->key.j.index); ++ } ++} ++ ++/* debugging aid: check znode invariant and panic if it doesn't hold */ ++static int jnode_invariant(const jnode * node, int tlocked, int jlocked) ++{ ++ char const *failed_msg; ++ int result; ++ reiser4_tree *tree; ++ ++ tree = jnode_get_tree(node); ++ ++ assert("umka-063312", node != NULL); ++ assert("umka-064321", tree != NULL); ++ ++ if (!jlocked && !tlocked) ++ spin_lock_jnode((jnode *) node); ++ if (!tlocked) ++ read_lock_tree(jnode_get_tree(node)); ++ result = jnode_invariant_f(node, &failed_msg); ++ if (!result) { ++ info_jnode("corrupted node", node); ++ warning("jmacd-555", "Condition %s failed", failed_msg); ++ } ++ if (!tlocked) ++ read_unlock_tree(jnode_get_tree(node)); ++ if (!jlocked && !tlocked) ++ spin_unlock_jnode((jnode *) node); ++ return result; ++} ++ ++#endif /* REISER4_DEBUG */ ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 80 ++ End: ++*/ +diff --git a/fs/reiser4/jnode.h b/fs/reiser4/jnode.h +new file mode 100644 +index 0000000..c05d88e +--- /dev/null ++++ b/fs/reiser4/jnode.h +@@ -0,0 +1,705 @@ ++/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Declaration of jnode. See jnode.c for details. 
*/ ++ ++#ifndef __JNODE_H__ ++#define __JNODE_H__ ++ ++#include "forward.h" ++#include "type_safe_hash.h" ++#include "txnmgr.h" ++#include "key.h" ++#include "debug.h" ++#include "dformat.h" ++#include "page_cache.h" ++#include "context.h" ++ ++#include "plugin/plugin.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* declare hash table of jnodes (jnodes proper, that is, unformatted ++ nodes) */ ++TYPE_SAFE_HASH_DECLARE(j, jnode); ++ ++/* declare hash table of znodes */ ++TYPE_SAFE_HASH_DECLARE(z, znode); ++ ++typedef struct { ++ __u64 objectid; ++ unsigned long index; ++ struct address_space *mapping; ++} jnode_key_t; ++ ++/* ++ Jnode is the "base class" of other nodes in reiser4. It is also happens to ++ be exactly the node we use for unformatted tree nodes. ++ ++ Jnode provides following basic functionality: ++ ++ . reference counting and indexing. ++ ++ . integration with page cache. Jnode has ->pg reference to which page can ++ be attached. ++ ++ . interface to transaction manager. It is jnode that is kept in transaction ++ manager lists, attached to atoms, etc. (NOTE-NIKITA one may argue that this ++ means, there should be special type of jnode for inode.) ++ ++ Locking: ++ ++ Spin lock: the following fields are protected by the per-jnode spin lock: ++ ++ ->state ++ ->atom ++ ->capture_link ++ ++ Following fields are protected by the global tree lock: ++ ++ ->link ++ ->key.z (content of ->key.z is only changed in znode_rehash()) ++ ->key.j ++ ++ Atomic counters ++ ++ ->x_count ++ ->d_count ++ ++ ->pg, and ->data are protected by spin lock for unused jnode and are ++ immutable for used jnode (one for which fs/reiser4/vfs_ops.c:releasable() ++ is false). ++ ++ ->tree is immutable after creation ++ ++ Unclear ++ ++ ->blocknr: should be under jnode spin-lock, but current interface is based ++ on passing of block address. 
++ ++ If you ever need to spin lock two nodes at once, do this in "natural" ++ memory order: lock znode with lower address first. (See lock_two_nodes().) ++ ++ Invariants involving this data-type: ++ ++ [jnode-dirty] ++ [jnode-refs] ++ [jnode-oid] ++ [jnode-queued] ++ [jnode-atom-valid] ++ [jnode-page-binding] ++*/ ++ ++struct jnode { ++#if REISER4_DEBUG ++#define JMAGIC 0x52654973 /* "ReIs" */ ++ int magic; ++#endif ++ /* FIRST CACHE LINE (16 bytes): data used by jload */ ++ ++ /* jnode's state: bitwise flags from the reiser4_jnode_state enum. */ ++ /* 0 */ unsigned long state; ++ ++ /* lock, protecting jnode's fields. */ ++ /* 4 */ spinlock_t load; ++ ++ /* counter of references to jnode itself. Increased on jref(). ++ Decreased on jput(). ++ */ ++ /* 8 */ atomic_t x_count; ++ ++ /* counter of references to jnode's data. Pin data page(s) in ++ memory while this is greater than 0. Increased on jload(). ++ Decreased on jrelse(). ++ */ ++ /* 12 */ atomic_t d_count; ++ ++ /* SECOND CACHE LINE: data used by hash table lookups */ ++ ++ /* 16 */ union { ++ /* znodes are hashed by block number */ ++ reiser4_block_nr z; ++ /* unformatted nodes are hashed by mapping plus offset */ ++ jnode_key_t j; ++ } key; ++ ++ /* THIRD CACHE LINE */ ++ ++ /* 32 */ union { ++ /* pointers to maintain hash-table */ ++ z_hash_link z; ++ j_hash_link j; ++ } link; ++ ++ /* pointer to jnode page. */ ++ /* 36 */ struct page *pg; ++ /* pointer to node itself. 
This is page_address(node->pg) when page is ++ attached to the jnode ++ */ ++ /* 40 */ void *data; ++ ++ /* 44 */ reiser4_tree *tree; ++ ++ /* FOURTH CACHE LINE: atom related fields */ ++ ++ /* 48 */ spinlock_t guard; ++ ++ /* atom the block is in, if any */ ++ /* 52 */ txn_atom *atom; ++ ++ /* capture list */ ++ /* 56 */ struct list_head capture_link; ++ ++ /* FIFTH CACHE LINE */ ++ ++ /* 64 */ struct rcu_head rcu; ++ /* crosses cache line */ ++ ++ /* SIXTH CACHE LINE */ ++ ++ /* the real blocknr (where io is going to/from) */ ++ /* 80 */ reiser4_block_nr blocknr; ++ /* Parent item type, unformatted and CRC need it for offset => key conversion. */ ++ /* NOTE: this parent_item_id looks like jnode type. */ ++ /* 88 */ reiser4_plugin_id parent_item_id; ++ /* 92 */ ++#if REISER4_DEBUG ++ /* number of pages referenced by the jnode (meaningful while capturing of ++ page clusters) */ ++ int page_count; ++ /* list of all jnodes for debugging purposes. */ ++ struct list_head jnodes; ++ /* how many times this jnode was written in one transaction */ ++ int written; ++ /* this indicates which atom's list the jnode is on */ ++ atom_list list; ++#endif ++} __attribute__ ((aligned(16))); ++ ++/* ++ * jnode types. Enumeration of existing jnode types. ++ */ ++typedef enum { ++ JNODE_UNFORMATTED_BLOCK, /* unformatted block */ ++ JNODE_FORMATTED_BLOCK, /* formatted block, znode */ ++ JNODE_BITMAP, /* bitmap */ ++ JNODE_IO_HEAD, /* jnode representing a block in the ++ * wandering log */ ++ JNODE_INODE, /* jnode embedded into inode */ ++ LAST_JNODE_TYPE ++} jnode_type; ++ ++/* jnode states */ ++typedef enum { ++ /* jnode's page is loaded and data checked */ ++ JNODE_PARSED = 0, ++ /* node was deleted, not all locks on it were released. This ++ node is empty and is going to be removed from the tree ++ shortly. 
*/ ++ JNODE_HEARD_BANSHEE = 1, ++ /* left sibling pointer is valid */ ++ JNODE_LEFT_CONNECTED = 2, ++ /* right sibling pointer is valid */ ++ JNODE_RIGHT_CONNECTED = 3, ++ ++ /* znode was just created and doesn't yet have a pointer from ++ its parent */ ++ JNODE_ORPHAN = 4, ++ ++ /* this node was created by its transaction and has not been assigned ++ a block address. */ ++ JNODE_CREATED = 5, ++ ++ /* this node is currently relocated */ ++ JNODE_RELOC = 6, ++ /* this node is currently wandered */ ++ JNODE_OVRWR = 7, ++ ++ /* this znode has been modified */ ++ JNODE_DIRTY = 8, ++ ++ /* znode lock is being invalidated */ ++ JNODE_IS_DYING = 9, ++ ++ /* THIS PLACE IS INTENTIONALLY LEFT BLANK */ ++ ++ /* jnode is queued for flushing. */ ++ JNODE_FLUSH_QUEUED = 12, ++ ++ /* In the following bits jnode type is encoded. */ ++ JNODE_TYPE_1 = 13, ++ JNODE_TYPE_2 = 14, ++ JNODE_TYPE_3 = 15, ++ ++ /* jnode is being destroyed */ ++ JNODE_RIP = 16, ++ ++ /* znode was not captured during locking (it might so be because ++ ->level != LEAF_LEVEL and lock_mode == READ_LOCK) */ ++ JNODE_MISSED_IN_CAPTURE = 17, ++ ++ /* write is in progress */ ++ JNODE_WRITEBACK = 18, ++ ++ /* FIXME: now it is used by crypto-compress plugin only */ ++ JNODE_NEW = 19, ++ ++ /* delimiting keys are already set for this znode. */ ++ JNODE_DKSET = 20, ++ ++ /* when this bit is set page and jnode can not be disconnected */ ++ JNODE_WRITE_PREPARED = 21, ++ ++ JNODE_CLUSTER_PAGE = 22, ++ /* Jnode is marked for repacking, that means the reiser4 flush and the ++ * block allocator should process this node special way */ ++ JNODE_REPACK = 23, ++ /* node should be converted by flush in squalloc phase */ ++ JNODE_CONVERTIBLE = 24, ++ /* ++ * When jnode is dirtied for the first time in given transaction, ++ * do_jnode_make_dirty() checks whether this jnode can possible became ++ * member of overwrite set. If so, this bit is set, and one block is ++ * reserved in the ->flush_reserved space of atom. 
++ * ++ * This block is "used" (and JNODE_FLUSH_RESERVED bit is cleared) when ++ * ++ * (1) flush decides that we want this block to go into relocate ++ * set after all. ++ * ++ * (2) wandering log is allocated (by log writer) ++ * ++ * (3) extent is allocated ++ * ++ */ ++ JNODE_FLUSH_RESERVED = 29 ++} reiser4_jnode_state; ++ ++/* Macros for accessing the jnode state. */ ++ ++static inline void JF_CLR(jnode * j, int f) ++{ ++ assert("unknown-1", j->magic == JMAGIC); ++ clear_bit(f, &j->state); ++} ++static inline int JF_ISSET(const jnode * j, int f) ++{ ++ assert("unknown-2", j->magic == JMAGIC); ++ return test_bit(f, &((jnode *) j)->state); ++} ++static inline void JF_SET(jnode * j, int f) ++{ ++ assert("unknown-3", j->magic == JMAGIC); ++ set_bit(f, &j->state); ++} ++ ++static inline int JF_TEST_AND_SET(jnode * j, int f) ++{ ++ assert("unknown-4", j->magic == JMAGIC); ++ return test_and_set_bit(f, &j->state); ++} ++ ++static inline void spin_lock_jnode(jnode *node) ++{ ++ /* check that spinlocks of lower priorities are not held */ ++ assert("", (LOCK_CNT_NIL(rw_locked_tree) && ++ LOCK_CNT_NIL(spin_locked_txnh) && ++ LOCK_CNT_NIL(spin_locked_zlock) && ++ LOCK_CNT_NIL(rw_locked_dk) && ++ LOCK_CNT_LT(spin_locked_jnode, 2))); ++ ++ spin_lock(&(node->guard)); ++ ++ LOCK_CNT_INC(spin_locked_jnode); ++ LOCK_CNT_INC(spin_locked); ++} ++ ++static inline void spin_unlock_jnode(jnode *node) ++{ ++ assert_spin_locked(&(node->guard)); ++ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_jnode)); ++ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); ++ ++ LOCK_CNT_DEC(spin_locked_jnode); ++ LOCK_CNT_DEC(spin_locked); ++ ++ spin_unlock(&(node->guard)); ++} ++ ++static inline int jnode_is_in_deleteset(const jnode * node) ++{ ++ return JF_ISSET(node, JNODE_RELOC); ++} ++ ++extern int init_jnodes(void); ++extern void done_jnodes(void); ++ ++/* Jnode routines */ ++extern jnode *jalloc(void); ++extern void jfree(jnode * node) NONNULL; ++extern jnode *jclone(jnode *); ++extern jnode 
*jlookup(reiser4_tree * tree, ++ oid_t objectid, unsigned long ind) NONNULL; ++extern jnode *jfind(struct address_space *, unsigned long index) NONNULL; ++extern jnode *jnode_by_page(struct page *pg) NONNULL; ++extern jnode *jnode_of_page(struct page *pg) NONNULL; ++void jnode_attach_page(jnode * node, struct page *pg); ++ ++void unhash_unformatted_jnode(jnode *); ++extern jnode *page_next_jnode(jnode * node) NONNULL; ++extern void jnode_init(jnode * node, reiser4_tree * tree, jnode_type) NONNULL; ++extern void jnode_make_dirty(jnode * node) NONNULL; ++extern void jnode_make_clean(jnode * node) NONNULL; ++extern void jnode_make_wander_nolock(jnode * node) NONNULL; ++extern void jnode_make_wander(jnode *) NONNULL; ++extern void znode_make_reloc(znode *, flush_queue_t *) NONNULL; ++extern void unformatted_make_reloc(jnode *, flush_queue_t *) NONNULL; ++extern struct address_space *jnode_get_mapping(const jnode * node) NONNULL; ++ ++/** ++ * jnode_get_block ++ * @node: jnode to query ++ * ++ */ ++static inline const reiser4_block_nr *jnode_get_block(const jnode *node) ++{ ++ assert("nikita-528", node != NULL); ++ ++ return &node->blocknr; ++} ++ ++/** ++ * jnode_set_block ++ * @node: jnode to update ++ * @blocknr: new block nr ++ */ ++static inline void jnode_set_block(jnode *node, const reiser4_block_nr *blocknr) ++{ ++ assert("nikita-2020", node != NULL); ++ assert("umka-055", blocknr != NULL); ++ node->blocknr = *blocknr; ++} ++ ++ ++/* block number for IO. Usually this is the same as jnode_get_block(), unless ++ * jnode was emergency flushed---then block number chosen by eflush is ++ * used. */ ++static inline const reiser4_block_nr *jnode_get_io_block(jnode * node) ++{ ++ assert("nikita-2768", node != NULL); ++ assert_spin_locked(&(node->guard)); ++ ++ return jnode_get_block(node); ++} ++ ++/* Jnode flush interface. 
*/ ++extern reiser4_blocknr_hint *reiser4_pos_hint(flush_pos_t * pos); ++extern flush_queue_t *reiser4_pos_fq(flush_pos_t * pos); ++ ++/* FIXME-VS: these are used in plugin/item/extent.c */ ++ ++/* does extent_get_block have to be called */ ++#define jnode_mapped(node) JF_ISSET (node, JNODE_MAPPED) ++#define jnode_set_mapped(node) JF_SET (node, JNODE_MAPPED) ++ ++/* the node should be converted during flush squalloc phase */ ++#define jnode_convertible(node) JF_ISSET (node, JNODE_CONVERTIBLE) ++#define jnode_set_convertible(node) JF_SET (node, JNODE_CONVERTIBLE) ++ ++/* Macros to convert from jnode to znode, znode to jnode. These are macros ++ because C doesn't allow overloading of const prototypes. */ ++#define ZJNODE(x) (& (x) -> zjnode) ++#define JZNODE(x) \ ++({ \ ++ typeof (x) __tmp_x; \ ++ \ ++ __tmp_x = (x); \ ++ assert ("jmacd-1300", jnode_is_znode (__tmp_x)); \ ++ (znode*) __tmp_x; \ ++}) ++ ++extern int jnodes_tree_init(reiser4_tree * tree); ++extern int jnodes_tree_done(reiser4_tree * tree); ++ ++#if REISER4_DEBUG ++ ++extern int znode_is_any_locked(const znode * node); ++extern void jnode_list_remove(jnode * node); ++ ++#else ++ ++#define jnode_list_remove(node) noop ++ ++#endif ++ ++int znode_is_root(const znode * node) NONNULL; ++ ++/* bump reference counter on @node */ ++static inline void add_x_ref(jnode * node /* node to increase x_count of */ ) ++{ ++ assert("nikita-1911", node != NULL); ++ ++ atomic_inc(&node->x_count); ++ LOCK_CNT_INC(x_refs); ++} ++ ++static inline void dec_x_ref(jnode * node) ++{ ++ assert("nikita-3215", node != NULL); ++ assert("nikita-3216", atomic_read(&node->x_count) > 0); ++ ++ atomic_dec(&node->x_count); ++ assert("nikita-3217", LOCK_CNT_GTZ(x_refs)); ++ LOCK_CNT_DEC(x_refs); ++} ++ ++/* jref() - increase counter of references to jnode/znode (x_count) */ ++static inline jnode *jref(jnode * node) ++{ ++ assert("jmacd-508", (node != NULL) && !IS_ERR(node)); ++ add_x_ref(node); ++ return node; ++} ++ ++/* get the page of 
jnode */ ++static inline struct page *jnode_page(const jnode * node) ++{ ++ return node->pg; ++} ++ ++/* return pointer to jnode data */ ++static inline char *jdata(const jnode * node) ++{ ++ assert("nikita-1415", node != NULL); ++ assert("nikita-3198", jnode_page(node) != NULL); ++ return node->data; ++} ++ ++static inline int jnode_is_loaded(const jnode * node) ++{ ++ assert("zam-506", node != NULL); ++ return atomic_read(&node->d_count) > 0; ++} ++ ++extern void page_clear_jnode(struct page *page, jnode * node) NONNULL; ++ ++static inline void jnode_set_reloc(jnode * node) ++{ ++ assert("nikita-2431", node != NULL); ++ assert("nikita-2432", !JF_ISSET(node, JNODE_OVRWR)); ++ JF_SET(node, JNODE_RELOC); ++} ++ ++/* jload/jwrite/junload give a bread/bwrite/brelse functionality for jnodes */ ++ ++extern int jload_gfp(jnode *, gfp_t, int do_kmap) NONNULL; ++ ++static inline int jload(jnode *node) ++{ ++ return jload_gfp(node, reiser4_ctx_gfp_mask_get(), 1); ++} ++ ++extern int jinit_new(jnode *, gfp_t) NONNULL; ++extern int jstartio(jnode *) NONNULL; ++ ++extern void jdrop(jnode *) NONNULL; ++extern int jwait_io(jnode *, int rw) NONNULL; ++ ++void jload_prefetch(jnode *); ++ ++extern jnode *reiser4_alloc_io_head(const reiser4_block_nr * block) NONNULL; ++extern void reiser4_drop_io_head(jnode * node) NONNULL; ++ ++static inline reiser4_tree *jnode_get_tree(const jnode * node) ++{ ++ assert("nikita-2691", node != NULL); ++ return node->tree; ++} ++ ++extern void pin_jnode_data(jnode *); ++extern void unpin_jnode_data(jnode *); ++ ++static inline jnode_type jnode_get_type(const jnode * node) ++{ ++ static const unsigned long state_mask = ++ (1 << JNODE_TYPE_1) | (1 << JNODE_TYPE_2) | (1 << JNODE_TYPE_3); ++ ++ static jnode_type mask_to_type[] = { ++ /* JNODE_TYPE_3 : JNODE_TYPE_2 : JNODE_TYPE_1 */ ++ ++ /* 000 */ ++ [0] = JNODE_FORMATTED_BLOCK, ++ /* 001 */ ++ [1] = JNODE_UNFORMATTED_BLOCK, ++ /* 010 */ ++ [2] = JNODE_BITMAP, ++ /* 011 */ ++ [3] = LAST_JNODE_TYPE, 
/*invalid */ ++ /* 100 */ ++ [4] = JNODE_INODE, ++ /* 101 */ ++ [5] = LAST_JNODE_TYPE, ++ /* 110 */ ++ [6] = JNODE_IO_HEAD, ++ /* 111 */ ++ [7] = LAST_JNODE_TYPE, /* invalid */ ++ }; ++ ++ return mask_to_type[(node->state & state_mask) >> JNODE_TYPE_1]; ++} ++ ++/* returns true if node is a znode */ ++static inline int jnode_is_znode(const jnode * node) ++{ ++ return jnode_get_type(node) == JNODE_FORMATTED_BLOCK; ++} ++ ++static inline int jnode_is_flushprepped(jnode * node) ++{ ++ assert("jmacd-78212", node != NULL); ++ assert_spin_locked(&(node->guard)); ++ return !JF_ISSET(node, JNODE_DIRTY) || JF_ISSET(node, JNODE_RELOC) || ++ JF_ISSET(node, JNODE_OVRWR); ++} ++ ++/* Return true if @node has already been processed by the squeeze and allocate ++ process. This implies the block address has been finalized for the ++ duration of this atom (or it is clean and will remain in place). If this ++ returns true you may use the block number as a hint. */ ++static inline int jnode_check_flushprepped(jnode * node) ++{ ++ int result; ++ ++ /* It must be clean or relocated or wandered. New allocations are set to relocate. 
*/ ++ spin_lock_jnode(node); ++ result = jnode_is_flushprepped(node); ++ spin_unlock_jnode(node); ++ return result; ++} ++ ++/* returns true if node is unformatted */ ++static inline int jnode_is_unformatted(const jnode * node) ++{ ++ assert("jmacd-0123", node != NULL); ++ return jnode_get_type(node) == JNODE_UNFORMATTED_BLOCK; ++} ++ ++/* returns true if node represents a cluster cache page */ ++static inline int jnode_is_cluster_page(const jnode * node) ++{ ++ assert("edward-50", node != NULL); ++ return (JF_ISSET(node, JNODE_CLUSTER_PAGE)); ++} ++ ++/* returns true is node is builtin inode's jnode */ ++static inline int jnode_is_inode(const jnode * node) ++{ ++ assert("vs-1240", node != NULL); ++ return jnode_get_type(node) == JNODE_INODE; ++} ++ ++static inline jnode_plugin *jnode_ops_of(const jnode_type type) ++{ ++ assert("nikita-2367", type < LAST_JNODE_TYPE); ++ return jnode_plugin_by_id((reiser4_plugin_id) type); ++} ++ ++static inline jnode_plugin *jnode_ops(const jnode * node) ++{ ++ assert("nikita-2366", node != NULL); ++ ++ return jnode_ops_of(jnode_get_type(node)); ++} ++ ++/* Get the index of a block. */ ++static inline unsigned long jnode_get_index(jnode * node) ++{ ++ return jnode_ops(node)->index(node); ++} ++ ++/* return true if "node" is the root */ ++static inline int jnode_is_root(const jnode * node) ++{ ++ return jnode_is_znode(node) && znode_is_root(JZNODE(node)); ++} ++ ++extern struct address_space *mapping_jnode(const jnode * node); ++extern unsigned long index_jnode(const jnode * node); ++ ++static inline void jput(jnode * node); ++extern void jput_final(jnode * node); ++ ++/* bump data counter on @node */ ++static inline void add_d_ref(jnode * node /* node to increase d_count of */ ) ++{ ++ assert("nikita-1962", node != NULL); ++ ++ atomic_inc(&node->d_count); ++ if (jnode_is_unformatted(node) || jnode_is_znode(node)) ++ LOCK_CNT_INC(d_refs); ++} ++ ++/* jput() - decrement x_count reference counter on znode. 
++ ++ Count may drop to 0, jnode stays in cache until memory pressure causes the ++ eviction of its page. The c_count variable also ensures that children are ++ pressured out of memory before the parent. The jnode remains hashed as ++ long as the VM allows its page to stay in memory. ++*/ ++static inline void jput(jnode * node) ++{ ++ assert("jmacd-509", node != NULL); ++ assert("jmacd-510", atomic_read(&node->x_count) > 0); ++ assert("zam-926", reiser4_schedulable()); ++ LOCK_CNT_DEC(x_refs); ++ ++ rcu_read_lock(); ++ /* ++ * we don't need any kind of lock here--jput_final() uses RCU. ++ */ ++ if (unlikely(atomic_dec_and_test(&node->x_count))) { ++ jput_final(node); ++ } else ++ rcu_read_unlock(); ++ assert("nikita-3473", reiser4_schedulable()); ++} ++ ++extern void jrelse(jnode * node); ++extern void jrelse_tail(jnode * node); ++ ++extern jnode *jnode_rip_sync(reiser4_tree * t, jnode * node); ++ ++/* resolve race with jput */ ++static inline jnode *jnode_rip_check(reiser4_tree * tree, jnode * node) ++{ ++ if (unlikely(JF_ISSET(node, JNODE_RIP))) ++ node = jnode_rip_sync(tree, node); ++ return node; ++} ++ ++extern reiser4_key *jnode_build_key(const jnode *node, reiser4_key * key); ++ ++#if REISER4_DEBUG ++extern int jnode_invariant_f(const jnode *node, char const **msg); ++#endif ++ ++extern jnode_plugin jnode_plugins[LAST_JNODE_TYPE]; ++ ++/* __JNODE_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/kassign.c b/fs/reiser4/kassign.c +new file mode 100644 +index 0000000..3c8f9f5 +--- /dev/null ++++ b/fs/reiser4/kassign.c +@@ -0,0 +1,661 @@ ++/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Key assignment policy implementation */ ++ ++/* ++ * In reiser4 every piece of file system data and meta-data has a key. 
Keys ++ * are used to store information in and retrieve it from reiser4 internal ++ * tree. In addition to this, keys define _ordering_ of all file system ++ * information: things having close keys are placed into the same or ++ * neighboring (in the tree order) nodes of the tree. As our block allocator ++ * tries to respect tree order (see flush.c), keys also define order in which ++ * things are laid out on the disk, and hence, affect performance directly. ++ * ++ * Obviously, assignment of keys to data and meta-data should be consistent ++ * across whole file system. Algorithm that calculates a key for a given piece ++ * of data or meta-data is referred to as "key assignment". ++ * ++ * Key assignment is too expensive to be implemented as a plugin (that is, ++ * with an ability to support different key assignment schemas in the same ++ * compiled kernel image). As a compromise, all key-assignment functions and ++ * data-structures are collected in this single file, so that modifications to ++ * key assignment algorithm can be localized. Additional changes may be ++ * required in key.[ch]. ++ * ++ * Current default reiser4 key assignment algorithm is dubbed "Plan A". As one ++ * may guess, there is "Plan B" too. ++ * ++ */ ++ ++/* ++ * Additional complication with key assignment implementation is a requirement ++ * to support different key length. ++ */ ++ ++/* ++ * KEY ASSIGNMENT: PLAN A, LONG KEYS. 
++ * ++ * DIRECTORY ITEMS ++ * ++ * | 60 | 4 | 7 |1| 56 | 64 | 64 | ++ * +--------------+---+---+-+-------------+------------------+-----------------+ ++ * | dirid | 0 | F |H| prefix-1 | prefix-2 | prefix-3/hash | ++ * +--------------+---+---+-+-------------+------------------+-----------------+ ++ * | | | | | ++ * | 8 bytes | 8 bytes | 8 bytes | 8 bytes | ++ * ++ * dirid objectid of directory this item is for ++ * ++ * F fibration, see fs/reiser4/plugin/fibration.[ch] ++ * ++ * H 1 if last 8 bytes of the key contain hash, ++ * 0 if last 8 bytes of the key contain prefix-3 ++ * ++ * prefix-1 first 7 characters of file name. ++ * Padded by zeroes if name is not long enough. ++ * ++ * prefix-2 next 8 characters of the file name. ++ * ++ * prefix-3 next 8 characters of the file name. ++ * ++ * hash hash of the rest of file name (i.e., portion of file ++ * name not included into prefix-1 and prefix-2). ++ * ++ * File names shorter than 23 (== 7 + 8 + 8) characters are completely encoded ++ * in the key. Such file names are called "short". They are distinguished by H ++ * bit set 0 in the key. ++ * ++ * Other file names are "long". For long name, H bit is 1, and first 15 (== 7 ++ * + 8) characters are encoded in prefix-1 and prefix-2 portions of the ++ * key. Last 8 bytes of the key are occupied by hash of the remaining ++ * characters of the name. ++ * ++ * This key assignment reaches following important goals: ++ * ++ * (1) directory entries are sorted in approximately lexicographical ++ * order. ++ * ++ * (2) collisions (when multiple directory items have the same key), while ++ * principally unavoidable in a tree with fixed length keys, are rare. 
++ * ++ * STAT DATA ++ * ++ * | 60 | 4 | 64 | 4 | 60 | 64 | ++ * +--------------+---+-----------------+---+--------------+-----------------+ ++ * | locality id | 1 | ordering | 0 | objectid | 0 | ++ * +--------------+---+-----------------+---+--------------+-----------------+ ++ * | | | | | ++ * | 8 bytes | 8 bytes | 8 bytes | 8 bytes | ++ * ++ * locality id object id of a directory where first name was created for ++ * the object ++ * ++ * ordering copy of second 8-byte portion of the key of directory ++ * entry for the first name of this object. Ordering has a form ++ * { ++ * fibration :7; ++ * h :1; ++ * prefix1 :56; ++ * } ++ * see description of key for directory entry above. ++ * ++ * objectid object id for this object ++ * ++ * This key assignment policy is designed to keep stat-data in the same order ++ * as corresponding directory items, thus speeding up readdir/stat types of ++ * workload. ++ * ++ * FILE BODY ++ * ++ * | 60 | 4 | 64 | 4 | 60 | 64 | ++ * +--------------+---+-----------------+---+--------------+-----------------+ ++ * | locality id | 4 | ordering | 0 | objectid | offset | ++ * +--------------+---+-----------------+---+--------------+-----------------+ ++ * | | | | | ++ * | 8 bytes | 8 bytes | 8 bytes | 8 bytes | ++ * ++ * locality id object id of a directory where first name was created for ++ * the object ++ * ++ * ordering the same as in the key of stat-data for this object ++ * ++ * objectid object id for this object ++ * ++ * offset logical offset from the beginning of this file. ++ * Measured in bytes. ++ * ++ * ++ * KEY ASSIGNMENT: PLAN A, SHORT KEYS. 
++ * ++ * DIRECTORY ITEMS ++ * ++ * | 60 | 4 | 7 |1| 56 | 64 | ++ * +--------------+---+---+-+-------------+-----------------+ ++ * | dirid | 0 | F |H| prefix-1 | prefix-2/hash | ++ * +--------------+---+---+-+-------------+-----------------+ ++ * | | | | ++ * | 8 bytes | 8 bytes | 8 bytes | ++ * ++ * dirid objectid of directory this item is for ++ * ++ * F fibration, see fs/reiser4/plugin/fibration.[ch] ++ * ++ * H 1 if last 8 bytes of the key contain hash, ++ * 0 if last 8 bytes of the key contain prefix-2 ++ * ++ * prefix-1 first 7 characters of file name. ++ * Padded by zeroes if name is not long enough. ++ * ++ * prefix-2 next 8 characters of the file name. ++ * ++ * hash hash of the rest of file name (i.e., portion of file ++ * name not included into prefix-1). ++ * ++ * File names shorter than 15 (== 7 + 8) characters are completely encoded in ++ * the key. Such file names are called "short". They are distinguished by H ++ * bit set in the key. ++ * ++ * Other file names are "long". For long name, H bit is 0, and first 7 ++ * characters are encoded in prefix-1 portion of the key. Last 8 bytes of the ++ * key are occupied by hash of the remaining characters of the name. 
++ * ++ * STAT DATA ++ * ++ * | 60 | 4 | 4 | 60 | 64 | ++ * +--------------+---+---+--------------+-----------------+ ++ * | locality id | 1 | 0 | objectid | 0 | ++ * +--------------+---+---+--------------+-----------------+ ++ * | | | | ++ * | 8 bytes | 8 bytes | 8 bytes | ++ * ++ * locality id object id of a directory where first name was created for ++ * the object ++ * ++ * objectid object id for this object ++ * ++ * FILE BODY ++ * ++ * | 60 | 4 | 4 | 60 | 64 | ++ * +--------------+---+---+--------------+-----------------+ ++ * | locality id | 4 | 0 | objectid | offset | ++ * +--------------+---+---+--------------+-----------------+ ++ * | | | | ++ * | 8 bytes | 8 bytes | 8 bytes | ++ * ++ * locality id object id of a directory where first name was created for ++ * the object ++ * ++ * objectid object id for this object ++ * ++ * offset logical offset from the beginning of this file. ++ * Measured in bytes. ++ * ++ * ++ */ ++ ++#include "debug.h" ++#include "key.h" ++#include "kassign.h" ++#include "vfs_ops.h" ++#include "inode.h" ++#include "super.h" ++#include "dscale.h" ++ ++#include /* for __u?? */ ++#include /* for struct super_block, etc */ ++ ++/* bitmask for H bit (see comment at the beginning of this file */ ++static const __u64 longname_mark = 0x0100000000000000ull; ++/* bitmask for F and H portions of the key. */ ++static const __u64 fibration_mask = 0xff00000000000000ull; ++ ++/* return true if name is not completely encoded in @key */ ++int is_longname_key(const reiser4_key * key) ++{ ++ __u64 highpart; ++ ++ assert("nikita-2863", key != NULL); ++ if (get_key_type(key) != KEY_FILE_NAME_MINOR) ++ reiser4_print_key("oops", key); ++ assert("nikita-2864", get_key_type(key) == KEY_FILE_NAME_MINOR); ++ ++ if (REISER4_LARGE_KEY) ++ highpart = get_key_ordering(key); ++ else ++ highpart = get_key_objectid(key); ++ ++ return (highpart & longname_mark) ? 
1 : 0; ++} ++ ++/* return true if @name is too long to be completely encoded in the key */ ++int is_longname(const char *name UNUSED_ARG, int len) ++{ ++ if (REISER4_LARGE_KEY) ++ return len > 23; ++ else ++ return len > 15; ++} ++ ++/* code ascii string into __u64. ++ ++ Put characters of @name into result (@str) one after another starting ++ from @start_idx-th highest (arithmetically) byte. This produces ++ endian-safe encoding. memcpy(2) will not do. ++ ++*/ ++static __u64 pack_string(const char *name /* string to encode */ , ++ int start_idx /* highest byte in result from ++ * which to start encoding */ ) ++{ ++ unsigned i; ++ __u64 str; ++ ++ str = 0; ++ for (i = 0; (i < sizeof str - start_idx) && name[i]; ++i) { ++ str <<= 8; ++ str |= (unsigned char)name[i]; ++ } ++ str <<= (sizeof str - i - start_idx) << 3; ++ return str; ++} ++ ++/* opposite to pack_string(). Takes value produced by pack_string(), restores ++ * string encoded in it and stores result in @buf */ ++char * reiser4_unpack_string(__u64 value, char *buf) ++{ ++ do { ++ *buf = value >> (64 - 8); ++ if (*buf) ++ ++buf; ++ value <<= 8; ++ } while (value != 0); ++ *buf = 0; ++ return buf; ++} ++ ++/* obtain name encoded in @key and store it in @buf */ ++char *extract_name_from_key(const reiser4_key * key, char *buf) ++{ ++ char *c; ++ ++ assert("nikita-2868", !is_longname_key(key)); ++ ++ c = buf; ++ if (REISER4_LARGE_KEY) { ++ c = reiser4_unpack_string(get_key_ordering(key) & ++ ~fibration_mask, c); ++ c = reiser4_unpack_string(get_key_fulloid(key), c); ++ } else ++ c = reiser4_unpack_string(get_key_fulloid(key) & ++ ~fibration_mask, c); ++ reiser4_unpack_string(get_key_offset(key), c); ++ return buf; ++} ++ ++/** ++ * complete_entry_key - calculate entry key by name ++ * @dir: directory where entry is (or will be) in ++ * @name: name to calculate key of ++ * @len: lenth of name ++ * @result: place to store result in ++ * ++ * Sets fields of entry key @result which depend on file name. 
++ * When REISER4_LARGE_KEY is defined three fields of @result are set: ordering, ++ * objectid and offset. Otherwise, objectid and offset are set. ++ */ ++void complete_entry_key(const struct inode *dir, const char *name, ++ int len, reiser4_key *result) ++{ ++#if REISER4_LARGE_KEY ++ __u64 ordering; ++ __u64 objectid; ++ __u64 offset; ++ ++ assert("nikita-1139", dir != NULL); ++ assert("nikita-1142", result != NULL); ++ assert("nikita-2867", strlen(name) == len); ++ ++ /* ++ * key allocation algorithm for directory entries in case of large ++ * keys: ++ * ++ * If name is not longer than 7 + 8 + 8 = 23 characters, put first 7 ++ * characters into ordering field of key, next 8 charactes (if any) ++ * into objectid field of key and next 8 ones (of any) into offset ++ * field of key ++ * ++ * If file name is longer than 23 characters, put first 7 characters ++ * into key's ordering, next 8 to objectid and hash of remaining ++ * characters into offset field. ++ * ++ * To distinguish above cases, in latter set up unused high bit in ++ * ordering field. ++ */ ++ ++ /* [0-6] characters to ordering */ ++ ordering = pack_string(name, 1); ++ if (len > 7) { ++ /* [7-14] characters to objectid */ ++ objectid = pack_string(name + 7, 0); ++ if (len > 15) { ++ if (len <= 23) { ++ /* [15-23] characters to offset */ ++ offset = pack_string(name + 15, 0); ++ } else { ++ /* note in a key the fact that offset contains hash. */ ++ ordering |= longname_mark; ++ ++ /* offset is the hash of the file name's tail. 
*/ ++ offset = inode_hash_plugin(dir)->hash(name + 15, ++ len - 15); ++ } ++ } else { ++ offset = 0ull; ++ } ++ } else { ++ objectid = 0ull; ++ offset = 0ull; ++ } ++ ++ assert("nikita-3480", inode_fibration_plugin(dir) != NULL); ++ ordering |= inode_fibration_plugin(dir)->fibre(dir, name, len); ++ ++ set_key_ordering(result, ordering); ++ set_key_fulloid(result, objectid); ++ set_key_offset(result, offset); ++ return; ++ ++#else ++ __u64 objectid; ++ __u64 offset; ++ ++ assert("nikita-1139", dir != NULL); ++ assert("nikita-1142", result != NULL); ++ assert("nikita-2867", strlen(name) == len); ++ ++ /* ++ * key allocation algorithm for directory entries in case of not large ++ * keys: ++ * ++ * If name is not longer than 7 + 8 = 15 characters, put first 7 ++ * characters into objectid field of key, next 8 charactes (if any) ++ * into offset field of key ++ * ++ * If file name is longer than 15 characters, put first 7 characters ++ * into key's objectid, and hash of remaining characters into offset ++ * field. ++ * ++ * To distinguish above cases, in latter set up unused high bit in ++ * objectid field. ++ */ ++ ++ /* [0-6] characters to objectid */ ++ objectid = pack_string(name, 1); ++ if (len > 7) { ++ if (len <= 15) { ++ /* [7-14] characters to offset */ ++ offset = pack_string(name + 7, 0); ++ } else { ++ /* note in a key the fact that offset contains hash. */ ++ objectid |= longname_mark; ++ ++ /* offset is the hash of the file name. */ ++ offset = inode_hash_plugin(dir)->hash(name + 7, ++ len - 7); ++ } ++ } else ++ offset = 0ull; ++ ++ assert("nikita-3480", inode_fibration_plugin(dir) != NULL); ++ objectid |= inode_fibration_plugin(dir)->fibre(dir, name, len); ++ ++ set_key_fulloid(result, objectid); ++ set_key_offset(result, offset); ++ return; ++#endif /* ! REISER4_LARGE_KEY */ ++} ++ ++/* true, if @key is the key of "." 
*/ ++int is_dot_key(const reiser4_key * key /* key to check */ ) ++{ ++ assert("nikita-1717", key != NULL); ++ assert("nikita-1718", get_key_type(key) == KEY_FILE_NAME_MINOR); ++ return ++ (get_key_ordering(key) == 0ull) && ++ (get_key_objectid(key) == 0ull) && (get_key_offset(key) == 0ull); ++} ++ ++/* build key for stat-data. ++ ++ return key of stat-data of this object. This should became sd plugin ++ method in the future. For now, let it be here. ++ ++*/ ++reiser4_key *build_sd_key(const struct inode * target /* inode of an object */ , ++ reiser4_key * result /* resulting key of @target ++ stat-data */ ) ++{ ++ assert("nikita-261", result != NULL); ++ ++ reiser4_key_init(result); ++ set_key_locality(result, reiser4_inode_data(target)->locality_id); ++ set_key_ordering(result, get_inode_ordering(target)); ++ set_key_objectid(result, get_inode_oid(target)); ++ set_key_type(result, KEY_SD_MINOR); ++ set_key_offset(result, (__u64) 0); ++ return result; ++} ++ ++/* encode part of key into &obj_key_id ++ ++ This encodes into @id part of @key sufficient to restore @key later, ++ given that latter is key of object (key of stat-data). ++ ++ See &obj_key_id ++*/ ++int build_obj_key_id(const reiser4_key * key /* key to encode */ , ++ obj_key_id * id /* id where key is encoded in */ ) ++{ ++ assert("nikita-1151", key != NULL); ++ assert("nikita-1152", id != NULL); ++ ++ memcpy(id, key, sizeof *id); ++ return 0; ++} ++ ++/* encode reference to @obj in @id. ++ ++ This is like build_obj_key_id() above, but takes inode as parameter. */ ++int build_inode_key_id(const struct inode *obj /* object to build key of */ , ++ obj_key_id * id /* result */ ) ++{ ++ reiser4_key sdkey; ++ ++ assert("nikita-1166", obj != NULL); ++ assert("nikita-1167", id != NULL); ++ ++ build_sd_key(obj, &sdkey); ++ build_obj_key_id(&sdkey, id); ++ return 0; ++} ++ ++/* decode @id back into @key ++ ++ Restore key of object stat-data from @id. This is dual to ++ build_obj_key_id() above. 
++*/ ++int extract_key_from_id(const obj_key_id * id /* object key id to extract key ++ * from */ , ++ reiser4_key * key /* result */ ) ++{ ++ assert("nikita-1153", id != NULL); ++ assert("nikita-1154", key != NULL); ++ ++ reiser4_key_init(key); ++ memcpy(key, id, sizeof *id); ++ return 0; ++} ++ ++/* extract objectid of directory from key of directory entry within said ++ directory. ++ */ ++oid_t extract_dir_id_from_key(const reiser4_key * de_key /* key of ++ * directory ++ * entry */ ) ++{ ++ assert("nikita-1314", de_key != NULL); ++ return get_key_locality(de_key); ++} ++ ++/* encode into @id key of directory entry. ++ ++ Encode into @id information sufficient to later distinguish directory ++ entries within the same directory. This is not whole key, because all ++ directory entries within directory item share locality which is equal ++ to objectid of their directory. ++ ++*/ ++int build_de_id(const struct inode *dir /* inode of directory */ , ++ const struct qstr *name /* name to be given to @obj by ++ * directory entry being ++ * constructed */ , ++ de_id * id /* short key of directory entry */ ) ++{ ++ reiser4_key key; ++ ++ assert("nikita-1290", dir != NULL); ++ assert("nikita-1292", id != NULL); ++ ++ /* NOTE-NIKITA this is suboptimal. */ ++ inode_dir_plugin(dir)->build_entry_key(dir, name, &key); ++ return build_de_id_by_key(&key, id); ++} ++ ++/* encode into @id key of directory entry. ++ ++ Encode into @id information sufficient to later distinguish directory ++ entries within the same directory. This is not whole key, because all ++ directory entries within directory item share locality which is equal ++ to objectid of their directory. ++ ++*/ ++int build_de_id_by_key(const reiser4_key * entry_key /* full key of directory ++ * entry */ , ++ de_id * id /* short key of directory entry */ ) ++{ ++ memcpy(id, ((__u64 *) entry_key) + 1, sizeof *id); ++ return 0; ++} ++ ++/* restore from @id key of directory entry. 
++ ++ Function dual to build_de_id(): given @id and locality, build full ++ key of directory entry within directory item. ++ ++*/ ++int extract_key_from_de_id(const oid_t locality /* locality of directory ++ * entry */ , ++ const de_id * id /* directory entry id */ , ++ reiser4_key * key /* result */ ) ++{ ++ /* no need to initialise key here: all fields are overwritten */ ++ memcpy(((__u64 *) key) + 1, id, sizeof *id); ++ set_key_locality(key, locality); ++ set_key_type(key, KEY_FILE_NAME_MINOR); ++ return 0; ++} ++ ++/* compare two &de_id's */ ++cmp_t de_id_cmp(const de_id * id1 /* first &de_id to compare */ , ++ const de_id * id2 /* second &de_id to compare */ ) ++{ ++ /* NOTE-NIKITA ugly implementation */ ++ reiser4_key k1; ++ reiser4_key k2; ++ ++ extract_key_from_de_id((oid_t) 0, id1, &k1); ++ extract_key_from_de_id((oid_t) 0, id2, &k2); ++ return keycmp(&k1, &k2); ++} ++ ++/* compare &de_id with key */ ++cmp_t de_id_key_cmp(const de_id * id /* directory entry id to compare */ , ++ const reiser4_key * key /* key to compare */ ) ++{ ++ cmp_t result; ++ reiser4_key *k1; ++ ++ k1 = (reiser4_key *) (((unsigned long)id) - sizeof key->el[0]); ++ result = KEY_DIFF_EL(k1, key, 1); ++ if (result == EQUAL_TO) { ++ result = KEY_DIFF_EL(k1, key, 2); ++ if (REISER4_LARGE_KEY && result == EQUAL_TO) { ++ result = KEY_DIFF_EL(k1, key, 3); ++ } ++ } ++ return result; ++} ++ ++/* ++ * return number of bytes necessary to encode @inode identity. ++ */ ++int inode_onwire_size(const struct inode *inode) ++{ ++ int result; ++ ++ result = dscale_bytes(get_inode_oid(inode)); ++ result += dscale_bytes(get_inode_locality(inode)); ++ ++ /* ++ * ordering is large (it usually has highest bits set), so it makes ++ * little sense to dscale it. 
++ */ ++ if (REISER4_LARGE_KEY) ++ result += sizeof(get_inode_ordering(inode)); ++ return result; ++} ++ ++/* ++ * encode @inode identity at @start ++ */ ++char *build_inode_onwire(const struct inode *inode, char *start) ++{ ++ start += dscale_write(start, get_inode_locality(inode)); ++ start += dscale_write(start, get_inode_oid(inode)); ++ ++ if (REISER4_LARGE_KEY) { ++ put_unaligned(cpu_to_le64(get_inode_ordering(inode)), (__le64 *)start); ++ start += sizeof(get_inode_ordering(inode)); ++ } ++ return start; ++} ++ ++/* ++ * extract key that was previously encoded by build_inode_onwire() at @addr ++ */ ++char *extract_obj_key_id_from_onwire(char *addr, obj_key_id * key_id) ++{ ++ __u64 val; ++ ++ addr += dscale_read(addr, &val); ++ val = (val << KEY_LOCALITY_SHIFT) | KEY_SD_MINOR; ++ put_unaligned(cpu_to_le64(val), (__le64 *)key_id->locality); ++ addr += dscale_read(addr, &val); ++ put_unaligned(cpu_to_le64(val), (__le64 *)key_id->objectid); ++#if REISER4_LARGE_KEY ++ memcpy(&key_id->ordering, addr, sizeof key_id->ordering); ++ addr += sizeof key_id->ordering; ++#endif ++ return addr; ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/kassign.h b/fs/reiser4/kassign.h +new file mode 100644 +index 0000000..ee818d5 +--- /dev/null ++++ b/fs/reiser4/kassign.h +@@ -0,0 +1,110 @@ ++/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Key assignment policy interface. See kassign.c for details. */ ++ ++#if !defined( __KASSIGN_H__ ) ++#define __KASSIGN_H__ ++ ++#include "forward.h" ++#include "key.h" ++#include "dformat.h" ++ ++#include /* for __u?? */ ++#include /* for struct super_block, etc */ ++#include /* for struct qstr */ ++ ++/* key assignment functions */ ++ ++/* Information from which key of file stat-data can be uniquely ++ restored. 
This depends on key assignment policy for ++ stat-data. Currently it's enough to store object id and locality id ++ (60+60==120) bits, because minor packing locality and offset of ++ stat-data key are always known constants: KEY_SD_MINOR and 0 ++ respectively. For simplicity 4 bits are wasted in each id, and just ++ two 64 bit integers are stored. ++ ++ This field has to be byte-aligned, because we don't want to waste ++ space in directory entries. There is another side of a coin of ++ course: we waste CPU and bus bandwidth in stead, by copying data back ++ and forth. ++ ++ Next optimization: &obj_key_id is mainly used to address stat data from ++ directory entries. Under the assumption that majority of files only have ++ only name (one hard link) from *the* parent directory it seems reasonable ++ to only store objectid of stat data and take its locality from key of ++ directory item. ++ ++ This requires some flag to be added to the &obj_key_id to distinguish ++ between these two cases. Remaining bits in flag byte are then asking to be ++ used to store file type. ++ ++ This optimization requires changes in directory item handling code. ++ ++*/ ++typedef struct obj_key_id { ++ d8 locality[sizeof(__u64)]; ++ ON_LARGE_KEY(d8 ordering[sizeof(__u64)]; ++ ) ++ d8 objectid[sizeof(__u64)]; ++} ++obj_key_id; ++ ++/* Information sufficient to uniquely identify directory entry within ++ compressed directory item. ++ ++ For alignment issues see &obj_key_id above. 
++*/ ++typedef struct de_id { ++ ON_LARGE_KEY(d8 ordering[sizeof(__u64)];) ++ d8 objectid[sizeof(__u64)]; ++ d8 offset[sizeof(__u64)]; ++} ++de_id; ++ ++extern int inode_onwire_size(const struct inode *obj); ++extern char *build_inode_onwire(const struct inode *obj, char *area); ++extern char *extract_obj_key_id_from_onwire(char *area, obj_key_id * key_id); ++ ++extern int build_inode_key_id(const struct inode *obj, obj_key_id * id); ++extern int extract_key_from_id(const obj_key_id * id, reiser4_key * key); ++extern int build_obj_key_id(const reiser4_key * key, obj_key_id * id); ++extern oid_t extract_dir_id_from_key(const reiser4_key * de_key); ++extern int build_de_id(const struct inode *dir, const struct qstr *name, ++ de_id * id); ++extern int build_de_id_by_key(const reiser4_key * entry_key, de_id * id); ++extern int extract_key_from_de_id(const oid_t locality, const de_id * id, ++ reiser4_key * key); ++extern cmp_t de_id_cmp(const de_id * id1, const de_id * id2); ++extern cmp_t de_id_key_cmp(const de_id * id, const reiser4_key * key); ++ ++extern int build_readdir_key_common(struct file *dir, reiser4_key * result); ++extern void build_entry_key_common(const struct inode *dir, ++ const struct qstr *name, ++ reiser4_key * result); ++extern void build_entry_key_stable_entry(const struct inode *dir, ++ const struct qstr *name, ++ reiser4_key * result); ++extern int is_dot_key(const reiser4_key * key); ++extern reiser4_key *build_sd_key(const struct inode *target, ++ reiser4_key * result); ++ ++extern int is_longname_key(const reiser4_key * key); ++extern int is_longname(const char *name, int len); ++extern char *extract_name_from_key(const reiser4_key * key, char *buf); ++extern char *reiser4_unpack_string(__u64 value, char *buf); ++extern void complete_entry_key(const struct inode *dir, const char *name, ++ int len, reiser4_key *result); ++ ++/* __KASSIGN_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/key.c b/fs/reiser4/key.c +new file mode 100644 +index 0000000..384c318 +--- /dev/null ++++ b/fs/reiser4/key.c +@@ -0,0 +1,137 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Key manipulations. */ ++ ++#include "debug.h" ++#include "key.h" ++#include "super.h" ++#include "reiser4.h" ++ ++#include /* for __u?? */ ++ ++/* Minimal possible key: all components are zero. It is presumed that this is ++ independent of key scheme. */ ++static const reiser4_key MINIMAL_KEY = { ++ .el = { ++ 0ull, ++ ON_LARGE_KEY(0ull,) ++ 0ull, ++ 0ull ++ } ++}; ++ ++/* Maximal possible key: all components are ~0. It is presumed that this is ++ independent of key scheme. */ ++static const reiser4_key MAXIMAL_KEY = { ++ .el = { ++ __constant_cpu_to_le64(~0ull), ++ ON_LARGE_KEY(__constant_cpu_to_le64(~0ull),) ++ __constant_cpu_to_le64(~0ull), ++ __constant_cpu_to_le64(~0ull) ++ } ++}; ++ ++/* Initialize key. */ ++void reiser4_key_init(reiser4_key * key /* key to init */ ) ++{ ++ assert("nikita-1169", key != NULL); ++ memset(key, 0, sizeof *key); ++} ++ ++/* minimal possible key in the tree. Return pointer to the static storage. */ ++const reiser4_key *reiser4_min_key(void) ++{ ++ return &MINIMAL_KEY; ++} ++ ++/* maximum possible key in the tree. Return pointer to the static storage. 
*/ ++const reiser4_key *reiser4_max_key(void) ++{ ++ return &MAXIMAL_KEY; ++} ++ ++#if REISER4_DEBUG ++/* debugging aid: print symbolic name of key type */ ++static const char *type_name(unsigned int key_type /* key type */ ) ++{ ++ switch (key_type) { ++ case KEY_FILE_NAME_MINOR: ++ return "file name"; ++ case KEY_SD_MINOR: ++ return "stat data"; ++ case KEY_ATTR_NAME_MINOR: ++ return "attr name"; ++ case KEY_ATTR_BODY_MINOR: ++ return "attr body"; ++ case KEY_BODY_MINOR: ++ return "file body"; ++ default: ++ return "unknown"; ++ } ++} ++ ++/* debugging aid: print human readable information about key */ ++void reiser4_print_key(const char *prefix /* prefix to print */ , ++ const reiser4_key * key /* key to print */ ) ++{ ++ /* turn bold on */ ++ /* printf ("\033[1m"); */ ++ if (key == NULL) ++ printk("%s: null key\n", prefix); ++ else { ++ if (REISER4_LARGE_KEY) ++ printk("%s: (%Lx:%x:%Lx:%Lx:%Lx:%Lx)", prefix, ++ get_key_locality(key), ++ get_key_type(key), ++ get_key_ordering(key), ++ get_key_band(key), ++ get_key_objectid(key), get_key_offset(key)); ++ else ++ printk("%s: (%Lx:%x:%Lx:%Lx:%Lx)", prefix, ++ get_key_locality(key), ++ get_key_type(key), ++ get_key_band(key), ++ get_key_objectid(key), get_key_offset(key)); ++ /* ++ * if this is a key of directory entry, try to decode part of ++ * a name stored in the key, and output it. ++ */ ++ if (get_key_type(key) == KEY_FILE_NAME_MINOR) { ++ char buf[DE_NAME_BUF_LEN]; ++ char *c; ++ ++ c = buf; ++ c = reiser4_unpack_string(get_key_ordering(key), c); ++ reiser4_unpack_string(get_key_fulloid(key), c); ++ printk("[%s", buf); ++ if (is_longname_key(key)) ++ /* ++ * only part of the name is stored in the key. ++ */ ++ printk("...]\n"); ++ else { ++ /* ++ * whole name is stored in the key. 
++ */ ++ reiser4_unpack_string(get_key_offset(key), buf); ++ printk("%s]\n", buf); ++ } ++ } else { ++ printk("[%s]\n", type_name(get_key_type(key))); ++ } ++ } ++ /* turn bold off */ ++ /* printf ("\033[m\017"); */ ++} ++ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/key.h b/fs/reiser4/key.h +new file mode 100644 +index 0000000..3f6b47e +--- /dev/null ++++ b/fs/reiser4/key.h +@@ -0,0 +1,384 @@ ++/* Copyright 2000, 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Declarations of key-related data-structures and operations on keys. */ ++ ++#if !defined( __REISER4_KEY_H__ ) ++#define __REISER4_KEY_H__ ++ ++#include "dformat.h" ++#include "forward.h" ++#include "debug.h" ++ ++#include /* for __u?? */ ++ ++/* Operations on keys in reiser4 tree */ ++ ++/* No access to any of these fields shall be done except via a ++ wrapping macro/function, and that wrapping macro/function shall ++ convert to little endian order. Compare keys will consider cpu byte order. */ ++ ++/* A storage layer implementation difference between a regular unix file body and its attributes is in the typedef below ++ which causes all of the attributes of a file to be near in key to all of the other attributes for all of the files ++ within that directory, and not near to the file itself. It is interesting to consider whether this is the wrong ++ approach, and whether there should be no difference at all. For current usage patterns this choice is probably the ++ right one. 
*/ ++ ++/* possible values for minor packing locality (4 bits required) */ ++typedef enum { ++ /* file name */ ++ KEY_FILE_NAME_MINOR = 0, ++ /* stat-data */ ++ KEY_SD_MINOR = 1, ++ /* file attribute name */ ++ KEY_ATTR_NAME_MINOR = 2, ++ /* file attribute value */ ++ KEY_ATTR_BODY_MINOR = 3, ++ /* file body (tail or extent) */ ++ KEY_BODY_MINOR = 4, ++} key_minor_locality; ++ ++/* everything stored in the tree has a unique key, which means that the tree is (logically) fully ordered by key. ++ Physical order is determined by dynamic heuristics that attempt to reflect key order when allocating available space, ++ and by the repacker. It is stylistically better to put aggregation information into the key. Thus, if you want to ++ segregate extents from tails, it is better to give them distinct minor packing localities rather than changing ++ block_alloc.c to check the node type when deciding where to allocate the node. ++ ++ The need to randomly displace new directories and large files disturbs this symmetry unfortunately. However, it ++ should be noted that this is a need that is not clearly established given the existence of a repacker. Also, in our ++ current implementation tails have a different minor packing locality from extents, and no files have both extents and ++ tails, so maybe symmetry can be had without performance cost after all. Symmetry is what we ship for now.... ++*/ ++ ++/* Arbitrary major packing localities can be assigned to objects using ++ the reiser4(filenameA/..packing<=some_number) system call. ++ ++ In reiser4, the creat() syscall creates a directory ++ ++ whose default flow (that which is referred to if the directory is ++ read as a file) is the traditional unix file body. ++ ++ whose directory plugin is the 'filedir' ++ ++ whose major packing locality is that of the parent of the object created. ++ ++ The static_stat item is a particular commonly used directory ++ compression (the one for normal unix files). 
++ ++ The filedir plugin checks to see if the static_stat item exists. ++ There is a unique key for static_stat. If yes, then it uses the ++ static_stat item for all of the values that it contains. The ++ static_stat item contains a flag for each stat it contains which ++ indicates whether one should look outside the static_stat item for its ++ contents. ++*/ ++ ++/* offset of fields in reiser4_key. Value of each element of this enum ++ is index within key (thought as array of __u64's) where this field ++ is. */ ++typedef enum { ++ /* major "locale", aka dirid. Sits in 1st element */ ++ KEY_LOCALITY_INDEX = 0, ++ /* minor "locale", aka item type. Sits in 1st element */ ++ KEY_TYPE_INDEX = 0, ++ ON_LARGE_KEY(KEY_ORDERING_INDEX,) ++ /* "object band". Sits in 2nd element */ ++ KEY_BAND_INDEX, ++ /* objectid. Sits in 2nd element */ ++ KEY_OBJECTID_INDEX = KEY_BAND_INDEX, ++ /* full objectid. Sits in 2nd element */ ++ KEY_FULLOID_INDEX = KEY_BAND_INDEX, ++ /* Offset. Sits in 3rd element */ ++ KEY_OFFSET_INDEX, ++ /* Name hash. Sits in 3rd element */ ++ KEY_HASH_INDEX = KEY_OFFSET_INDEX, ++ KEY_CACHELINE_END = KEY_OFFSET_INDEX, ++ KEY_LAST_INDEX ++} reiser4_key_field_index; ++ ++/* key in reiser4 internal "balanced" tree. It is just array of three ++ 64bit integers in disk byte order (little-endian by default). This ++ array is actually indexed by reiser4_key_field. Each __u64 within ++ this array is called "element". Logical key component encoded within ++ elements are called "fields". ++ ++ We declare this as union with second component dummy to suppress ++ inconvenient array<->pointer casts implied in C. */ ++union reiser4_key { ++ __le64 el[KEY_LAST_INDEX]; ++ int pad; ++}; ++ ++/* bitmasks showing where within reiser4_key particular key is stored. 
*/ ++/* major locality occupies higher 60 bits of the first element */ ++#define KEY_LOCALITY_MASK 0xfffffffffffffff0ull ++ ++/* minor locality occupies lower 4 bits of the first element */ ++#define KEY_TYPE_MASK 0xfull ++ ++/* controversial band occupies higher 4 bits of the 2nd element */ ++#define KEY_BAND_MASK 0xf000000000000000ull ++ ++/* objectid occupies lower 60 bits of the 2nd element */ ++#define KEY_OBJECTID_MASK 0x0fffffffffffffffull ++ ++/* full 64bit objectid*/ ++#define KEY_FULLOID_MASK 0xffffffffffffffffull ++ ++/* offset is just 3rd L.M.Nt itself */ ++#define KEY_OFFSET_MASK 0xffffffffffffffffull ++ ++/* ordering is whole second element */ ++#define KEY_ORDERING_MASK 0xffffffffffffffffull ++ ++/* how many bits key element should be shifted to left to get particular field */ ++typedef enum { ++ KEY_LOCALITY_SHIFT = 4, ++ KEY_TYPE_SHIFT = 0, ++ KEY_BAND_SHIFT = 60, ++ KEY_OBJECTID_SHIFT = 0, ++ KEY_FULLOID_SHIFT = 0, ++ KEY_OFFSET_SHIFT = 0, ++ KEY_ORDERING_SHIFT = 0, ++} reiser4_key_field_shift; ++ ++static inline __u64 ++get_key_el(const reiser4_key * key, reiser4_key_field_index off) ++{ ++ assert("nikita-753", key != NULL); ++ assert("nikita-754", off < KEY_LAST_INDEX); ++ return le64_to_cpu(get_unaligned(&key->el[off])); ++} ++ ++static inline void ++set_key_el(reiser4_key * key, reiser4_key_field_index off, __u64 value) ++{ ++ assert("nikita-755", key != NULL); ++ assert("nikita-756", off < KEY_LAST_INDEX); ++ put_unaligned(cpu_to_le64(value), &key->el[off]); ++} ++ ++/* macro to define getter and setter functions for field F with type T */ ++#define DEFINE_KEY_FIELD( L, U, T ) \ ++static inline T get_key_ ## L ( const reiser4_key *key ) \ ++{ \ ++ assert( "nikita-750", key != NULL ); \ ++ return ( T ) ( get_key_el( key, KEY_ ## U ## _INDEX ) & \ ++ KEY_ ## U ## _MASK ) >> KEY_ ## U ## _SHIFT; \ ++} \ ++ \ ++static inline void set_key_ ## L ( reiser4_key *key, T loc ) \ ++{ \ ++ __u64 el; \ ++ \ ++ assert( "nikita-752", key != NULL ); \ ++ \ 
++ el = get_key_el( key, KEY_ ## U ## _INDEX ); \ ++ /* clear field bits in the key */ \ ++ el &= ~KEY_ ## U ## _MASK; \ ++ /* actually it should be \ ++ \ ++ el |= ( loc << KEY_ ## U ## _SHIFT ) & KEY_ ## U ## _MASK; \ ++ \ ++ but we trust user to never pass values that wouldn't fit \ ++ into field. Clearing extra bits is one operation, but this \ ++ function is time-critical. \ ++ But check this in assertion. */ \ ++ assert( "nikita-759", ( ( loc << KEY_ ## U ## _SHIFT ) & \ ++ ~KEY_ ## U ## _MASK ) == 0 ); \ ++ el |= ( loc << KEY_ ## U ## _SHIFT ); \ ++ set_key_el( key, KEY_ ## U ## _INDEX, el ); \ ++} ++ ++typedef __u64 oid_t; ++ ++/* define get_key_locality(), set_key_locality() */ ++DEFINE_KEY_FIELD(locality, LOCALITY, oid_t); ++/* define get_key_type(), set_key_type() */ ++DEFINE_KEY_FIELD(type, TYPE, key_minor_locality); ++/* define get_key_band(), set_key_band() */ ++DEFINE_KEY_FIELD(band, BAND, __u64); ++/* define get_key_objectid(), set_key_objectid() */ ++DEFINE_KEY_FIELD(objectid, OBJECTID, oid_t); ++/* define get_key_fulloid(), set_key_fulloid() */ ++DEFINE_KEY_FIELD(fulloid, FULLOID, oid_t); ++/* define get_key_offset(), set_key_offset() */ ++DEFINE_KEY_FIELD(offset, OFFSET, __u64); ++#if (REISER4_LARGE_KEY) ++/* define get_key_ordering(), set_key_ordering() */ ++DEFINE_KEY_FIELD(ordering, ORDERING, __u64); ++#else ++static inline __u64 get_key_ordering(const reiser4_key * key) ++{ ++ return 0; ++} ++ ++static inline void set_key_ordering(reiser4_key * key, __u64 val) ++{ ++} ++#endif ++ ++/* key comparison result */ ++typedef enum { LESS_THAN = -1, /* if first key is less than second */ ++ EQUAL_TO = 0, /* if keys are equal */ ++ GREATER_THAN = +1 /* if first key is greater than second */ ++} cmp_t; ++ ++void reiser4_key_init(reiser4_key * key); ++ ++/* minimal possible key in the tree. Return pointer to the static storage. 
*/ ++extern const reiser4_key *reiser4_min_key(void); ++extern const reiser4_key *reiser4_max_key(void); ++ ++/* helper macro for keycmp() */ ++#define KEY_DIFF(k1, k2, field) \ ++({ \ ++ typeof (get_key_ ## field (k1)) f1; \ ++ typeof (get_key_ ## field (k2)) f2; \ ++ \ ++ f1 = get_key_ ## field (k1); \ ++ f2 = get_key_ ## field (k2); \ ++ \ ++ (f1 < f2) ? LESS_THAN : ((f1 == f2) ? EQUAL_TO : GREATER_THAN); \ ++}) ++ ++/* helper macro for keycmp() */ ++#define KEY_DIFF_EL(k1, k2, off) \ ++({ \ ++ __u64 e1; \ ++ __u64 e2; \ ++ \ ++ e1 = get_key_el(k1, off); \ ++ e2 = get_key_el(k2, off); \ ++ \ ++ (e1 < e2) ? LESS_THAN : ((e1 == e2) ? EQUAL_TO : GREATER_THAN); \ ++}) ++ ++/* compare `k1' and `k2'. This function is a heart of "key allocation ++ policy". All you need to implement new policy is to add yet another ++ clause here. */ ++static inline cmp_t keycmp(const reiser4_key * k1 /* first key to compare */ , ++ const reiser4_key * k2 /* second key to compare */ ) ++{ ++ cmp_t result; ++ ++ /* ++ * This function is the heart of reiser4 tree-routines. Key comparison ++ * is among most heavily used operations in the file system. ++ */ ++ ++ assert("nikita-439", k1 != NULL); ++ assert("nikita-440", k2 != NULL); ++ ++ /* there is no actual branch here: condition is compile time constant ++ * and constant folding and propagation ensures that only one branch ++ * is actually compiled in. */ ++ ++ if (REISER4_PLANA_KEY_ALLOCATION) { ++ /* if physical order of fields in a key is identical ++ with logical order, we can implement key comparison ++ as three 64bit comparisons. */ ++ /* logical order of fields in plan-a: ++ locality->type->objectid->offset. 
*/ ++ /* compare locality and type at once */ ++ result = KEY_DIFF_EL(k1, k2, 0); ++ if (result == EQUAL_TO) { ++ /* compare objectid (and band if it's there) */ ++ result = KEY_DIFF_EL(k1, k2, 1); ++ /* compare offset */ ++ if (result == EQUAL_TO) { ++ result = KEY_DIFF_EL(k1, k2, 2); ++ if (REISER4_LARGE_KEY && result == EQUAL_TO) { ++ result = KEY_DIFF_EL(k1, k2, 3); ++ } ++ } ++ } ++ } else if (REISER4_3_5_KEY_ALLOCATION) { ++ result = KEY_DIFF(k1, k2, locality); ++ if (result == EQUAL_TO) { ++ result = KEY_DIFF(k1, k2, objectid); ++ if (result == EQUAL_TO) { ++ result = KEY_DIFF(k1, k2, type); ++ if (result == EQUAL_TO) ++ result = KEY_DIFF(k1, k2, offset); ++ } ++ } ++ } else ++ impossible("nikita-441", "Unknown key allocation scheme!"); ++ return result; ++} ++ ++/* true if @k1 equals @k2 */ ++static inline int keyeq(const reiser4_key * k1 /* first key to compare */ , ++ const reiser4_key * k2 /* second key to compare */ ) ++{ ++ assert("nikita-1879", k1 != NULL); ++ assert("nikita-1880", k2 != NULL); ++ return !memcmp(k1, k2, sizeof *k1); ++} ++ ++/* true if @k1 is less than @k2 */ ++static inline int keylt(const reiser4_key * k1 /* first key to compare */ , ++ const reiser4_key * k2 /* second key to compare */ ) ++{ ++ assert("nikita-1952", k1 != NULL); ++ assert("nikita-1953", k2 != NULL); ++ return keycmp(k1, k2) == LESS_THAN; ++} ++ ++/* true if @k1 is less than or equal to @k2 */ ++static inline int keyle(const reiser4_key * k1 /* first key to compare */ , ++ const reiser4_key * k2 /* second key to compare */ ) ++{ ++ assert("nikita-1954", k1 != NULL); ++ assert("nikita-1955", k2 != NULL); ++ return keycmp(k1, k2) != GREATER_THAN; ++} ++ ++/* true if @k1 is greater than @k2 */ ++static inline int keygt(const reiser4_key * k1 /* first key to compare */ , ++ const reiser4_key * k2 /* second key to compare */ ) ++{ ++ assert("nikita-1959", k1 != NULL); ++ assert("nikita-1960", k2 != NULL); ++ return keycmp(k1, k2) == GREATER_THAN; ++} ++ ++/* true if @k1 
is greater than or equal to @k2 */ ++static inline int keyge(const reiser4_key * k1 /* first key to compare */ , ++ const reiser4_key * k2 /* second key to compare */ ) ++{ ++ assert("nikita-1956", k1 != NULL); ++ assert("nikita-1957", k2 != NULL); /* October 4: sputnik launched ++ * November 3: Laika */ ++ return keycmp(k1, k2) != LESS_THAN; ++} ++ ++static inline void prefetchkey(reiser4_key * key) ++{ ++ prefetch(key); ++ prefetch(&key->el[KEY_CACHELINE_END]); ++} ++ ++/* (%Lx:%x:%Lx:%Lx:%Lx:%Lx) = ++ 1 + 16 + 1 + 1 + 1 + 1 + 1 + 16 + 1 + 16 + 1 + 16 + 1 */ ++/* size of a buffer suitable to hold human readable key representation */ ++#define KEY_BUF_LEN (80) ++ ++#if REISER4_DEBUG ++extern void reiser4_print_key(const char *prefix, const reiser4_key * key); ++#else ++#define reiser4_print_key(p,k) noop ++#endif ++ ++/* __FS_REISERFS_KEY_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/ktxnmgrd.c b/fs/reiser4/ktxnmgrd.c +new file mode 100644 +index 0000000..15bb6d6 +--- /dev/null ++++ b/fs/reiser4/ktxnmgrd.c +@@ -0,0 +1,215 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++/* Transaction manager daemon. */ ++ ++/* ++ * ktxnmgrd is a kernel daemon responsible for committing transactions. It is ++ * needed/important for the following reasons: ++ * ++ * 1. in reiser4 atom is not committed immediately when last transaction ++ * handle closes, unless atom is either too old or too large (see ++ * atom_should_commit()). This is done to avoid committing too frequently. ++ * because: ++ * ++ * 2. sometimes we don't want to commit atom when closing last transaction ++ * handle even if it is old and fat enough. For example, because we are at ++ * this point under directory semaphore, and committing would stall all ++ * accesses to this directory. 
++ * ++ * ktxnmgrd binds its time sleeping on condition variable. When is awakes ++ * either due to (tunable) timeout or because it was explicitly woken up by ++ * call to ktxnmgrd_kick(), it scans list of all atoms and commits ones ++ * eligible. ++ * ++ */ ++ ++#include "debug.h" ++#include "txnmgr.h" ++#include "tree.h" ++#include "ktxnmgrd.h" ++#include "super.h" ++#include "reiser4.h" ++ ++#include /* for struct task_struct */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int scan_mgr(struct super_block *); ++ ++/* ++ * change current->comm so that ps, top, and friends will see changed ++ * state. This serves no useful purpose whatsoever, but also costs nothing. May ++ * be it will make lonely system administrator feeling less alone at 3 A.M. ++ */ ++#define set_comm( state ) \ ++ snprintf( current -> comm, sizeof( current -> comm ), \ ++ "%s:%s:%s", __FUNCTION__, (super)->s_id, ( state ) ) ++ ++/** ++ * ktxnmgrd - kernel txnmgr daemon ++ * @arg: pointer to super block ++ * ++ * The background transaction manager daemon, started as a kernel thread during ++ * reiser4 initialization. ++ */ ++static int ktxnmgrd(void *arg) ++{ ++ struct super_block *super; ++ ktxnmgrd_context *ctx; ++ txn_mgr *mgr; ++ int done = 0; ++ ++ super = arg; ++ mgr = &get_super_private(super)->tmgr; ++ ++ /* ++ * do_fork() just copies task_struct into the new thread. ->fs_context ++ * shouldn't be copied of course. This shouldn't be a problem for the ++ * rest of the code though. 
++ */ ++ current->journal_info = NULL; ++ ctx = mgr->daemon; ++ while (1) { ++ try_to_freeze(); ++ set_comm("wait"); ++ { ++ DEFINE_WAIT(__wait); ++ ++ prepare_to_wait(&ctx->wait, &__wait, TASK_INTERRUPTIBLE); ++ if (kthread_should_stop()) { ++ done = 1; ++ } else ++ schedule_timeout(ctx->timeout); ++ finish_wait(&ctx->wait, &__wait); ++ } ++ if (done) ++ break; ++ set_comm("run"); ++ spin_lock(&ctx->guard); ++ /* ++ * wait timed out or ktxnmgrd was woken up by explicit request ++ * to commit something. Scan list of atoms in txnmgr and look ++ * for too old atoms. ++ */ ++ do { ++ ctx->rescan = 0; ++ scan_mgr(super); ++ spin_lock(&ctx->guard); ++ if (ctx->rescan) { ++ /* ++ * the list could be modified while ctx ++ * spinlock was released, we have to repeat ++ * scanning from the beginning ++ */ ++ break; ++ } ++ } while (ctx->rescan); ++ spin_unlock(&ctx->guard); ++ } ++ return 0; ++} ++ ++#undef set_comm ++ ++/** ++ * reiser4_init_ktxnmgrd - initialize ktxnmgrd context and start kernel daemon ++ * @super: pointer to super block ++ * ++ * Allocates and initializes ktxnmgrd_context, attaches it to transaction ++ * manager. Starts kernel txnmgr daemon. This is called on mount. 
++ */ ++int reiser4_init_ktxnmgrd(struct super_block *super) ++{ ++ txn_mgr *mgr; ++ ktxnmgrd_context *ctx; ++ ++ mgr = &get_super_private(super)->tmgr; ++ ++ assert("zam-1014", mgr->daemon == NULL); ++ ++ ctx = kmalloc(sizeof(ktxnmgrd_context), reiser4_ctx_gfp_mask_get()); ++ if (ctx == NULL) ++ return RETERR(-ENOMEM); ++ ++ assert("nikita-2442", ctx != NULL); ++ ++ memset(ctx, 0, sizeof *ctx); ++ init_waitqueue_head(&ctx->wait); ++ ++ /*kcond_init(&ctx->startup);*/ ++ spin_lock_init(&ctx->guard); ++ ctx->timeout = REISER4_TXNMGR_TIMEOUT; ++ ctx->rescan = 1; ++ mgr->daemon = ctx; ++ ++ ctx->tsk = kthread_run(ktxnmgrd, super, "ktxnmgrd"); ++ if (IS_ERR(ctx->tsk)) { ++ int ret = PTR_ERR(ctx->tsk); ++ mgr->daemon = NULL; ++ kfree(ctx); ++ return RETERR(ret); ++ } ++ return 0; ++} ++ ++void ktxnmgrd_kick(txn_mgr *mgr) ++{ ++ assert("nikita-3234", mgr != NULL); ++ assert("nikita-3235", mgr->daemon != NULL); ++ wake_up(&mgr->daemon->wait); ++} ++ ++int is_current_ktxnmgrd(void) ++{ ++ return (get_current_super_private()->tmgr.daemon->tsk == current); ++} ++ ++/** ++ * scan_mgr - commit atoms which are to be committed ++ * @super: super block to commit atoms of ++ * ++ * Commits old atoms. ++ */ ++static int scan_mgr(struct super_block *super) ++{ ++ int ret; ++ reiser4_context ctx; ++ ++ init_stack_context(&ctx, super); ++ ++ ret = commit_some_atoms(&get_super_private(super)->tmgr); ++ ++ reiser4_exit_context(&ctx); ++ return ret; ++} ++ ++/** ++ * reiser4_done_ktxnmgrd - stop kernel thread and frees ktxnmgrd context ++ * @mgr: ++ * ++ * This is called on umount. 
Stops ktxnmgrd and free t ++ */ ++void reiser4_done_ktxnmgrd(struct super_block *super) ++{ ++ txn_mgr *mgr; ++ ++ mgr = &get_super_private(super)->tmgr; ++ assert("zam-1012", mgr->daemon != NULL); ++ ++ kthread_stop(mgr->daemon->tsk); ++ kfree(mgr->daemon); ++ mgr->daemon = NULL; ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 120 ++ * End: ++ */ +diff --git a/fs/reiser4/ktxnmgrd.h b/fs/reiser4/ktxnmgrd.h +new file mode 100644 +index 0000000..d00f1d9 +--- /dev/null ++++ b/fs/reiser4/ktxnmgrd.h +@@ -0,0 +1,52 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Transaction manager daemon. See ktxnmgrd.c for comments. */ ++ ++#ifndef __KTXNMGRD_H__ ++#define __KTXNMGRD_H__ ++ ++#include "txnmgr.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include /* for struct task_struct */ ++ ++/* in this structure all data necessary to start up, shut down and communicate ++ * with ktxnmgrd are kept. */ ++struct ktxnmgrd_context { ++ /* wait queue head on which ktxnmgrd sleeps */ ++ wait_queue_head_t wait; ++ /* spin lock protecting all fields of this structure */ ++ spinlock_t guard; ++ /* timeout of sleeping on ->wait */ ++ signed long timeout; ++ /* kernel thread running ktxnmgrd */ ++ struct task_struct *tsk; ++ /* list of all file systems served by this ktxnmgrd */ ++ struct list_head queue; ++ /* should ktxnmgrd repeat scanning of atoms? */ ++ unsigned int rescan:1; ++}; ++ ++extern int reiser4_init_ktxnmgrd(struct super_block *); ++extern void reiser4_done_ktxnmgrd(struct super_block *); ++ ++extern void ktxnmgrd_kick(txn_mgr * mgr); ++extern int is_current_ktxnmgrd(void); ++ ++/* __KTXNMGRD_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/lock.c b/fs/reiser4/lock.c +new file mode 100644 +index 0000000..cdca928 +--- /dev/null ++++ b/fs/reiser4/lock.c +@@ -0,0 +1,1232 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Traditional deadlock avoidance is achieved by acquiring all locks in a single ++ order. V4 balances the tree from the bottom up, and searches the tree from ++ the top down, and that is really the way we want it, so tradition won't work ++ for us. ++ ++ Instead we have two lock orderings, a high priority lock ordering, and a low ++ priority lock ordering. Each node in the tree has a lock in its znode. ++ ++ Suppose we have a set of processes which lock (R/W) tree nodes. Each process ++ has a set (maybe empty) of already locked nodes ("process locked set"). Each ++ process may have a pending lock request to a node locked by another process. ++ Note: we lock and unlock, but do not transfer locks: it is possible ++ transferring locks instead would save some bus locking.... ++ ++ Deadlock occurs when we have a loop constructed from process locked sets and ++ lock request vectors. ++ ++ NOTE: The reiser4 "tree" is a tree on disk, but its cached representation in ++ memory is extended with "znodes" with which we connect nodes with their left ++ and right neighbors using sibling pointers stored in the znodes. When we ++ perform balancing operations we often go from left to right and from right to ++ left. 
++ ++ +-P1-+ +-P3-+ ++ |+--+| V1 |+--+| ++ ||N1|| -------> ||N3|| ++ |+--+| |+--+| ++ +----+ +----+ ++ ^ | ++ |V2 |V3 ++ | v ++ +---------P2---------+ ++ |+--+ +--+| ++ ||N2| -------- |N4|| ++ |+--+ +--+| ++ +--------------------+ ++ ++ We solve this by ensuring that only low priority processes lock in top to ++ bottom order and from right to left, and high priority processes lock from ++ bottom to top and left to right. ++ ++ ZAM-FIXME-HANS: order not just node locks in this way, order atom locks, and ++ kill those damn busy loops. ++ ANSWER(ZAM): atom locks (which are introduced by ASTAGE_CAPTURE_WAIT atom ++ stage) cannot be ordered that way. There are no rules what nodes can belong ++ to the atom and what nodes cannot. We cannot define what is right or left ++ direction, what is top or bottom. We can take immediate parent or side ++ neighbor of one node, but nobody guarantees that, say, left neighbor node is ++ not a far right neighbor for other nodes from the same atom. It breaks ++ deadlock avoidance rules and hi-low priority locking cannot be applied for ++ atom locks. ++ ++ How does it help to avoid deadlocks ? ++ ++ Suppose we have a deadlock with n processes. Processes from one priority ++ class never deadlock because they take locks in one consistent ++ order. ++ ++ So, any possible deadlock loop must have low priority as well as high ++ priority processes. There are no other lock priority levels except low and ++ high. We know that any deadlock loop contains at least one node locked by a ++ low priority process and requested by a high priority process. If this ++ situation is caught and resolved it is sufficient to avoid deadlocks. ++ ++ V4 DEADLOCK PREVENTION ALGORITHM IMPLEMENTATION. ++ ++ The deadlock prevention algorithm is based on comparing ++ priorities of node owners (processes which keep znode locked) and ++ requesters (processes which want to acquire a lock on znode). 
We ++ implement a scheme where low-priority owners yield locks to ++ high-priority requesters. We created a signal passing system that ++ is used to ask low-priority processes to yield one or more locked ++ znodes. ++ ++ The condition when a znode needs to change its owners is described by the ++ following formula: ++ ++ ############################################# ++ # # ++ # (number of high-priority requesters) > 0 # ++ # AND # ++ # (numbers of high-priority owners) == 0 # ++ # # ++ ############################################# ++ ++ Note that a low-priority process delays node releasing if another ++ high-priority process owns this node. So, slightly more strictly speaking, ++ to have a deadlock capable cycle you must have a loop in which a high ++ priority process is waiting on a low priority process to yield a node, which ++ is slightly different from saying a high priority process is waiting on a ++ node owned by a low priority process. ++ ++ It is enough to avoid deadlocks if we prevent any low-priority process from ++ falling asleep if its locked set contains a node which satisfies the ++ deadlock condition. ++ ++ That condition is implicitly or explicitly checked in all places where new ++ high-priority requests may be added or removed from node request queue or ++ high-priority process takes or releases a lock on node. The main ++ goal of these checks is to never lose the moment when node becomes "has ++ wrong owners" and send "must-yield-this-lock" signals to its low-pri owners ++ at that time. ++ ++ The information about received signals is stored in the per-process ++ structure (lock stack) and analyzed before a low-priority process goes to ++ sleep but after a "fast" attempt to lock a node fails. Any signal wakes ++ sleeping process up and forces him to re-check lock status and received ++ signal info. If "must-yield-this-lock" signals were received the locking ++ primitive (longterm_lock_znode()) fails with -E_DEADLOCK error code. 
++ ++ V4 LOCKING DRAWBACKS ++ ++ If we have already balanced on one level, and we are propagating our changes ++ upward to a higher level, it could be very messy to surrender all locks on ++ the lower level because we put so much computational work into it, and ++ reverting them to their state before they were locked might be very complex. ++ We also don't want to acquire all locks before performing balancing because ++ that would either be almost as much work as the balancing, or it would be ++ too conservative and lock too much. We want balancing to be done only at ++ high priority. Yet, we might want to go to the left one node and use some ++ of its empty space... So we make one attempt at getting the node to the left ++ using try_lock, and if it fails we do without it, because we didn't really ++ need it, it was only a nice to have. ++ ++ LOCK STRUCTURES DESCRIPTION ++ ++ The following data structures are used in the reiser4 locking ++ implementation: ++ ++ All fields related to long-term locking are stored in znode->lock. ++ ++ The lock stack is a per thread object. It owns all znodes locked by the ++ thread. One znode may be locked by several threads in case of read lock or ++ one znode may be write locked by one thread several times. The special link ++ objects (lock handles) support n<->m relation between znodes and lock ++ owners. ++ ++ ++ ++ +---------+ +---------+ ++ | LS1 | | LS2 | ++ +---------+ +---------+ ++ ^ ^ ++ |---------------+ +----------+ ++ v v v v ++ +---------+ +---------+ +---------+ +---------+ ++ | LH1 | | LH2 | | LH3 | | LH4 | ++ +---------+ +---------+ +---------+ +---------+ ++ ^ ^ ^ ^ ++ | +------------+ | ++ v v v ++ +---------+ +---------+ +---------+ ++ | Z1 | | Z2 | | Z3 | ++ +---------+ +---------+ +---------+ ++ ++ Thread 1 locked znodes Z1 and Z2, thread 2 locked znodes Z2 and Z3. 
The ++ picture above shows that lock stack LS1 has a list of 2 lock handles LH1 and ++ LH2, lock stack LS2 has a list with lock handles LH3 and LH4 on it. Znode ++ Z1 is locked by only one thread, znode has only one lock handle LH1 on its ++ list, similar situation is for Z3 which is locked by the thread 2 only. Z2 ++ is locked (for read) twice by different threads and two lock handles are on ++ its list. Each lock handle represents a single relation of a locking of a ++ znode by a thread. Locking of a znode is an establishing of a locking ++ relation between the lock stack and the znode by adding of a new lock handle ++ to a list of lock handles, the lock stack. The lock stack links all lock ++ handles for all znodes locked by the lock stack. The znode list groups all ++ lock handles for all locks stacks which locked the znode. ++ ++ Yet another relation may exist between znode and lock owners. If lock ++ procedure cannot immediately take lock on an object it adds the lock owner ++ on special `requestors' list belongs to znode. That list represents a ++ queue of pending lock requests. Because one lock owner may request only ++ only one lock object at a time, it is a 1->n relation between lock objects ++ and a lock owner implemented as it is described above. Full information ++ (priority, pointers to lock and link objects) about each lock request is ++ stored in lock owner structure in `request' field. ++ ++ SHORT_TERM LOCKING ++ ++ This is a list of primitive operations over lock stacks / lock handles / ++ znodes and locking descriptions for them. ++ ++ 1. locking / unlocking which is done by two list insertion/deletion, one ++ to/from znode's list of lock handles, another one is to/from lock stack's ++ list of lock handles. The first insertion is protected by ++ znode->lock.guard spinlock. The list owned by the lock stack can be ++ modified only by thread who owns the lock stack and nobody else can ++ modify/read it. 
There is nothing to be protected by a spinlock or ++ something else. ++ ++ 2. adding/removing a lock request to/from znode requesters list. The rule is ++ that znode->lock.guard spinlock should be taken for this. ++ ++ 3. we can traverse list of lock handles and use references to lock stacks who ++ locked given znode if znode->lock.guard spinlock is taken. ++ ++ 4. If a lock stack is associated with a znode as a lock requestor or lock ++ owner its existence is guaranteed by znode->lock.guard spinlock. Some its ++ (lock stack's) fields should be protected from being accessed in parallel ++ by two or more threads. Please look at lock_stack structure definition ++ for the info how those fields are protected. */ ++ ++/* Znode lock and capturing intertwining. */ ++/* In current implementation we capture formatted nodes before locking ++ them. Take a look on longterm lock znode, reiser4_try_capture() request ++ precedes locking requests. The longterm_lock_znode function unconditionally ++ captures znode before even checking of locking conditions. ++ ++ Another variant is to capture znode after locking it. It was not tested, but ++ at least one deadlock condition is supposed to be there. One thread has ++ locked a znode (Node-1) and calls reiser4_try_capture() for it. ++ reiser4_try_capture() sleeps because znode's atom has CAPTURE_WAIT state. ++ Second thread is a flushing thread, its current atom is the atom Node-1 ++ belongs to. Second thread wants to lock Node-1 and sleeps because Node-1 ++ is locked by the first thread. The described situation is a deadlock. 
*/ ++ ++#include "debug.h" ++#include "txnmgr.h" ++#include "znode.h" ++#include "jnode.h" ++#include "tree.h" ++#include "plugin/node/node.h" ++#include "super.h" ++ ++#include ++ ++#if REISER4_DEBUG ++static int request_is_deadlock_safe(znode *, znode_lock_mode, ++ znode_lock_request); ++#endif ++ ++/* Returns a lock owner associated with current thread */ ++lock_stack *get_current_lock_stack(void) ++{ ++ return &get_current_context()->stack; ++} ++ ++/* Wakes up all low priority owners informing them about possible deadlock */ ++static void wake_up_all_lopri_owners(znode * node) ++{ ++ lock_handle *handle; ++ ++ assert_spin_locked(&(node->lock.guard)); ++ list_for_each_entry(handle, &node->lock.owners, owners_link) { ++ assert("nikita-1832", handle->node == node); ++ /* count this signal in owner->nr_signaled */ ++ if (!handle->signaled) { ++ handle->signaled = 1; ++ atomic_inc(&handle->owner->nr_signaled); ++ /* Wake up a single process */ ++ reiser4_wake_up(handle->owner); ++ } ++ } ++} ++ ++/* Adds a lock to a lock owner, which means creating a link to the lock and ++ putting the link into the two lists all links are on (the doubly linked list ++ that forms the lock_stack, and the doubly linked list of links attached ++ to a lock. 
++*/ ++static inline void ++link_object(lock_handle * handle, lock_stack * owner, znode * node) ++{ ++ assert("jmacd-810", handle->owner == NULL); ++ assert_spin_locked(&(node->lock.guard)); ++ ++ handle->owner = owner; ++ handle->node = node; ++ ++ assert("reiser4-4", ++ ergo(list_empty_careful(&owner->locks), owner->nr_locks == 0)); ++ ++ /* add lock handle to the end of lock_stack's list of locks */ ++ list_add_tail(&handle->locks_link, &owner->locks); ++ ON_DEBUG(owner->nr_locks++); ++ reiser4_ctx_gfp_mask_set(); ++ ++ /* add lock handle to the head of znode's list of owners */ ++ list_add(&handle->owners_link, &node->lock.owners); ++ handle->signaled = 0; ++} ++ ++/* Breaks a relation between a lock and its owner */ ++static inline void unlink_object(lock_handle * handle) ++{ ++ assert("zam-354", handle->owner != NULL); ++ assert("nikita-1608", handle->node != NULL); ++ assert_spin_locked(&(handle->node->lock.guard)); ++ assert("nikita-1829", handle->owner == get_current_lock_stack()); ++ assert("reiser4-5", handle->owner->nr_locks > 0); ++ ++ /* remove lock handle from lock_stack's list of locks */ ++ list_del(&handle->locks_link); ++ ON_DEBUG(handle->owner->nr_locks--); ++ reiser4_ctx_gfp_mask_set(); ++ assert("reiser4-6", ++ ergo(list_empty_careful(&handle->owner->locks), ++ handle->owner->nr_locks == 0)); ++ /* remove lock handle from znode's list of owners */ ++ list_del(&handle->owners_link); ++ /* indicates that lock handle is free now */ ++ handle->node = NULL; ++#if REISER4_DEBUG ++ INIT_LIST_HEAD(&handle->locks_link); ++ INIT_LIST_HEAD(&handle->owners_link); ++ handle->owner = NULL; ++#endif ++} ++ ++/* Actually locks an object knowing that we are able to do this */ ++static void lock_object(lock_stack * owner) ++{ ++ lock_request *request; ++ znode *node; ++ ++ request = &owner->request; ++ node = request->node; ++ assert_spin_locked(&(node->lock.guard)); ++ if (request->mode == ZNODE_READ_LOCK) { ++ node->lock.nr_readers++; ++ } else { ++ /* check 
that we don't switched from read to write lock */ ++ assert("nikita-1840", node->lock.nr_readers <= 0); ++ /* We allow recursive locking; a node can be locked several ++ times for write by same process */ ++ node->lock.nr_readers--; ++ } ++ ++ link_object(request->handle, owner, node); ++ ++ if (owner->curpri) { ++ node->lock.nr_hipri_owners++; ++ } ++} ++ ++/* Check for recursive write locking */ ++static int recursive(lock_stack * owner) ++{ ++ int ret; ++ znode *node; ++ lock_handle *lh; ++ ++ node = owner->request.node; ++ ++ /* Owners list is not empty for a locked node */ ++ assert("zam-314", !list_empty_careful(&node->lock.owners)); ++ assert("nikita-1841", owner == get_current_lock_stack()); ++ assert_spin_locked(&(node->lock.guard)); ++ ++ lh = list_entry(node->lock.owners.next, lock_handle, owners_link); ++ ret = (lh->owner == owner); ++ ++ /* Recursive read locking should be done usual way */ ++ assert("zam-315", !ret || owner->request.mode == ZNODE_WRITE_LOCK); ++ /* mixing of read/write locks is not allowed */ ++ assert("zam-341", !ret || znode_is_wlocked(node)); ++ ++ return ret; ++} ++ ++#if REISER4_DEBUG ++/* Returns true if the lock is held by the calling thread. */ ++int znode_is_any_locked(const znode * node) ++{ ++ lock_handle *handle; ++ lock_stack *stack; ++ int ret; ++ ++ if (!znode_is_locked(node)) { ++ return 0; ++ } ++ ++ stack = get_current_lock_stack(); ++ ++ spin_lock_stack(stack); ++ ++ ret = 0; ++ ++ list_for_each_entry(handle, &stack->locks, locks_link) { ++ if (handle->node == node) { ++ ret = 1; ++ break; ++ } ++ } ++ ++ spin_unlock_stack(stack); ++ ++ return ret; ++} ++ ++#endif ++ ++/* Returns true if a write lock is held by the calling thread. 
*/ ++int znode_is_write_locked(const znode * node) ++{ ++ lock_stack *stack; ++ lock_handle *handle; ++ ++ assert("jmacd-8765", node != NULL); ++ ++ if (!znode_is_wlocked(node)) { ++ return 0; ++ } ++ ++ stack = get_current_lock_stack(); ++ ++ /* ++ * When znode is write locked, all owner handles point to the same lock ++ * stack. Get pointer to lock stack from the first lock handle from ++ * znode's owner list ++ */ ++ handle = list_entry(node->lock.owners.next, lock_handle, owners_link); ++ ++ return (handle->owner == stack); ++} ++ ++/* This "deadlock" condition is the essential part of reiser4 locking ++ implementation. This condition is checked explicitly by calling ++ check_deadlock_condition() or implicitly in all places where znode lock ++ state (set of owners and request queue) is changed. Locking code is ++ designed to use this condition to trigger procedure of passing object from ++ low priority owner(s) to high priority one(s). ++ ++ The procedure results in passing an event (setting lock_handle->signaled ++ flag) and counting this event in nr_signaled field of owner's lock stack ++ object and wakeup owner's process. ++*/ ++static inline int check_deadlock_condition(znode * node) ++{ ++ assert_spin_locked(&(node->lock.guard)); ++ return node->lock.nr_hipri_requests > 0 ++ && node->lock.nr_hipri_owners == 0; ++} ++ ++static int check_livelock_condition(znode * node, znode_lock_mode mode) ++{ ++ zlock * lock = &node->lock; ++ ++ return mode == ZNODE_READ_LOCK && ++ lock -> nr_readers >= 0 && lock->nr_hipri_write_requests > 0; ++} ++ ++/* checks lock/request compatibility */ ++static int can_lock_object(lock_stack * owner) ++{ ++ znode *node = owner->request.node; ++ ++ assert_spin_locked(&(node->lock.guard)); ++ ++ /* See if the node is disconnected. 
*/ ++ if (unlikely(ZF_ISSET(node, JNODE_IS_DYING))) ++ return RETERR(-EINVAL); ++ ++ /* Do not ever try to take a lock if we are going in low priority ++ direction and a node have a high priority request without high ++ priority owners. */ ++ if (unlikely(!owner->curpri && check_deadlock_condition(node))) ++ return RETERR(-E_REPEAT); ++ if (unlikely(owner->curpri && check_livelock_condition(node, owner->request.mode))) ++ return RETERR(-E_REPEAT); ++ if (unlikely(!is_lock_compatible(node, owner->request.mode))) ++ return RETERR(-E_REPEAT); ++ return 0; ++} ++ ++/* Setting of a high priority to the process. It clears "signaled" flags ++ because znode locked by high-priority process can't satisfy our "deadlock ++ condition". */ ++static void set_high_priority(lock_stack * owner) ++{ ++ assert("nikita-1846", owner == get_current_lock_stack()); ++ /* Do nothing if current priority is already high */ ++ if (!owner->curpri) { ++ /* We don't need locking for owner->locks list, because, this ++ * function is only called with the lock stack of the current ++ * thread, and no other thread can play with owner->locks list ++ * and/or change ->node pointers of lock handles in this list. ++ * ++ * (Interrupts also are not involved.) ++ */ ++ lock_handle *item = list_entry(owner->locks.next, lock_handle, locks_link); ++ while (&owner->locks != &item->locks_link) { ++ znode *node = item->node; ++ ++ spin_lock_zlock(&node->lock); ++ ++ node->lock.nr_hipri_owners++; ++ ++ /* we can safely set signaled to zero, because ++ previous statement (nr_hipri_owners ++) guarantees ++ that signaled will be never set again. */ ++ item->signaled = 0; ++ spin_unlock_zlock(&node->lock); ++ ++ item = list_entry(item->locks_link.next, lock_handle, locks_link); ++ } ++ owner->curpri = 1; ++ atomic_set(&owner->nr_signaled, 0); ++ } ++} ++ ++/* Sets a low priority to the process. 
*/ ++static void set_low_priority(lock_stack * owner) ++{ ++ assert("nikita-3075", owner == get_current_lock_stack()); ++ /* Do nothing if current priority is already low */ ++ if (owner->curpri) { ++ /* scan all locks (lock handles) held by @owner, which is ++ actually current thread, and check whether we are reaching ++ deadlock possibility anywhere. ++ */ ++ lock_handle *handle = list_entry(owner->locks.next, lock_handle, locks_link); ++ while (&owner->locks != &handle->locks_link) { ++ znode *node = handle->node; ++ spin_lock_zlock(&node->lock); ++ /* this thread just was hipri owner of @node, so ++ nr_hipri_owners has to be greater than zero. */ ++ assert("nikita-1835", node->lock.nr_hipri_owners > 0); ++ node->lock.nr_hipri_owners--; ++ /* If we have deadlock condition, adjust a nr_signaled ++ field. It is enough to set "signaled" flag only for ++ current process, other low-pri owners will be ++ signaled and waken up after current process unlocks ++ this object and any high-priority requestor takes ++ control. 
*/ ++ if (check_deadlock_condition(node) ++ && !handle->signaled) { ++ handle->signaled = 1; ++ atomic_inc(&owner->nr_signaled); ++ } ++ spin_unlock_zlock(&node->lock); ++ handle = list_entry(handle->locks_link.next, lock_handle, locks_link); ++ } ++ owner->curpri = 0; ++ } ++} ++ ++static void remove_lock_request(lock_stack * requestor) ++{ ++ zlock * lock = &requestor->request.node->lock; ++ ++ if (requestor->curpri) { ++ assert("nikita-1838", lock->nr_hipri_requests > 0); ++ lock->nr_hipri_requests--; ++ if (requestor->request.mode == ZNODE_WRITE_LOCK) ++ lock->nr_hipri_write_requests --; ++ } ++ list_del(&requestor->requestors_link); ++} ++ ++static void invalidate_all_lock_requests(znode * node) ++{ ++ lock_stack *requestor, *tmp; ++ ++ assert_spin_locked(&(node->lock.guard)); ++ ++ list_for_each_entry_safe(requestor, tmp, &node->lock.requestors, requestors_link) { ++ remove_lock_request(requestor); ++ requestor->request.ret_code = -EINVAL; ++ reiser4_wake_up(requestor); ++ requestor->request.mode = ZNODE_NO_LOCK; ++ } ++} ++ ++static void dispatch_lock_requests(znode * node) ++{ ++ lock_stack *requestor, *tmp; ++ ++ assert_spin_locked(&(node->lock.guard)); ++ ++ list_for_each_entry_safe(requestor, tmp, &node->lock.requestors, requestors_link) { ++ if (znode_is_write_locked(node)) ++ break; ++ if (!can_lock_object(requestor)) { ++ lock_object(requestor); ++ remove_lock_request(requestor); ++ requestor->request.ret_code = 0; ++ reiser4_wake_up(requestor); ++ requestor->request.mode = ZNODE_NO_LOCK; ++ } ++ } ++} ++ ++/* release long-term lock, acquired by longterm_lock_znode() */ ++void longterm_unlock_znode(lock_handle * handle) ++{ ++ znode *node = handle->node; ++ lock_stack *oldowner = handle->owner; ++ int hipri; ++ int readers; ++ int rdelta; ++ int youdie; ++ ++ /* ++ * this is time-critical and highly optimized code. Modify carefully. 
++ */ ++ ++ assert("jmacd-1021", handle != NULL); ++ assert("jmacd-1022", handle->owner != NULL); ++ assert("nikita-1392", LOCK_CNT_GTZ(long_term_locked_znode)); ++ ++ assert("zam-130", oldowner == get_current_lock_stack()); ++ ++ LOCK_CNT_DEC(long_term_locked_znode); ++ ++ /* ++ * to minimize amount of operations performed under lock, pre-compute ++ * all variables used within critical section. This makes code ++ * obscure. ++ */ ++ ++ /* was this lock of hi or lo priority */ ++ hipri = oldowner->curpri ? 1 : 0; ++ /* number of readers */ ++ readers = node->lock.nr_readers; ++ /* +1 if write lock, -1 if read lock */ ++ rdelta = (readers > 0) ? -1 : +1; ++ /* true if node is to die and write lock is released */ ++ youdie = ZF_ISSET(node, JNODE_HEARD_BANSHEE) && (readers < 0); ++ ++ spin_lock_zlock(&node->lock); ++ ++ assert("zam-101", znode_is_locked(node)); ++ ++ /* Adjust a number of high priority owners of this lock */ ++ assert("nikita-1836", node->lock.nr_hipri_owners >= hipri); ++ node->lock.nr_hipri_owners -= hipri; ++ ++ /* Handle znode deallocation on last write-lock release. */ ++ if (znode_is_wlocked_once(node)) { ++ if (youdie) { ++ forget_znode(handle); ++ assert("nikita-2191", znode_invariant(node)); ++ zput(node); ++ return; ++ } ++ } ++ ++ if (handle->signaled) ++ atomic_dec(&oldowner->nr_signaled); ++ ++ /* Unlocking means owner<->object link deletion */ ++ unlink_object(handle); ++ ++ /* This is enough to be sure whether an object is completely ++ unlocked. */ ++ node->lock.nr_readers += rdelta; ++ ++ /* If the node is locked it must have an owners list. Likewise, if ++ the node is unlocked it must have an empty owners list. 
*/ ++ assert("zam-319", equi(znode_is_locked(node), ++ !list_empty_careful(&node->lock.owners))); ++ ++#if REISER4_DEBUG ++ if (!znode_is_locked(node)) ++ ++node->times_locked; ++#endif ++ ++ /* If there are pending lock requests we wake up a requestor */ ++ if (!znode_is_wlocked(node)) ++ dispatch_lock_requests(node); ++ if (check_deadlock_condition(node)) ++ wake_up_all_lopri_owners(node); ++ spin_unlock_zlock(&node->lock); ++ ++ /* minus one reference from handle->node */ ++ assert("nikita-2190", znode_invariant(node)); ++ ON_DEBUG(check_lock_data()); ++ ON_DEBUG(check_lock_node_data(node)); ++ zput(node); ++} ++ ++/* final portion of longterm-lock */ ++static int ++lock_tail(lock_stack * owner, int ok, znode_lock_mode mode) ++{ ++ znode *node = owner->request.node; ++ ++ assert_spin_locked(&(node->lock.guard)); ++ ++ /* If we broke with (ok == 0) it means we can_lock, now do it. */ ++ if (ok == 0) { ++ lock_object(owner); ++ owner->request.mode = 0; ++ /* count a reference from lockhandle->node ++ ++ znode was already referenced at the entry to this function, ++ hence taking spin-lock here is not necessary (see comment ++ in the zref()). ++ */ ++ zref(node); ++ ++ LOCK_CNT_INC(long_term_locked_znode); ++ } ++ spin_unlock_zlock(&node->lock); ++ ON_DEBUG(check_lock_data()); ++ ON_DEBUG(check_lock_node_data(node)); ++ return ok; ++} ++ ++/* ++ * version of longterm_znode_lock() optimized for the most common case: read ++ * lock without any special flags. This is the kind of lock that any tree ++ * traversal takes on the root node of the tree, which is very frequent. 
++ */ ++static int longterm_lock_tryfast(lock_stack * owner) ++{ ++ int result; ++ znode *node; ++ zlock *lock; ++ ++ node = owner->request.node; ++ lock = &node->lock; ++ ++ assert("nikita-3340", reiser4_schedulable()); ++ assert("nikita-3341", request_is_deadlock_safe(node, ++ ZNODE_READ_LOCK, ++ ZNODE_LOCK_LOPRI)); ++ spin_lock_zlock(lock); ++ result = can_lock_object(owner); ++ spin_unlock_zlock(lock); ++ ++ if (likely(result != -EINVAL)) { ++ spin_lock_znode(node); ++ result = reiser4_try_capture(ZJNODE(node), ZNODE_READ_LOCK, 0); ++ spin_unlock_znode(node); ++ spin_lock_zlock(lock); ++ if (unlikely(result != 0)) { ++ owner->request.mode = 0; ++ } else { ++ result = can_lock_object(owner); ++ if (unlikely(result == -E_REPEAT)) { ++ /* fall back to longterm_lock_znode() */ ++ spin_unlock_zlock(lock); ++ return 1; ++ } ++ } ++ return lock_tail(owner, result, ZNODE_READ_LOCK); ++ } else ++ return 1; ++} ++ ++/* locks given lock object */ ++int longterm_lock_znode( ++ /* local link object (allocated by lock owner thread, usually on its own ++ * stack) */ ++ lock_handle * handle, ++ /* znode we want to lock. */ ++ znode * node, ++ /* {ZNODE_READ_LOCK, ZNODE_WRITE_LOCK}; */ ++ znode_lock_mode mode, ++ /* {0, -EINVAL, -E_DEADLOCK}, see return codes description. */ ++ znode_lock_request request) { ++ int ret; ++ int hipri = (request & ZNODE_LOCK_HIPRI) != 0; ++ int non_blocking = 0; ++ int has_atom; ++ txn_capture cap_flags; ++ zlock *lock; ++ txn_handle *txnh; ++ tree_level level; ++ ++ /* Get current process context */ ++ lock_stack *owner = get_current_lock_stack(); ++ ++ /* Check that the lock handle is initialized and isn't already being ++ * used. 
*/ ++ assert("jmacd-808", handle->owner == NULL); ++ assert("nikita-3026", reiser4_schedulable()); ++ assert("nikita-3219", request_is_deadlock_safe(node, mode, request)); ++ assert("zam-1056", atomic_read(&ZJNODE(node)->x_count) > 0); ++ /* long term locks are not allowed in the VM contexts (->writepage(), ++ * prune_{d,i}cache()). ++ * ++ * FIXME this doesn't work due to unused-dentry-with-unlinked-inode ++ * bug caused by d_splice_alias() only working for directories. ++ */ ++ assert("nikita-3547", 1 || ((current->flags & PF_MEMALLOC) == 0)); ++ assert ("zam-1055", mode != ZNODE_NO_LOCK); ++ ++ cap_flags = 0; ++ if (request & ZNODE_LOCK_NONBLOCK) { ++ cap_flags |= TXN_CAPTURE_NONBLOCKING; ++ non_blocking = 1; ++ } ++ ++ if (request & ZNODE_LOCK_DONT_FUSE) ++ cap_flags |= TXN_CAPTURE_DONT_FUSE; ++ ++ /* If we are changing our process priority we must adjust a number ++ of high priority owners for each znode that we already lock */ ++ if (hipri) { ++ set_high_priority(owner); ++ } else { ++ set_low_priority(owner); ++ } ++ ++ level = znode_get_level(node); ++ ++ /* Fill request structure with our values. */ ++ owner->request.mode = mode; ++ owner->request.handle = handle; ++ owner->request.node = node; ++ ++ txnh = get_current_context()->trans; ++ lock = &node->lock; ++ ++ if (mode == ZNODE_READ_LOCK && request == 0) { ++ ret = longterm_lock_tryfast(owner); ++ if (ret <= 0) ++ return ret; ++ } ++ ++ has_atom = (txnh->atom != NULL); ++ ++ /* Synchronize on node's zlock guard lock. */ ++ spin_lock_zlock(lock); ++ ++ if (znode_is_locked(node) && ++ mode == ZNODE_WRITE_LOCK && recursive(owner)) ++ return lock_tail(owner, 0, mode); ++ ++ for (;;) { ++ /* Check the lock's availability: if it is unavaiable we get ++ E_REPEAT, 0 indicates "can_lock", otherwise the node is ++ invalid. */ ++ ret = can_lock_object(owner); ++ ++ if (unlikely(ret == -EINVAL)) { ++ /* @node is dying. Leave it alone. 
*/ ++ break; ++ } ++ ++ if (unlikely(ret == -E_REPEAT && non_blocking)) { ++ /* either locking of @node by the current thread will ++ * lead to the deadlock, or lock modes are ++ * incompatible. */ ++ break; ++ } ++ ++ assert("nikita-1844", (ret == 0) ++ || ((ret == -E_REPEAT) && !non_blocking)); ++ /* If we can get the lock... Try to capture first before ++ taking the lock. */ ++ ++ /* first handle commonest case where node and txnh are already ++ * in the same atom. */ ++ /* safe to do without taking locks, because: ++ * ++ * 1. read of aligned word is atomic with respect to writes to ++ * this word ++ * ++ * 2. false negatives are handled in reiser4_try_capture(). ++ * ++ * 3. false positives are impossible. ++ * ++ * PROOF: left as an exercise to the curious reader. ++ * ++ * Just kidding. Here is one: ++ * ++ * At the time T0 txnh->atom is stored in txnh_atom. ++ * ++ * At the time T1 node->atom is stored in node_atom. ++ * ++ * At the time T2 we observe that ++ * ++ * txnh_atom != NULL && node_atom == txnh_atom. ++ * ++ * Imagine that at this moment we acquire node and txnh spin ++ * lock in this order. Suppose that under spin lock we have ++ * ++ * node->atom != txnh->atom, (S1) ++ * ++ * at the time T3. ++ * ++ * txnh->atom != NULL still, because txnh is open by the ++ * current thread. ++ * ++ * Suppose node->atom == NULL, that is, node was un-captured ++ * between T1, and T3. But un-capturing of formatted node is ++ * always preceded by the call to reiser4_invalidate_lock(), ++ * which marks znode as JNODE_IS_DYING under zlock spin ++ * lock. Contradiction, because can_lock_object() above checks ++ * for JNODE_IS_DYING. Hence, node->atom != NULL at T3. ++ * ++ * Suppose that node->atom != node_atom, that is, atom, node ++ * belongs to was fused into another atom: node_atom was fused ++ * into node->atom. 
Atom of txnh was equal to node_atom at T2, ++ * which means that under spin lock, txnh->atom == node->atom, ++ * because txnh->atom can only follow fusion ++ * chain. Contradicts S1. ++ * ++ * The same for hypothesis txnh->atom != txnh_atom. Hence, ++ * node->atom == node_atom == txnh_atom == txnh->atom. Again ++ * contradicts S1. Hence S1 is false. QED. ++ * ++ */ ++ ++ if (likely(has_atom && ZJNODE(node)->atom == txnh->atom)) { ++ ; ++ } else { ++ /* ++ * unlock zlock spin lock here. It is possible for ++ * longterm_unlock_znode() to sneak in here, but there ++ * is no harm: reiser4_invalidate_lock() will mark znode ++ * as JNODE_IS_DYING and this will be noted by ++ * can_lock_object() below. ++ */ ++ spin_unlock_zlock(lock); ++ spin_lock_znode(node); ++ ret = reiser4_try_capture(ZJNODE(node), mode, cap_flags); ++ spin_unlock_znode(node); ++ spin_lock_zlock(lock); ++ if (unlikely(ret != 0)) { ++ /* In the failure case, the txnmgr releases ++ the znode's lock (or in some cases, it was ++ released a while ago). There's no need to ++ reacquire it so we should return here, ++ avoid releasing the lock. */ ++ owner->request.mode = 0; ++ break; ++ } ++ ++ /* Check the lock's availability again -- this is ++ because under some circumstances the capture code ++ has to release and reacquire the znode spinlock. */ ++ ret = can_lock_object(owner); ++ } ++ ++ /* This time, a return of (ret == 0) means we can lock, so we ++ should break out of the loop. */ ++ if (likely(ret != -E_REPEAT || non_blocking)) ++ break; ++ ++ /* Lock is unavailable, we have to wait. 
*/ ++ ret = reiser4_prepare_to_sleep(owner); ++ if (unlikely(ret != 0)) ++ break; ++ ++ assert_spin_locked(&(node->lock.guard)); ++ if (hipri) { ++ /* If we are going in high priority direction then ++ increase high priority requests counter for the ++ node */ ++ lock->nr_hipri_requests++; ++ if (mode == ZNODE_WRITE_LOCK) ++ lock->nr_hipri_write_requests ++; ++ /* If there are no high priority owners for a node, ++ then immediately wake up low priority owners, so ++ they can detect possible deadlock */ ++ if (lock->nr_hipri_owners == 0) ++ wake_up_all_lopri_owners(node); ++ } ++ list_add_tail(&owner->requestors_link, &lock->requestors); ++ ++ /* Ok, here we have prepared a lock request, so unlock ++ a znode ... */ ++ spin_unlock_zlock(lock); ++ /* ... and sleep */ ++ reiser4_go_to_sleep(owner); ++ if (owner->request.mode == ZNODE_NO_LOCK) ++ goto request_is_done; ++ spin_lock_zlock(lock); ++ if (owner->request.mode == ZNODE_NO_LOCK) { ++ spin_unlock_zlock(lock); ++ request_is_done: ++ if (owner->request.ret_code == 0) { ++ LOCK_CNT_INC(long_term_locked_znode); ++ zref(node); ++ } ++ return owner->request.ret_code; ++ } ++ remove_lock_request(owner); ++ } ++ ++ return lock_tail(owner, ret, mode); ++} ++ ++/* lock object invalidation means changing of lock object state to `INVALID' ++ and waiting for all other processes to cancel theirs lock requests. */ ++void reiser4_invalidate_lock(lock_handle * handle /* path to lock ++ * owner and lock ++ * object is being ++ * invalidated. 
*/ ) ++{ ++ znode *node = handle->node; ++ lock_stack *owner = handle->owner; ++ ++ assert("zam-325", owner == get_current_lock_stack()); ++ assert("zam-103", znode_is_write_locked(node)); ++ assert("nikita-1393", !ZF_ISSET(node, JNODE_LEFT_CONNECTED)); ++ assert("nikita-1793", !ZF_ISSET(node, JNODE_RIGHT_CONNECTED)); ++ assert("nikita-1394", ZF_ISSET(node, JNODE_HEARD_BANSHEE)); ++ assert("nikita-3097", znode_is_wlocked_once(node)); ++ assert_spin_locked(&(node->lock.guard)); ++ ++ if (handle->signaled) ++ atomic_dec(&owner->nr_signaled); ++ ++ ZF_SET(node, JNODE_IS_DYING); ++ unlink_object(handle); ++ node->lock.nr_readers = 0; ++ ++ invalidate_all_lock_requests(node); ++ spin_unlock_zlock(&node->lock); ++} ++ ++/* Initializes lock_stack. */ ++void init_lock_stack(lock_stack * owner /* pointer to ++ * allocated ++ * structure. */ ) ++{ ++ INIT_LIST_HEAD(&owner->locks); ++ INIT_LIST_HEAD(&owner->requestors_link); ++ spin_lock_init(&owner->sguard); ++ owner->curpri = 1; ++ init_waitqueue_head(&owner->wait); ++} ++ ++/* Initializes lock object. */ ++void reiser4_init_lock(zlock * lock /* pointer on allocated ++ * uninitialized lock object ++ * structure. */ ) ++{ ++ memset(lock, 0, sizeof(zlock)); ++ spin_lock_init(&lock->guard); ++ INIT_LIST_HEAD(&lock->requestors); ++ INIT_LIST_HEAD(&lock->owners); ++} ++ ++/* Transfer a lock handle (presumably so that variables can be moved between stack and ++ heap locations). */ ++static void ++move_lh_internal(lock_handle * new, lock_handle * old, int unlink_old) ++{ ++ znode *node = old->node; ++ lock_stack *owner = old->owner; ++ int signaled; ++ ++ /* locks_list, modified by link_object() is not protected by ++ anything. This is valid because only current thread ever modifies ++ locks_list of its lock_stack. 
++ */ ++ assert("nikita-1827", owner == get_current_lock_stack()); ++ assert("nikita-1831", new->owner == NULL); ++ ++ spin_lock_zlock(&node->lock); ++ ++ signaled = old->signaled; ++ if (unlink_old) { ++ unlink_object(old); ++ } else { ++ if (node->lock.nr_readers > 0) { ++ node->lock.nr_readers += 1; ++ } else { ++ node->lock.nr_readers -= 1; ++ } ++ if (signaled) { ++ atomic_inc(&owner->nr_signaled); ++ } ++ if (owner->curpri) { ++ node->lock.nr_hipri_owners += 1; ++ } ++ LOCK_CNT_INC(long_term_locked_znode); ++ ++ zref(node); ++ } ++ link_object(new, owner, node); ++ new->signaled = signaled; ++ ++ spin_unlock_zlock(&node->lock); ++} ++ ++void move_lh(lock_handle * new, lock_handle * old) ++{ ++ move_lh_internal(new, old, /*unlink_old */ 1); ++} ++ ++void copy_lh(lock_handle * new, lock_handle * old) ++{ ++ move_lh_internal(new, old, /*unlink_old */ 0); ++} ++ ++/* after getting -E_DEADLOCK we unlock znodes until this function returns false */ ++int reiser4_check_deadlock(void) ++{ ++ lock_stack *owner = get_current_lock_stack(); ++ return atomic_read(&owner->nr_signaled) != 0; ++} ++ ++/* Before going to sleep we re-check "release lock" requests which might come from threads with hi-pri lock ++ priorities. 
*/ ++int reiser4_prepare_to_sleep(lock_stack * owner) ++{ ++ assert("nikita-1847", owner == get_current_lock_stack()); ++ ++ /* We return -E_DEADLOCK if one or more "give me the lock" messages are ++ * counted in nr_signaled */ ++ if (unlikely(atomic_read(&owner->nr_signaled) != 0)) { ++ assert("zam-959", !owner->curpri); ++ return RETERR(-E_DEADLOCK); ++ } ++ return 0; ++} ++ ++/* Wakes up a single thread */ ++void __reiser4_wake_up(lock_stack * owner) ++{ ++ atomic_set(&owner->wakeup, 1); ++ wake_up(&owner->wait); ++} ++ ++/* Puts a thread to sleep */ ++void reiser4_go_to_sleep(lock_stack * owner) ++{ ++ /* Well, we might sleep here, so holding of any spinlocks is no-no */ ++ assert("nikita-3027", reiser4_schedulable()); ++ ++ wait_event(owner->wait, atomic_read(&owner->wakeup)); ++ atomic_set(&owner->wakeup, 0); ++} ++ ++int lock_stack_isclean(lock_stack * owner) ++{ ++ if (list_empty_careful(&owner->locks)) { ++ assert("zam-353", atomic_read(&owner->nr_signaled) == 0); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++#if REISER4_DEBUG ++ ++/* ++ * debugging functions ++ */ ++ ++static void list_check(struct list_head *head) ++{ ++ struct list_head *pos; ++ ++ list_for_each(pos, head) ++ assert("", (pos->prev != NULL && pos->next != NULL && ++ pos->prev->next == pos && pos->next->prev == pos)); ++} ++ ++/* check consistency of locking data-structures hanging of the @stack */ ++static void check_lock_stack(lock_stack * stack) ++{ ++ spin_lock_stack(stack); ++ /* check that stack->locks is not corrupted */ ++ list_check(&stack->locks); ++ spin_unlock_stack(stack); ++} ++ ++/* check consistency of locking data structures */ ++void check_lock_data(void) ++{ ++ check_lock_stack(&get_current_context()->stack); ++} ++ ++/* check consistency of locking data structures for @node */ ++void check_lock_node_data(znode * node) ++{ ++ spin_lock_zlock(&node->lock); ++ list_check(&node->lock.owners); ++ list_check(&node->lock.requestors); ++ spin_unlock_zlock(&node->lock); ++} ++ 
++/* check that given lock request is dead lock safe. This check is, of course, ++ * not exhaustive. */ ++static int ++request_is_deadlock_safe(znode * node, znode_lock_mode mode, ++ znode_lock_request request) ++{ ++ lock_stack *owner; ++ ++ owner = get_current_lock_stack(); ++ /* ++ * check that hipri lock request is not issued when there are locked ++ * nodes at the higher levels. ++ */ ++ if (request & ZNODE_LOCK_HIPRI && !(request & ZNODE_LOCK_NONBLOCK) && ++ znode_get_level(node) != 0) { ++ lock_handle *item; ++ ++ list_for_each_entry(item, &owner->locks, locks_link) { ++ znode *other; ++ ++ other = item->node; ++ ++ if (znode_get_level(other) == 0) ++ continue; ++ if (znode_get_level(other) > znode_get_level(node)) ++ return 0; ++ } ++ } ++ return 1; ++} ++ ++#endif ++ ++/* return pointer to static storage with name of lock_mode. For ++ debugging */ ++const char *lock_mode_name(znode_lock_mode lock /* lock mode to get name of */ ) ++{ ++ if (lock == ZNODE_READ_LOCK) ++ return "read"; ++ else if (lock == ZNODE_WRITE_LOCK) ++ return "write"; ++ else { ++ static char buf[30]; ++ ++ sprintf(buf, "unknown: %i", lock); ++ return buf; ++ } ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 79 ++ End: ++*/ +diff --git a/fs/reiser4/lock.h b/fs/reiser4/lock.h +new file mode 100644 +index 0000000..e130466 +--- /dev/null ++++ b/fs/reiser4/lock.h +@@ -0,0 +1,249 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Long term locking data structures. See lock.c for details. 
*/ ++ ++#ifndef __LOCK_H__ ++#define __LOCK_H__ ++ ++#include "forward.h" ++#include "debug.h" ++#include "dformat.h" ++#include "key.h" ++#include "coord.h" ++#include "plugin/node/node.h" ++#include "txnmgr.h" ++#include "readahead.h" ++ ++#include ++#include ++#include /* for PAGE_CACHE_SIZE */ ++#include ++#include ++ ++/* Per-znode lock object */ ++struct zlock { ++ spinlock_t guard; ++ /* The number of readers if positive; the number of recursively taken ++ write locks if negative. Protected by zlock spin lock. */ ++ int nr_readers; ++ /* A number of processes (lock_stacks) that have this object ++ locked with high priority */ ++ unsigned nr_hipri_owners; ++ /* A number of attempts to lock znode in high priority direction */ ++ unsigned nr_hipri_requests; ++ /* A linked list of lock_handle objects that contains pointers ++ for all lock_stacks which have this lock object locked */ ++ unsigned nr_hipri_write_requests; ++ struct list_head owners; ++ /* A linked list of lock_stacks that wait for this lock */ ++ struct list_head requestors; ++}; ++ ++static inline void spin_lock_zlock(zlock *lock) ++{ ++ /* check that zlock is not locked */ ++ assert("", LOCK_CNT_NIL(spin_locked_zlock)); ++ /* check that spinlocks of lower priorities are not held */ ++ assert("", LOCK_CNT_NIL(spin_locked_stack)); ++ ++ spin_lock(&lock->guard); ++ ++ LOCK_CNT_INC(spin_locked_zlock); ++ LOCK_CNT_INC(spin_locked); ++} ++ ++static inline void spin_unlock_zlock(zlock *lock) ++{ ++ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_zlock)); ++ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); ++ ++ LOCK_CNT_DEC(spin_locked_zlock); ++ LOCK_CNT_DEC(spin_locked); ++ ++ spin_unlock(&lock->guard); ++} ++ ++#define lock_is_locked(lock) ((lock)->nr_readers != 0) ++#define lock_is_rlocked(lock) ((lock)->nr_readers > 0) ++#define lock_is_wlocked(lock) ((lock)->nr_readers < 0) ++#define lock_is_wlocked_once(lock) ((lock)->nr_readers == -1) ++#define lock_can_be_rlocked(lock) ((lock)->nr_readers >=0) 
++#define lock_mode_compatible(lock, mode) \ ++ (((mode) == ZNODE_WRITE_LOCK && !lock_is_locked(lock)) || \ ++ ((mode) == ZNODE_READ_LOCK && lock_can_be_rlocked(lock))) ++ ++/* Since we have R/W znode locks we need additional bidirectional `link' ++ objects to implement n<->m relationship between lock owners and lock ++ objects. We call them `lock handles'. ++ ++ Locking: see lock.c/"SHORT-TERM LOCKING" ++*/ ++struct lock_handle { ++ /* This flag indicates that a signal to yield a lock was passed to ++ lock owner and counted in owner->nr_signalled ++ ++ Locking: this is accessed under spin lock on ->node. ++ */ ++ int signaled; ++ /* A link to owner of a lock */ ++ lock_stack *owner; ++ /* A link to znode locked */ ++ znode *node; ++ /* A list of all locks for a process */ ++ struct list_head locks_link; ++ /* A list of all owners for a znode */ ++ struct list_head owners_link; ++}; ++ ++typedef struct lock_request { ++ /* A pointer to uninitialized link object */ ++ lock_handle *handle; ++ /* A pointer to the object we want to lock */ ++ znode *node; ++ /* Lock mode (ZNODE_READ_LOCK or ZNODE_WRITE_LOCK) */ ++ znode_lock_mode mode; ++ /* how dispatch_lock_requests() returns lock request result code */ ++ int ret_code; ++} lock_request; ++ ++/* A lock stack structure for accumulating locks owned by a process */ ++struct lock_stack { ++ /* A guard lock protecting a lock stack */ ++ spinlock_t sguard; ++ /* number of znodes which were requested by high priority processes */ ++ atomic_t nr_signaled; ++ /* Current priority of a process ++ ++ This is only accessed by the current thread and thus requires no ++ locking. ++ */ ++ int curpri; ++ /* A list of all locks owned by this process. Elements can be added to ++ * this list only by the current thread. ->node pointers in this list ++ * can be only changed by the current thread. 
*/ ++ struct list_head locks; ++ /* When lock_stack waits for the lock, it puts itself on double-linked ++ requestors list of that lock */ ++ struct list_head requestors_link; ++ /* Current lock request info. ++ ++ This is only accessed by the current thread and thus requires no ++ locking. ++ */ ++ lock_request request; ++ /* the following two fields are the lock stack's ++ * synchronization object to use with the standard linux/wait.h ++ * interface. See reiser4_go_to_sleep and __reiser4_wake_up for ++ * usage details. */ ++ wait_queue_head_t wait; ++ atomic_t wakeup; ++#if REISER4_DEBUG ++ int nr_locks; /* number of lock handles in the above list */ ++#endif ++}; ++ ++/* ++ User-visible znode locking functions ++*/ ++ ++extern int longterm_lock_znode(lock_handle * handle, ++ znode * node, ++ znode_lock_mode mode, ++ znode_lock_request request); ++ ++extern void longterm_unlock_znode(lock_handle * handle); ++ ++extern int reiser4_check_deadlock(void); ++ ++extern lock_stack *get_current_lock_stack(void); ++ ++extern void init_lock_stack(lock_stack * owner); ++extern void reiser4_init_lock(zlock * lock); ++ ++static inline void init_lh(lock_handle *lh) ++{ ++#if REISER4_DEBUG ++ memset(lh, 0, sizeof *lh); ++ INIT_LIST_HEAD(&lh->locks_link); ++ INIT_LIST_HEAD(&lh->owners_link); ++#else ++ lh->node = NULL; ++#endif ++} ++ ++static inline void done_lh(lock_handle *lh) ++{ ++ assert("zam-342", lh != NULL); ++ if (lh->node != NULL) ++ longterm_unlock_znode(lh); ++} ++ ++extern void move_lh(lock_handle * new, lock_handle * old); ++extern void copy_lh(lock_handle * new, lock_handle * old); ++ ++extern int reiser4_prepare_to_sleep(lock_stack * owner); ++extern void reiser4_go_to_sleep(lock_stack * owner); ++extern void __reiser4_wake_up(lock_stack * owner); ++ ++extern int lock_stack_isclean(lock_stack * owner); ++ ++/* zlock object state check macros: only used in assertions. Both forms imply that the ++ lock is held by the current thread. 
*/ ++extern int znode_is_write_locked(const znode *); ++extern void reiser4_invalidate_lock(lock_handle *); ++ ++/* lock ordering is: first take zlock spin lock, then lock stack spin lock */ ++#define spin_ordering_pred_stack(stack) \ ++ (LOCK_CNT_NIL(spin_locked_stack) && \ ++ LOCK_CNT_NIL(spin_locked_txnmgr) && \ ++ LOCK_CNT_NIL(spin_locked_inode) && \ ++ LOCK_CNT_NIL(rw_locked_cbk_cache) && \ ++ LOCK_CNT_NIL(spin_locked_super_eflush) ) ++ ++static inline void spin_lock_stack(lock_stack *stack) ++{ ++ assert("", spin_ordering_pred_stack(stack)); ++ spin_lock(&(stack->sguard)); ++ LOCK_CNT_INC(spin_locked_stack); ++ LOCK_CNT_INC(spin_locked); ++} ++ ++static inline void spin_unlock_stack(lock_stack *stack) ++{ ++ assert_spin_locked(&(stack->sguard)); ++ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_stack)); ++ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); ++ LOCK_CNT_DEC(spin_locked_stack); ++ LOCK_CNT_DEC(spin_locked); ++ spin_unlock(&(stack->sguard)); ++} ++ ++static inline void reiser4_wake_up(lock_stack * owner) ++{ ++ spin_lock_stack(owner); ++ __reiser4_wake_up(owner); ++ spin_unlock_stack(owner); ++} ++ ++const char *lock_mode_name(znode_lock_mode lock); ++ ++#if REISER4_DEBUG ++extern void check_lock_data(void); ++extern void check_lock_node_data(znode * node); ++#else ++#define check_lock_data() noop ++#define check_lock_node_data() noop ++#endif ++ ++/* __LOCK_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/oid.c b/fs/reiser4/oid.c +new file mode 100644 +index 0000000..f311d06 +--- /dev/null ++++ b/fs/reiser4/oid.c +@@ -0,0 +1,141 @@ ++/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "debug.h" ++#include "super.h" ++#include "txnmgr.h" ++ ++/* we used to have oid allocation plugin. 
It was removed because it ++ was recognized as providing unneeded level of abstraction. If one ++ ever will find it useful - look at yet_unneeded_abstractions/oid ++*/ ++ ++/* ++ * initialize in-memory data for oid allocator at @super. @nr_files and @next ++ * are provided by disk format plugin that reads them from the disk during ++ * mount. ++ */ ++int oid_init_allocator(struct super_block *super, oid_t nr_files, oid_t next) ++{ ++ reiser4_super_info_data *sbinfo; ++ ++ sbinfo = get_super_private(super); ++ ++ sbinfo->next_to_use = next; ++ sbinfo->oids_in_use = nr_files; ++ return 0; ++} ++ ++/* ++ * allocate oid and return it. ABSOLUTE_MAX_OID is returned when allocator ++ * runs out of oids. ++ */ ++oid_t oid_allocate(struct super_block * super) ++{ ++ reiser4_super_info_data *sbinfo; ++ oid_t oid; ++ ++ sbinfo = get_super_private(super); ++ ++ spin_lock_reiser4_super(sbinfo); ++ if (sbinfo->next_to_use != ABSOLUTE_MAX_OID) { ++ oid = sbinfo->next_to_use++; ++ sbinfo->oids_in_use++; ++ } else ++ oid = ABSOLUTE_MAX_OID; ++ spin_unlock_reiser4_super(sbinfo); ++ return oid; ++} ++ ++/* ++ * Tell oid allocator that @oid is now free. ++ */ ++int oid_release(struct super_block *super, oid_t oid UNUSED_ARG) ++{ ++ reiser4_super_info_data *sbinfo; ++ ++ sbinfo = get_super_private(super); ++ ++ spin_lock_reiser4_super(sbinfo); ++ sbinfo->oids_in_use--; ++ spin_unlock_reiser4_super(sbinfo); ++ return 0; ++} ++ ++/* ++ * return next @oid that would be allocated (i.e., returned by oid_allocate()) ++ * without actually allocating it. This is used by disk format plugin to save ++ * oid allocator state on the disk. ++ */ ++oid_t oid_next(const struct super_block * super) ++{ ++ reiser4_super_info_data *sbinfo; ++ oid_t oid; ++ ++ sbinfo = get_super_private(super); ++ ++ spin_lock_reiser4_super(sbinfo); ++ oid = sbinfo->next_to_use; ++ spin_unlock_reiser4_super(sbinfo); ++ return oid; ++} ++ ++/* ++ * returns number of currently used oids. 
This is used by statfs(2) to report ++ * number of "inodes" and by disk format plugin to save oid allocator state on ++ * the disk. ++ */ ++long oids_used(const struct super_block *super) ++{ ++ reiser4_super_info_data *sbinfo; ++ oid_t used; ++ ++ sbinfo = get_super_private(super); ++ ++ spin_lock_reiser4_super(sbinfo); ++ used = sbinfo->oids_in_use; ++ spin_unlock_reiser4_super(sbinfo); ++ if (used < (__u64) ((long)~0) >> 1) ++ return (long)used; ++ else ++ return (long)-1; ++} ++ ++/* ++ * Count oid as allocated in atom. This is done after call to oid_allocate() ++ * at the point when we are irrevocably committed to creation of the new file ++ * (i.e., when oid allocation cannot be any longer rolled back due to some ++ * error). ++ */ ++void oid_count_allocated(void) ++{ ++ txn_atom *atom; ++ ++ atom = get_current_atom_locked(); ++ atom->nr_objects_created++; ++ spin_unlock_atom(atom); ++} ++ ++/* ++ * Count oid as free in atom. This is done after call to oid_release() at the ++ * point when we are irrevocably committed to the deletion of the file (i.e., ++ * when oid release cannot be any longer rolled back due to some error). ++ */ ++void oid_count_released(void) ++{ ++ txn_atom *atom; ++ ++ atom = get_current_atom_locked(); ++ atom->nr_objects_deleted++; ++ spin_unlock_atom(atom); ++} ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/page_cache.c b/fs/reiser4/page_cache.c +new file mode 100644 +index 0000000..e1f436d +--- /dev/null ++++ b/fs/reiser4/page_cache.c +@@ -0,0 +1,736 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Memory pressure hooks. Fake inodes handling. */ ++ ++/* GLOSSARY ++ ++ . Formatted and unformatted nodes. ++ Elements of reiser4 balanced tree to store data and metadata. ++ Unformatted nodes are pointed to by extent pointers. 
Such nodes ++ are used to store data of large objects. Unlike unformatted nodes, ++ formatted ones have associated format described by node4X plugin. ++ ++ . Jnode (or journal node) ++ The in-memory header which is used to track formatted and unformatted ++ nodes, bitmap nodes, etc. In particular, jnodes are used to track ++ transactional information associated with each block(see reiser4/jnode.c ++ for details). ++ ++ . Znode ++ The in-memory header which is used to track formatted nodes. Contains ++ embedded jnode (see reiser4/znode.c for details). ++*/ ++ ++/* We store all file system meta data (and data, of course) in the page cache. ++ ++ What does this mean? In stead of using bread/brelse we create special ++ "fake" inode (one per super block) and store content of formatted nodes ++ into pages bound to this inode in the page cache. In newer kernels bread() ++ already uses inode attached to block device (bd_inode). Advantage of having ++ our own fake inode is that we can install appropriate methods in its ++ address_space operations. Such methods are called by VM on memory pressure ++ (or during background page flushing) and we can use them to react ++ appropriately. ++ ++ In initial version we only support one block per page. Support for multiple ++ blocks per page is complicated by relocation. ++ ++ To each page, used by reiser4, jnode is attached. jnode is analogous to ++ buffer head. Difference is that jnode is bound to the page permanently: ++ jnode cannot be removed from memory until its backing page is. ++ ++ jnode contain pointer to page (->pg field) and page contain pointer to ++ jnode in ->private field. Pointer from jnode to page is protected to by ++ jnode's spinlock and pointer from page to jnode is protected by page lock ++ (PG_locked bit). Lock ordering is: first take page lock, then jnode spin ++ lock. To go into reverse direction use jnode_lock_page() function that uses ++ standard try-lock-and-release device. ++ ++ Properties: ++ ++ 1. 
when jnode-to-page mapping is established (by jnode_attach_page()), page ++ reference counter is increased. ++ ++ 2. when jnode-to-page mapping is destroyed (by page_clear_jnode(), page ++ reference counter is decreased. ++ ++ 3. on jload() reference counter on jnode page is increased, page is ++ kmapped and `referenced'. ++ ++ 4. on jrelse() inverse operations are performed. ++ ++ 5. kmapping/kunmapping of unformatted pages is done by read/write methods. ++ ++ DEADLOCKS RELATED TO MEMORY PRESSURE. [OUTDATED. Only interesting ++ historically.] ++ ++ [In the following discussion, `lock' invariably means long term lock on ++ znode.] (What about page locks?) ++ ++ There is some special class of deadlock possibilities related to memory ++ pressure. Locks acquired by other reiser4 threads are accounted for in ++ deadlock prevention mechanism (lock.c), but when ->vm_writeback() is ++ invoked additional hidden arc is added to the locking graph: thread that ++ tries to allocate memory waits for ->vm_writeback() to finish. If this ++ thread keeps lock and ->vm_writeback() tries to acquire this lock, deadlock ++ prevention is useless. ++ ++ Another related problem is possibility for ->vm_writeback() to run out of ++ memory itself. This is not a problem for ext2 and friends, because their ++ ->vm_writeback() don't allocate much memory, but reiser4 flush is ++ definitely able to allocate huge amounts of memory. ++ ++ It seems that there is no reliable way to cope with the problems above. In ++ stead it was decided that ->vm_writeback() (as invoked in the kswapd ++ context) wouldn't perform any flushing itself, but rather should just wake ++ up some auxiliary thread dedicated for this purpose (or, the same thread ++ that does periodic commit of old atoms (ktxnmgrd.c)). ++ ++ Details: ++ ++ 1. Page is called `reclaimable' against particular reiser4 mount F if this ++ page can be ultimately released by try_to_free_pages() under presumptions ++ that: ++ ++ a. 
->vm_writeback() for F is no-op, and ++ ++ b. none of the threads accessing F are making any progress, and ++ ++ c. other reiser4 mounts obey the same memory reservation protocol as F ++ (described below). ++ ++ For example, clean un-pinned page, or page occupied by ext2 data are ++ reclaimable against any reiser4 mount. ++ ++ When there is more than one reiser4 mount in a system, condition (c) makes ++ reclaim-ability not easily verifiable beyond trivial cases mentioned above. ++ ++ THIS COMMENT IS VALID FOR "MANY BLOCKS ON PAGE" CASE ++ ++ Fake inode is used to bound formatted nodes and each node is indexed within ++ fake inode by its block number. If block size of smaller than page size, it ++ may so happen that block mapped to the page with formatted node is occupied ++ by unformatted node or is unallocated. This lead to some complications, ++ because flushing whole page can lead to an incorrect overwrite of ++ unformatted node that is moreover, can be cached in some other place as ++ part of the file body. To avoid this, buffers for unformatted nodes are ++ never marked dirty. Also pages in the fake are never marked dirty. This ++ rules out usage of ->writepage() as memory pressure hook. In stead ++ ->releasepage() is used. ++ ++ Josh is concerned that page->buffer is going to die. This should not pose ++ significant problem though, because we need to add some data structures to ++ the page anyway (jnode) and all necessary book keeping can be put there. ++ ++*/ ++ ++/* Life cycle of pages/nodes. ++ ++ jnode contains reference to page and page contains reference back to ++ jnode. This reference is counted in page ->count. Thus, page bound to jnode ++ cannot be released back into free pool. ++ ++ 1. Formatted nodes. ++ ++ 1. formatted node is represented by znode. When new znode is created its ++ ->pg pointer is NULL initially. ++ ++ 2. 
when node content is loaded into znode (by call to zload()) for the ++ first time following happens (in call to ->read_node() or ++ ->allocate_node()): ++ ++ 1. new page is added to the page cache. ++ ++ 2. this page is attached to znode and its ->count is increased. ++ ++ 3. page is kmapped. ++ ++ 3. if more calls to zload() follow (without corresponding zrelses), page ++ counter is left intact and in its stead ->d_count is increased in znode. ++ ++ 4. each call to zrelse decreases ->d_count. When ->d_count drops to zero ++ ->release_node() is called and page is kunmapped as result. ++ ++ 5. at some moment node can be captured by a transaction. Its ->x_count ++ is then increased by transaction manager. ++ ++ 6. if node is removed from the tree (empty node with JNODE_HEARD_BANSHEE ++ bit set) following will happen (also see comment at the top of znode.c): ++ ++ 1. when last lock is released, node will be uncaptured from ++ transaction. This released reference that transaction manager acquired ++ at the step 5. ++ ++ 2. when last reference is released, zput() detects that node is ++ actually deleted and calls ->delete_node() ++ operation. page_cache_delete_node() implementation detaches jnode from ++ page and releases page. ++ ++ 7. otherwise (node wasn't removed from the tree), last reference to ++ znode will be released after transaction manager committed transaction ++ node was in. This implies squallocing of this node (see ++ flush.c). Nothing special happens at this point. Znode is still in the ++ hash table and page is still attached to it. ++ ++ 8. znode is actually removed from the memory because of the memory ++ pressure, or during umount (znodes_tree_done()). Anyway, znode is ++ removed by the call to zdrop(). At this moment, page is detached from ++ znode and removed from the inode address space. 
++ ++*/ ++ ++#include "debug.h" ++#include "dformat.h" ++#include "key.h" ++#include "txnmgr.h" ++#include "jnode.h" ++#include "znode.h" ++#include "block_alloc.h" ++#include "tree.h" ++#include "vfs_ops.h" ++#include "inode.h" ++#include "super.h" ++#include "entd.h" ++#include "page_cache.h" ++#include "ktxnmgrd.h" ++ ++#include ++#include ++#include /* for struct page */ ++#include /* for struct page */ ++#include ++#include ++#include ++#include ++ ++static struct bio *page_bio(struct page *, jnode *, int rw, gfp_t gfp); ++ ++static struct address_space_operations formatted_fake_as_ops; ++ ++static const oid_t fake_ino = 0x1; ++static const oid_t bitmap_ino = 0x2; ++static const oid_t cc_ino = 0x3; ++ ++static void ++init_fake_inode(struct super_block *super, struct inode *fake, ++ struct inode **pfake) ++{ ++ assert("nikita-2168", fake->i_state & I_NEW); ++ fake->i_mapping->a_ops = &formatted_fake_as_ops; ++ *pfake = fake; ++ /* NOTE-NIKITA something else? */ ++ unlock_new_inode(fake); ++} ++ ++/** ++ * reiser4_init_formatted_fake - iget inodes for formatted nodes and bitmaps ++ * @super: super block to init fake inode for ++ * ++ * Initializes fake inode to which formatted nodes are bound in the page cache ++ * and inode for bitmaps. 
++ */ ++int reiser4_init_formatted_fake(struct super_block *super) ++{ ++ struct inode *fake; ++ struct inode *bitmap; ++ struct inode *cc; ++ reiser4_super_info_data *sinfo; ++ ++ assert("nikita-1703", super != NULL); ++ ++ sinfo = get_super_private_nocheck(super); ++ fake = iget_locked(super, oid_to_ino(fake_ino)); ++ ++ if (fake != NULL) { ++ init_fake_inode(super, fake, &sinfo->fake); ++ ++ bitmap = iget_locked(super, oid_to_ino(bitmap_ino)); ++ if (bitmap != NULL) { ++ init_fake_inode(super, bitmap, &sinfo->bitmap); ++ ++ cc = iget_locked(super, oid_to_ino(cc_ino)); ++ if (cc != NULL) { ++ init_fake_inode(super, cc, &sinfo->cc); ++ return 0; ++ } else { ++ iput(sinfo->fake); ++ iput(sinfo->bitmap); ++ sinfo->fake = NULL; ++ sinfo->bitmap = NULL; ++ } ++ } else { ++ iput(sinfo->fake); ++ sinfo->fake = NULL; ++ } ++ } ++ return RETERR(-ENOMEM); ++} ++ ++/** ++ * reiser4_done_formatted_fake - release inode used by formatted nodes and bitmaps ++ * @super: super block to init fake inode for ++ * ++ * Releases inodes which were used as address spaces of bitmap and formatted ++ * nodes. 
++ */ ++void reiser4_done_formatted_fake(struct super_block *super) ++{ ++ reiser4_super_info_data *sinfo; ++ ++ sinfo = get_super_private_nocheck(super); ++ ++ if (sinfo->fake != NULL) { ++ iput(sinfo->fake); ++ sinfo->fake = NULL; ++ } ++ ++ if (sinfo->bitmap != NULL) { ++ iput(sinfo->bitmap); ++ sinfo->bitmap = NULL; ++ } ++ ++ if (sinfo->cc != NULL) { ++ iput(sinfo->cc); ++ sinfo->cc = NULL; ++ } ++ return; ++} ++ ++void reiser4_wait_page_writeback(struct page *page) ++{ ++ assert("zam-783", PageLocked(page)); ++ ++ do { ++ unlock_page(page); ++ wait_on_page_writeback(page); ++ lock_page(page); ++ } while (PageWriteback(page)); ++} ++ ++/* return tree @page is in */ ++reiser4_tree *reiser4_tree_by_page(const struct page *page /* page to query */ ) ++{ ++ assert("nikita-2461", page != NULL); ++ return &get_super_private(page->mapping->host->i_sb)->tree; ++} ++ ++/* completion handler for single page bio-based read. ++ ++ mpage_end_io_read() would also do. But it's static. ++ ++*/ ++static int ++end_bio_single_page_read(struct bio *bio, unsigned int bytes_done UNUSED_ARG, ++ int err UNUSED_ARG) ++{ ++ struct page *page; ++ ++ if (bio->bi_size != 0) { ++ warning("nikita-3332", "Truncated single page read: %i", ++ bio->bi_size); ++ return 1; ++ } ++ ++ page = bio->bi_io_vec[0].bv_page; ++ ++ if (test_bit(BIO_UPTODATE, &bio->bi_flags)) { ++ SetPageUptodate(page); ++ } else { ++ ClearPageUptodate(page); ++ SetPageError(page); ++ } ++ unlock_page(page); ++ bio_put(bio); ++ return 0; ++} ++ ++/* completion handler for single page bio-based write. ++ ++ mpage_end_io_write() would also do. But it's static. 
++ ++*/ ++static int ++end_bio_single_page_write(struct bio *bio, unsigned int bytes_done UNUSED_ARG, ++ int err UNUSED_ARG) ++{ ++ struct page *page; ++ ++ if (bio->bi_size != 0) { ++ warning("nikita-3333", "Truncated single page write: %i", ++ bio->bi_size); ++ return 1; ++ } ++ ++ page = bio->bi_io_vec[0].bv_page; ++ ++ if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) ++ SetPageError(page); ++ end_page_writeback(page); ++ bio_put(bio); ++ return 0; ++} ++ ++/* ->readpage() method for formatted nodes */ ++static int formatted_readpage(struct file *f UNUSED_ARG, ++ struct page *page /* page to read */ ) ++{ ++ assert("nikita-2412", PagePrivate(page) && jprivate(page)); ++ return reiser4_page_io(page, jprivate(page), READ, ++ reiser4_ctx_gfp_mask_get()); ++} ++ ++/** ++ * reiser4_page_io - submit single-page bio request ++ * @page: page to perform io for ++ * @node: jnode of page ++ * @rw: read or write ++ * @gfp: gfp mask for bio allocation ++ * ++ * Submits single page read or write. ++ */ ++int reiser4_page_io(struct page *page, jnode *node, int rw, gfp_t gfp) ++{ ++ struct bio *bio; ++ int result; ++ ++ assert("nikita-2094", page != NULL); ++ assert("nikita-2226", PageLocked(page)); ++ assert("nikita-2634", node != NULL); ++ assert("nikita-2893", rw == READ || rw == WRITE); ++ ++ if (rw) { ++ if (unlikely(page->mapping->host->i_sb->s_flags & MS_RDONLY)) { ++ unlock_page(page); ++ return 0; ++ } ++ } ++ ++ bio = page_bio(page, node, rw, gfp); ++ if (!IS_ERR(bio)) { ++ if (rw == WRITE) { ++ set_page_writeback(page); ++ unlock_page(page); ++ } ++ reiser4_submit_bio(rw, bio); ++ result = 0; ++ } else { ++ unlock_page(page); ++ result = PTR_ERR(bio); ++ } ++ ++ return result; ++} ++ ++/* helper function to construct bio for page */ ++static struct bio *page_bio(struct page *page, jnode * node, int rw, gfp_t gfp) ++{ ++ struct bio *bio; ++ assert("nikita-2092", page != NULL); ++ assert("nikita-2633", node != NULL); ++ ++ /* Simple implementation in the assumption that 
blocksize == pagesize. ++ ++ We only have to submit one block, but submit_bh() will allocate bio ++ anyway, so lets use all the bells-and-whistles of bio code. ++ */ ++ ++ bio = bio_alloc(gfp, 1); ++ if (bio != NULL) { ++ int blksz; ++ struct super_block *super; ++ reiser4_block_nr blocknr; ++ ++ super = page->mapping->host->i_sb; ++ assert("nikita-2029", super != NULL); ++ blksz = super->s_blocksize; ++ assert("nikita-2028", blksz == (int)PAGE_CACHE_SIZE); ++ ++ spin_lock_jnode(node); ++ blocknr = *jnode_get_io_block(node); ++ spin_unlock_jnode(node); ++ ++ assert("nikita-2275", blocknr != (reiser4_block_nr) 0); ++ assert("nikita-2276", !reiser4_blocknr_is_fake(&blocknr)); ++ ++ bio->bi_bdev = super->s_bdev; ++ /* fill bio->bi_sector before calling bio_add_page(), because ++ * q->merge_bvec_fn may want to inspect it (see ++ * drivers/md/linear.c:linear_mergeable_bvec() for example. */ ++ bio->bi_sector = blocknr * (blksz >> 9); ++ ++ if (!bio_add_page(bio, page, blksz, 0)) { ++ warning("nikita-3452", ++ "Single page bio cannot be constructed"); ++ return ERR_PTR(RETERR(-EINVAL)); ++ } ++ ++ /* bio -> bi_idx is filled by bio_init() */ ++ bio->bi_end_io = (rw == READ) ? ++ end_bio_single_page_read : end_bio_single_page_write; ++ ++ return bio; ++ } else ++ return ERR_PTR(RETERR(-ENOMEM)); ++} ++ ++/* this function is internally called by jnode_make_dirty() */ ++int reiser4_set_page_dirty_internal(struct page *page) ++{ ++ struct address_space *mapping; ++ ++ mapping = page->mapping; ++ BUG_ON(mapping == NULL); ++ ++ if (!TestSetPageDirty(page)) { ++ if (mapping_cap_account_dirty(mapping)) ++ inc_zone_page_state(page, NR_FILE_DIRTY); ++ ++ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); ++ } ++ ++ /* znode must be dirty ? 
*/ ++ if (mapping->host == reiser4_get_super_fake(mapping->host->i_sb)) ++ assert("", JF_ISSET(jprivate(page), JNODE_DIRTY)); ++ return 0; ++} ++ ++#if REISER4_DEBUG ++ ++/** ++ * can_hit_entd ++ * ++ * This is used on ++ */ ++static int can_hit_entd(reiser4_context *ctx, struct super_block *s) ++{ ++ if (ctx == NULL || ((unsigned long)ctx->magic) != context_magic) ++ return 1; ++ if (ctx->super != s) ++ return 1; ++ if (get_super_private(s)->entd.tsk == current) ++ return 0; ++ if (!lock_stack_isclean(&ctx->stack)) ++ return 0; ++ if (ctx->trans->atom != NULL) ++ return 0; ++ return 1; ++} ++ ++#endif ++ ++/** ++ * reiser4_writepage - writepage of struct address_space_operations ++ * @page: page to write ++ * @wbc: ++ * ++ * ++ */ ++/* Common memory pressure notification. */ ++int reiser4_writepage(struct page *page, ++ struct writeback_control *wbc) ++{ ++ struct super_block *s; ++ reiser4_context *ctx; ++ ++ assert("vs-828", PageLocked(page)); ++ ++ s = page->mapping->host->i_sb; ++ ctx = get_current_context_check(); ++ ++ assert("", can_hit_entd(ctx, s)); ++ ++ return write_page_by_ent(page, wbc); ++} ++ ++/* ->set_page_dirty() method of formatted address_space */ ++static int formatted_set_page_dirty(struct page *page) ++{ ++ assert("nikita-2173", page != NULL); ++ BUG(); ++ return __set_page_dirty_nobuffers(page); ++} ++ ++/* writepages method of address space operations in reiser4 is used to involve ++ into transactions pages which are dirtied via mmap. Only regular files can ++ have such pages. Fake inode is used to access formatted nodes via page ++ cache. As formatted nodes can never be mmaped, fake inode's writepages has ++ nothing to do */ ++static int ++writepages_fake(struct address_space *mapping, struct writeback_control *wbc) ++{ ++ return 0; ++} ++ ++/* address space operations for the fake inode */ ++static struct address_space_operations formatted_fake_as_ops = { ++ /* Perform a writeback of a single page as a memory-freeing ++ * operation. 
*/ ++ .writepage = reiser4_writepage, ++ /* this is called to read formatted node */ ++ .readpage = formatted_readpage, ++ /* ->sync_page() method of fake inode address space operations. Called ++ from wait_on_page() and lock_page(). ++ ++ This is most annoyingly misnomered method. Actually it is called ++ from wait_on_page_bit() and lock_page() and its purpose is to ++ actually start io by jabbing device drivers. ++ */ ++ .sync_page = block_sync_page, ++ /* Write back some dirty pages from this mapping. Called from sync. ++ called during sync (pdflush) */ ++ .writepages = writepages_fake, ++ /* Set a page dirty */ ++ .set_page_dirty = formatted_set_page_dirty, ++ /* used for read-ahead. Not applicable */ ++ .readpages = NULL, ++ .prepare_write = NULL, ++ .commit_write = NULL, ++ .bmap = NULL, ++ /* called just before page is being detached from inode mapping and ++ removed from memory. Called on truncate, cut/squeeze, and ++ umount. */ ++ .invalidatepage = reiser4_invalidatepage, ++ /* this is called by shrink_cache() so that file system can try to ++ release objects (jnodes, buffers, journal heads) attached to page ++ and, may be made page itself free-able. ++ */ ++ .releasepage = reiser4_releasepage, ++ .direct_IO = NULL ++}; ++ ++/* called just before page is released (no longer used by reiser4). Callers: ++ jdelete() and extent2tail(). 
*/ ++void reiser4_drop_page(struct page *page) ++{ ++ assert("nikita-2181", PageLocked(page)); ++ clear_page_dirty_for_io(page); ++ ClearPageUptodate(page); ++#if defined(PG_skipped) ++ ClearPageSkipped(page); ++#endif ++ unlock_page(page); ++} ++ ++#define JNODE_GANG_SIZE (16) ++ ++/* find all jnodes from range specified and invalidate them */ ++static int ++truncate_jnodes_range(struct inode *inode, pgoff_t from, pgoff_t count) ++{ ++ reiser4_inode *info; ++ int truncated_jnodes; ++ reiser4_tree *tree; ++ unsigned long index; ++ unsigned long end; ++ ++ if (inode_file_plugin(inode) == ++ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)) ++ /* No need to get rid of jnodes here: if the single jnode of ++ page cluster did not have page, then it was found and killed ++ before in ++ truncate_page_cluster_cryptcompress()->jput()->jput_final(), ++ otherwise it will be dropped by reiser4_invalidatepage() */ ++ return 0; ++ truncated_jnodes = 0; ++ ++ info = reiser4_inode_data(inode); ++ tree = reiser4_tree_by_inode(inode); ++ ++ index = from; ++ end = from + count; ++ ++ while (1) { ++ jnode *gang[JNODE_GANG_SIZE]; ++ int taken; ++ int i; ++ jnode *node; ++ ++ assert("nikita-3466", index <= end); ++ ++ read_lock_tree(tree); ++ taken = ++ radix_tree_gang_lookup(jnode_tree_by_reiser4_inode(info), ++ (void **)gang, index, ++ JNODE_GANG_SIZE); ++ for (i = 0; i < taken; ++i) { ++ node = gang[i]; ++ if (index_jnode(node) < end) ++ jref(node); ++ else ++ gang[i] = NULL; ++ } ++ read_unlock_tree(tree); ++ ++ for (i = 0; i < taken; ++i) { ++ node = gang[i]; ++ if (node != NULL) { ++ index = max(index, index_jnode(node)); ++ spin_lock_jnode(node); ++ assert("edward-1457", node->pg == NULL); ++ /* this is always called after ++ truncate_inode_pages_range(). Therefore, here ++ jnode can not have page. 
New pages can not be ++ created because truncate_jnodes_range goes ++ under exclusive access on file obtained, ++ where as new page creation requires ++ non-exclusive access obtained */ ++ JF_SET(node, JNODE_HEARD_BANSHEE); ++ reiser4_uncapture_jnode(node); ++ unhash_unformatted_jnode(node); ++ truncated_jnodes++; ++ jput(node); ++ } else ++ break; ++ } ++ if (i != taken || taken == 0) ++ break; ++ } ++ return truncated_jnodes; ++} ++ ++/* Truncating files in reiser4: problems and solutions. ++ ++ VFS calls fs's truncate after it has called truncate_inode_pages() ++ to get rid of pages corresponding to part of file being truncated. ++ In reiser4 it may cause existence of unallocated extents which do ++ not have jnodes. Flush code does not expect that. Solution of this ++ problem is straightforward. As vfs's truncate is implemented using ++ setattr operation, it seems reasonable to have ->setattr() that ++ will cut file body. However, flush code also does not expect dirty ++ pages without parent items, so it is impossible to cut all items, ++ then truncate all pages in two steps. We resolve this problem by ++ cutting items one-by-one. Each such fine-grained step performed ++ under longterm znode lock calls at the end ->kill_hook() method of ++ a killed item to remove its binded pages and jnodes. ++ ++ The following function is a common part of mentioned kill hooks. ++ Also, this is called before tail-to-extent conversion (to not manage ++ few copies of the data). 
++*/ ++void reiser4_invalidate_pages(struct address_space *mapping, pgoff_t from, ++ unsigned long count, int even_cows) ++{ ++ loff_t from_bytes, count_bytes; ++ ++ if (count == 0) ++ return; ++ from_bytes = ((loff_t) from) << PAGE_CACHE_SHIFT; ++ count_bytes = ((loff_t) count) << PAGE_CACHE_SHIFT; ++ ++ unmap_mapping_range(mapping, from_bytes, count_bytes, even_cows); ++ truncate_inode_pages_range(mapping, from_bytes, ++ from_bytes + count_bytes - 1); ++ truncate_jnodes_range(mapping->host, from, count); ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 120 ++ * scroll-step: 1 ++ * End: ++ */ +diff --git a/fs/reiser4/page_cache.h b/fs/reiser4/page_cache.h +new file mode 100644 +index 0000000..ab74f8f +--- /dev/null ++++ b/fs/reiser4/page_cache.h +@@ -0,0 +1,68 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++/* Memory pressure hooks. Fake inodes handling. See page_cache.c. 
*/ ++ ++#if !defined( __REISER4_PAGE_CACHE_H__ ) ++#define __REISER4_PAGE_CACHE_H__ ++ ++#include "forward.h" ++#include "context.h" /* for reiser4_ctx_gfp_mask_get() */ ++ ++#include /* for struct super_block, address_space */ ++#include /* for struct page */ ++#include /* for lock_page() */ ++#include /* for __vmalloc() */ ++ ++extern int reiser4_init_formatted_fake(struct super_block *); ++extern void reiser4_done_formatted_fake(struct super_block *); ++ ++extern reiser4_tree *reiser4_tree_by_page(const struct page *); ++ ++extern int reiser4_set_page_dirty_internal(struct page *); ++ ++#define reiser4_submit_bio(rw, bio) submit_bio((rw), (bio)) ++ ++extern void reiser4_wait_page_writeback(struct page *); ++static inline void lock_and_wait_page_writeback(struct page *page) ++{ ++ lock_page(page); ++ if (unlikely(PageWriteback(page))) ++ reiser4_wait_page_writeback(page); ++} ++ ++#define jprivate(page) ((jnode *)page_private(page)) ++ ++extern int reiser4_page_io(struct page *, jnode *, int rw, gfp_t); ++extern void reiser4_drop_page(struct page *); ++extern void reiser4_invalidate_pages(struct address_space *, pgoff_t from, ++ unsigned long count, int even_cows); ++extern void capture_reiser4_inodes(struct super_block *, ++ struct writeback_control *); ++static inline void * reiser4_vmalloc (unsigned long size) ++{ ++ return __vmalloc(size, ++ reiser4_ctx_gfp_mask_get() | __GFP_HIGHMEM, ++ PAGE_KERNEL); ++} ++ ++#define PAGECACHE_TAG_REISER4_MOVED PAGECACHE_TAG_DIRTY ++ ++#if REISER4_DEBUG ++extern void print_page(const char *prefix, struct page *page); ++#else ++#define print_page(prf, p) noop ++#endif ++ ++/* __REISER4_PAGE_CACHE_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/Makefile b/fs/reiser4/plugin/Makefile +new file mode 100644 +index 0000000..4b2c9f8 +--- /dev/null ++++ b/fs/reiser4/plugin/Makefile +@@ -0,0 +1,26 @@ ++obj-$(CONFIG_REISER4_FS) += plugins.o ++ ++plugins-objs := \ ++ plugin.o \ ++ plugin_set.o \ ++ object.o \ ++ inode_ops.o \ ++ inode_ops_rename.o \ ++ file_ops.o \ ++ file_ops_readdir.o \ ++ file_plugin_common.o \ ++ dir_plugin_common.o \ ++ digest.o \ ++ hash.o \ ++ fibration.o \ ++ tail_policy.o \ ++ regular.o ++ ++obj-$(CONFIG_REISER4_FS) += item/ ++obj-$(CONFIG_REISER4_FS) += file/ ++obj-$(CONFIG_REISER4_FS) += dir/ ++obj-$(CONFIG_REISER4_FS) += node/ ++obj-$(CONFIG_REISER4_FS) += compress/ ++obj-$(CONFIG_REISER4_FS) += space/ ++obj-$(CONFIG_REISER4_FS) += disk_format/ ++obj-$(CONFIG_REISER4_FS) += security/ +diff --git a/fs/reiser4/plugin/cluster.c b/fs/reiser4/plugin/cluster.c +new file mode 100644 +index 0000000..b400d5f +--- /dev/null ++++ b/fs/reiser4/plugin/cluster.c +@@ -0,0 +1,71 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Contains reiser4 cluster plugins (see ++ http://www.namesys.com/cryptcompress_design.html ++ "Concepts of clustering" for details). */ ++ ++#include "plugin_header.h" ++#include "plugin.h" ++#include "../inode.h" ++ ++static int change_cluster(struct inode *inode, ++ reiser4_plugin * plugin, ++ pset_member memb) ++{ ++ assert("edward-1324", inode != NULL); ++ assert("edward-1325", plugin != NULL); ++ assert("edward-1326", is_reiser4_inode(inode)); ++ assert("edward-1327", plugin->h.type_id == REISER4_CLUSTER_PLUGIN_TYPE); ++ ++ /* Can't change the cluster plugin for already existent regular files. */ ++ if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE)) ++ return RETERR(-EINVAL); ++ ++ /* If matches, nothing to change. 
*/ ++ if (inode_hash_plugin(inode) != NULL && ++ inode_hash_plugin(inode)->h.id == plugin->h.id) ++ return 0; ++ ++ return aset_set_unsafe(&reiser4_inode_data(inode)->pset, ++ PSET_CLUSTER, plugin); ++} ++ ++static reiser4_plugin_ops cluster_plugin_ops = { ++ .init = NULL, ++ .load = NULL, ++ .save_len = NULL, ++ .save = NULL, ++ .change = &change_cluster ++}; ++ ++#define SUPPORT_CLUSTER(SHIFT, ID, LABEL, DESC) \ ++ [CLUSTER_ ## ID ## _ID] = { \ ++ .h = { \ ++ .type_id = REISER4_CLUSTER_PLUGIN_TYPE, \ ++ .id = CLUSTER_ ## ID ## _ID, \ ++ .pops = &cluster_plugin_ops, \ ++ .label = LABEL, \ ++ .desc = DESC, \ ++ .linkage = {NULL, NULL} \ ++ }, \ ++ .shift = SHIFT \ ++ } ++ ++cluster_plugin cluster_plugins[LAST_CLUSTER_ID] = { ++ SUPPORT_CLUSTER(16, 64K, "64K", "Large"), ++ SUPPORT_CLUSTER(15, 32K, "32K", "Big"), ++ SUPPORT_CLUSTER(14, 16K, "16K", "Average"), ++ SUPPORT_CLUSTER(13, 8K, "8K", "Small"), ++ SUPPORT_CLUSTER(12, 4K, "4K", "Minimal") ++}; ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/cluster.h b/fs/reiser4/plugin/cluster.h +new file mode 100644 +index 0000000..019f156 +--- /dev/null ++++ b/fs/reiser4/plugin/cluster.h +@@ -0,0 +1,343 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* This file contains page/cluster index translators and offset modulators ++ See http://www.namesys.com/cryptcompress_design.html for details */ ++ ++#if !defined( __FS_REISER4_CLUSTER_H__ ) ++#define __FS_REISER4_CLUSTER_H__ ++ ++#include "../inode.h" ++ ++static inline int inode_cluster_shift(struct inode *inode) ++{ ++ assert("edward-92", inode != NULL); ++ assert("edward-93", reiser4_inode_data(inode) != NULL); ++ ++ return inode_cluster_plugin(inode)->shift; ++} ++ ++static inline unsigned cluster_nrpages_shift(struct inode *inode) ++{ ++ return inode_cluster_shift(inode) - 
PAGE_CACHE_SHIFT; ++} ++ ++/* cluster size in page units */ ++static inline unsigned cluster_nrpages(struct inode *inode) ++{ ++ return 1U << cluster_nrpages_shift(inode); ++} ++ ++static inline size_t inode_cluster_size(struct inode *inode) ++{ ++ assert("edward-96", inode != NULL); ++ ++ return 1U << inode_cluster_shift(inode); ++} ++ ++static inline cloff_t pg_to_clust(pgoff_t idx, struct inode *inode) ++{ ++ return idx >> cluster_nrpages_shift(inode); ++} ++ ++static inline pgoff_t clust_to_pg(cloff_t idx, struct inode *inode) ++{ ++ return idx << cluster_nrpages_shift(inode); ++} ++ ++static inline pgoff_t pg_to_clust_to_pg(pgoff_t idx, struct inode *inode) ++{ ++ return clust_to_pg(pg_to_clust(idx, inode), inode); ++} ++ ++static inline pgoff_t off_to_pg(loff_t off) ++{ ++ return (off >> PAGE_CACHE_SHIFT); ++} ++ ++static inline loff_t pg_to_off(pgoff_t idx) ++{ ++ return ((loff_t) (idx) << PAGE_CACHE_SHIFT); ++} ++ ++static inline cloff_t off_to_clust(loff_t off, struct inode *inode) ++{ ++ return off >> inode_cluster_shift(inode); ++} ++ ++static inline loff_t clust_to_off(cloff_t idx, struct inode *inode) ++{ ++ return (loff_t) idx << inode_cluster_shift(inode); ++} ++ ++static inline unsigned long count_to_nr(loff_t count, unsigned shift) ++{ ++ return (count + (1UL << shift) - 1) >> shift; ++} ++ ++/* number of pages occupied by @count bytes */ ++static inline pgoff_t count_to_nrpages(loff_t count) ++{ ++ return count_to_nr(count, PAGE_CACHE_SHIFT); ++} ++ ++/* number of clusters occupied by @count bytes */ ++static inline cloff_t count_to_nrclust(loff_t count, struct inode *inode) ++{ ++ return count_to_nr(count, inode_cluster_shift(inode)); ++} ++ ++/* number of clusters occupied by @count pages */ ++static inline cloff_t pgcount_to_nrclust(pgoff_t count, struct inode *inode) ++{ ++ return count_to_nr(count, cluster_nrpages_shift(inode)); ++} ++ ++static inline loff_t off_to_clust_to_off(loff_t off, struct inode *inode) ++{ ++ return 
clust_to_off(off_to_clust(off, inode), inode); ++} ++ ++static inline pgoff_t off_to_clust_to_pg(loff_t off, struct inode *inode) ++{ ++ return clust_to_pg(off_to_clust(off, inode), inode); ++} ++ ++static inline unsigned off_to_pgoff(loff_t off) ++{ ++ return off & (PAGE_CACHE_SIZE - 1); ++} ++ ++static inline unsigned off_to_cloff(loff_t off, struct inode *inode) ++{ ++ return off & ((loff_t) (inode_cluster_size(inode)) - 1); ++} ++ ++static inline unsigned ++pg_to_off_to_cloff(unsigned long idx, struct inode *inode) ++{ ++ return off_to_cloff(pg_to_off(idx), inode); ++} ++ ++/* if @size != 0, returns index of the page ++ which contains the last byte of the file */ ++static inline pgoff_t size_to_pg(loff_t size) ++{ ++ return (size ? off_to_pg(size - 1) : 0); ++} ++ ++/* minimal index of the page which doesn't contain ++ file data */ ++static inline pgoff_t size_to_next_pg(loff_t size) ++{ ++ return (size ? off_to_pg(size - 1) + 1 : 0); ++} ++ ++/* how many bytes of file of size @cnt can be contained ++ in page of index @idx */ ++static inline unsigned cnt_to_pgcnt(loff_t cnt, pgoff_t idx) ++{ ++ if (idx > off_to_pg(cnt)) ++ return 0; ++ if (idx < off_to_pg(cnt)) ++ return PAGE_CACHE_SIZE; ++ return off_to_pgoff(cnt); ++} ++ ++/* how many bytes of file of size @cnt can be contained ++ in logical cluster of index @idx */ ++static inline unsigned cnt_to_clcnt(loff_t cnt, cloff_t idx, ++ struct inode *inode) ++{ ++ if (idx > off_to_clust(cnt, inode)) ++ return 0; ++ if (idx < off_to_clust(cnt, inode)) ++ return inode_cluster_size(inode); ++ return off_to_cloff(cnt, inode); ++} ++ ++static inline unsigned ++fsize_to_count(reiser4_cluster_t * clust, struct inode *inode) ++{ ++ assert("edward-288", clust != NULL); ++ assert("edward-289", inode != NULL); ++ ++ return cnt_to_clcnt(inode->i_size, clust->index, inode); ++} ++ ++static inline int ++cluster_is_complete(reiser4_cluster_t * clust, struct inode * inode) ++{ ++ return clust->tc.lsize == 
inode_cluster_size(inode); ++} ++ ++static inline void reiser4_slide_init(reiser4_slide_t * win) ++{ ++ assert("edward-1084", win != NULL); ++ memset(win, 0, sizeof *win); ++} ++ ++static inline tfm_action ++cluster_get_tfm_act(tfm_cluster_t * tc) ++{ ++ assert("edward-1356", tc != NULL); ++ return tc->act; ++} ++ ++static inline void ++cluster_set_tfm_act(tfm_cluster_t * tc, tfm_action act) ++{ ++ assert("edward-1356", tc != NULL); ++ tc->act = act; ++} ++ ++static inline void ++cluster_init_act (reiser4_cluster_t * clust, tfm_action act, reiser4_slide_t * window){ ++ assert("edward-84", clust != NULL); ++ memset(clust, 0, sizeof *clust); ++ cluster_set_tfm_act(&clust->tc, act); ++ clust->dstat = INVAL_DISK_CLUSTER; ++ clust->win = window; ++} ++ ++static inline void ++cluster_init_read(reiser4_cluster_t * clust, reiser4_slide_t * window) ++{ ++ cluster_init_act (clust, TFMA_READ, window); ++} ++ ++static inline void ++cluster_init_write(reiser4_cluster_t * clust, reiser4_slide_t * window) ++{ ++ cluster_init_act (clust, TFMA_WRITE, window); ++} ++ ++static inline int dclust_get_extension_dsize(hint_t * hint) ++{ ++ return hint->ext_coord.extension.ctail.dsize; ++} ++ ++static inline void dclust_set_extension_dsize(hint_t * hint, int dsize) ++{ ++ hint->ext_coord.extension.ctail.dsize = dsize; ++} ++ ++static inline int dclust_get_extension_shift(hint_t * hint) ++{ ++ return hint->ext_coord.extension.ctail.shift; ++} ++ ++static inline int dclust_get_extension_ncount(hint_t * hint) ++{ ++ return hint->ext_coord.extension.ctail.ncount; ++} ++ ++static inline void dclust_inc_extension_ncount(hint_t * hint) ++{ ++ hint->ext_coord.extension.ctail.ncount ++; ++} ++ ++static inline void dclust_init_extension(hint_t * hint) ++{ ++ memset(&hint->ext_coord.extension.ctail, 0, ++ sizeof(hint->ext_coord.extension.ctail)); ++} ++ ++static inline int hint_is_unprepped_dclust(hint_t * hint) ++{ ++ assert("edward-1451", hint_is_valid(hint)); ++ return 
dclust_get_extension_shift(hint) == (int)UCTAIL_SHIFT; ++} ++ ++static inline void coord_set_between_clusters(coord_t * coord) ++{ ++#if REISER4_DEBUG ++ int result; ++ result = zload(coord->node); ++ assert("edward-1296", !result); ++#endif ++ if (!coord_is_between_items(coord)) { ++ coord->between = AFTER_ITEM; ++ coord->unit_pos = 0; ++ } ++#if REISER4_DEBUG ++ zrelse(coord->node); ++#endif ++} ++ ++int reiser4_inflate_cluster(reiser4_cluster_t *, struct inode *); ++int find_disk_cluster(reiser4_cluster_t *, struct inode *, int read, ++ znode_lock_mode mode); ++int flush_cluster_pages(reiser4_cluster_t *, jnode *, struct inode *); ++int reiser4_deflate_cluster(reiser4_cluster_t *, struct inode *); ++void truncate_page_cluster_cryptcompress(struct inode *inode, cloff_t start, ++ int even_cows); ++void invalidate_hint_cluster(reiser4_cluster_t * clust); ++void put_hint_cluster(reiser4_cluster_t * clust, struct inode *inode, ++ znode_lock_mode mode); ++int get_disk_cluster_locked(reiser4_cluster_t * clust, struct inode *inode, ++ znode_lock_mode lock_mode); ++void reset_cluster_params(reiser4_cluster_t * clust); ++int set_cluster_by_page(reiser4_cluster_t * clust, struct page * page, ++ int count); ++int prepare_page_cluster(struct inode *inode, reiser4_cluster_t * clust, ++ int capture); ++void reiser4_release_cluster_pages(reiser4_cluster_t *); ++void put_cluster_handle(reiser4_cluster_t * clust); ++int grab_tfm_stream(struct inode *inode, tfm_cluster_t * tc, tfm_stream_id id); ++int tfm_cluster_is_uptodate(tfm_cluster_t * tc); ++void tfm_cluster_set_uptodate(tfm_cluster_t * tc); ++void tfm_cluster_clr_uptodate(tfm_cluster_t * tc); ++ ++/* move cluster handle to the target position ++ specified by the page of index @pgidx ++*/ ++static inline void move_cluster_forward(reiser4_cluster_t * clust, ++ struct inode *inode, ++ pgoff_t pgidx) ++{ ++ assert("edward-1297", clust != NULL); ++ assert("edward-1298", inode != NULL); ++ ++ reset_cluster_params(clust); ++ if 
(clust->index_valid && ++ /* Hole in the indices. Hint became invalid and can not be ++ used by find_cluster_item() even if seal/node versions ++ will coincide */ ++ pg_to_clust(pgidx, inode) != clust->index + 1) { ++ reiser4_unset_hint(clust->hint); ++ invalidate_hint_cluster(clust); ++ } ++ clust->index = pg_to_clust(pgidx, inode); ++ clust->index_valid = 1; ++} ++ ++static inline int ++alloc_clust_pages(reiser4_cluster_t * clust, struct inode *inode) ++{ ++ assert("edward-791", clust != NULL); ++ assert("edward-792", inode != NULL); ++ clust->pages = ++ kmalloc(sizeof(*clust->pages) << inode_cluster_shift(inode), ++ reiser4_ctx_gfp_mask_get()); ++ if (!clust->pages) ++ return -ENOMEM; ++ return 0; ++} ++ ++static inline void free_clust_pages(reiser4_cluster_t * clust) ++{ ++ kfree(clust->pages); ++} ++ ++#endif /* __FS_REISER4_CLUSTER_H__ */ ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/compress/Makefile b/fs/reiser4/plugin/compress/Makefile +new file mode 100644 +index 0000000..82793a4 +--- /dev/null ++++ b/fs/reiser4/plugin/compress/Makefile +@@ -0,0 +1,6 @@ ++obj-$(CONFIG_REISER4_FS) += compress_plugins.o ++ ++compress_plugins-objs := \ ++ compress.o \ ++ minilzo.o \ ++ compress_mode.o +diff --git a/fs/reiser4/plugin/compress/compress.c b/fs/reiser4/plugin/compress/compress.c +new file mode 100644 +index 0000000..7e64d0c +--- /dev/null ++++ b/fs/reiser4/plugin/compress/compress.c +@@ -0,0 +1,381 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++/* reiser4 compression transform plugins */ ++ ++#include "../../debug.h" ++#include "../../inode.h" ++#include "../plugin.h" ++#include "minilzo.h" ++ ++#include ++#include ++#include ++ ++static int change_compression(struct inode *inode, ++ reiser4_plugin * plugin, ++ pset_member memb) ++{ ++ 
assert("edward-1316", inode != NULL); ++ assert("edward-1317", plugin != NULL); ++ assert("edward-1318", is_reiser4_inode(inode)); ++ assert("edward-1319", ++ plugin->h.type_id == REISER4_COMPRESSION_PLUGIN_TYPE); ++ ++ /* cannot change compression plugin of already existing regular object */ ++ if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE)) ++ return RETERR(-EINVAL); ++ ++ /* If matches, nothing to change. */ ++ if (inode_hash_plugin(inode) != NULL && ++ inode_hash_plugin(inode)->h.id == plugin->h.id) ++ return 0; ++ ++ return aset_set_unsafe(&reiser4_inode_data(inode)->pset, ++ PSET_COMPRESSION, plugin); ++} ++ ++static reiser4_plugin_ops compression_plugin_ops = { ++ .init = NULL, ++ .load = NULL, ++ .save_len = NULL, ++ .save = NULL, ++ .change = &change_compression ++}; ++ ++/******************************************************************************/ ++/* gzip1 compression */ ++/******************************************************************************/ ++ ++#define GZIP1_DEF_LEVEL Z_BEST_SPEED ++#define GZIP1_DEF_WINBITS 15 ++#define GZIP1_DEF_MEMLEVEL MAX_MEM_LEVEL ++ ++static int gzip1_init(void) ++{ ++ int ret = -EINVAL; ++#if REISER4_ZLIB ++ ret = 0; ++#endif ++ if (ret == -EINVAL) ++ warning("edward-1337", "Zlib not compiled into kernel"); ++ return ret; ++} ++ ++static int gzip1_overrun(unsigned src_len UNUSED_ARG) ++{ ++ return 0; ++} ++ ++static coa_t gzip1_alloc(tfm_action act) ++{ ++ coa_t coa = NULL; ++#if REISER4_ZLIB ++ int ret = 0; ++ switch (act) { ++ case TFMA_WRITE: /* compress */ ++ coa = reiser4_vmalloc(zlib_deflate_workspacesize()); ++ if (!coa) { ++ ret = -ENOMEM; ++ break; ++ } ++ memset(coa, 0, zlib_deflate_workspacesize()); ++ break; ++ case TFMA_READ: /* decompress */ ++ coa = reiser4_vmalloc(zlib_inflate_workspacesize()); ++ if (!coa) { ++ ret = -ENOMEM; ++ break; ++ } ++ memset(coa, 0, zlib_inflate_workspacesize()); ++ break; ++ default: ++ impossible("edward-767", ++ "trying to alloc workspace for 
unknown tfm action"); ++ } ++ if (ret) { ++ warning("edward-768", ++ "alloc workspace for gzip1 (tfm action = %d) failed\n", ++ act); ++ return ERR_PTR(ret); ++ } ++#endif ++ return coa; ++} ++ ++static void gzip1_free(coa_t coa, tfm_action act) ++{ ++ assert("edward-769", coa != NULL); ++ ++ switch (act) { ++ case TFMA_WRITE: /* compress */ ++ vfree(coa); ++ break; ++ case TFMA_READ: /* decompress */ ++ vfree(coa); ++ break; ++ default: ++ impossible("edward-770", "unknown tfm action"); ++ } ++ return; ++} ++ ++static int gzip1_min_size_deflate(void) ++{ ++ return 64; ++} ++ ++static void ++gzip1_compress(coa_t coa, __u8 * src_first, unsigned src_len, ++ __u8 * dst_first, unsigned *dst_len) ++{ ++#if REISER4_ZLIB ++ int ret = 0; ++ struct z_stream_s stream; ++ ++ memset(&stream, 0, sizeof(stream)); ++ ++ assert("edward-842", coa != NULL); ++ assert("edward-875", src_len != 0); ++ ++ stream.workspace = coa; ++ ret = zlib_deflateInit2(&stream, GZIP1_DEF_LEVEL, Z_DEFLATED, ++ -GZIP1_DEF_WINBITS, GZIP1_DEF_MEMLEVEL, ++ Z_DEFAULT_STRATEGY); ++ if (ret != Z_OK) { ++ warning("edward-771", "zlib_deflateInit2 returned %d\n", ret); ++ goto rollback; ++ } ++ ret = zlib_deflateReset(&stream); ++ if (ret != Z_OK) { ++ warning("edward-772", "zlib_deflateReset returned %d\n", ret); ++ goto rollback; ++ } ++ stream.next_in = src_first; ++ stream.avail_in = src_len; ++ stream.next_out = dst_first; ++ stream.avail_out = *dst_len; ++ ++ ret = zlib_deflate(&stream, Z_FINISH); ++ if (ret != Z_STREAM_END) { ++ if (ret != Z_OK) ++ warning("edward-773", ++ "zlib_deflate returned %d\n", ret); ++ goto rollback; ++ } ++ *dst_len = stream.total_out; ++ return; ++ rollback: ++ *dst_len = src_len; ++#endif ++ return; ++} ++ ++static void ++gzip1_decompress(coa_t coa, __u8 * src_first, unsigned src_len, ++ __u8 * dst_first, unsigned *dst_len) ++{ ++#if REISER4_ZLIB ++ int ret = 0; ++ struct z_stream_s stream; ++ ++ memset(&stream, 0, sizeof(stream)); ++ ++ assert("edward-843", coa != NULL); ++ 
assert("edward-876", src_len != 0); ++ ++ stream.workspace = coa; ++ ret = zlib_inflateInit2(&stream, -GZIP1_DEF_WINBITS); ++ if (ret != Z_OK) { ++ warning("edward-774", "zlib_inflateInit2 returned %d\n", ret); ++ return; ++ } ++ ret = zlib_inflateReset(&stream); ++ if (ret != Z_OK) { ++ warning("edward-775", "zlib_inflateReset returned %d\n", ret); ++ return; ++ } ++ ++ stream.next_in = src_first; ++ stream.avail_in = src_len; ++ stream.next_out = dst_first; ++ stream.avail_out = *dst_len; ++ ++ ret = zlib_inflate(&stream, Z_SYNC_FLUSH); ++ /* ++ * Work around a bug in zlib, which sometimes wants to taste an extra ++ * byte when being used in the (undocumented) raw deflate mode. ++ * (From USAGI). ++ */ ++ if (ret == Z_OK && !stream.avail_in && stream.avail_out) { ++ u8 zerostuff = 0; ++ stream.next_in = &zerostuff; ++ stream.avail_in = 1; ++ ret = zlib_inflate(&stream, Z_FINISH); ++ } ++ if (ret != Z_STREAM_END) { ++ warning("edward-776", "zlib_inflate returned %d\n", ret); ++ return; ++ } ++ *dst_len = stream.total_out; ++#endif ++ return; ++} ++ ++/******************************************************************************/ ++/* lzo1 compression */ ++/******************************************************************************/ ++ ++static int lzo1_init(void) ++{ ++ int ret; ++ ret = lzo_init(); ++ if (ret != LZO_E_OK) ++ warning("edward-848", "lzo_init() failed with ret = %d\n", ret); ++ return ret; ++} ++ ++static int lzo1_overrun(unsigned in_len) ++{ ++ return in_len / 64 + 16 + 3; ++} ++ ++#define LZO_HEAP_SIZE(size) \ ++ sizeof(lzo_align_t) * (((size) + (sizeof(lzo_align_t) - 1)) / sizeof(lzo_align_t)) ++ ++static coa_t lzo1_alloc(tfm_action act) ++{ ++ int ret = 0; ++ coa_t coa = NULL; ++ ++ switch (act) { ++ case TFMA_WRITE: /* compress */ ++ coa = reiser4_vmalloc(LZO_HEAP_SIZE(LZO1X_1_MEM_COMPRESS)); ++ if (!coa) { ++ ret = -ENOMEM; ++ break; ++ } ++ memset(coa, 0, LZO_HEAP_SIZE(LZO1X_1_MEM_COMPRESS)); ++ case TFMA_READ: /* decompress */ ++ break; 
++ default: ++ impossible("edward-877", ++ "trying to alloc workspace for unknown tfm action"); ++ } ++ if (ret) { ++ warning("edward-878", ++ "alloc workspace for lzo1 (tfm action = %d) failed\n", ++ act); ++ return ERR_PTR(ret); ++ } ++ return coa; ++} ++ ++static void lzo1_free(coa_t coa, tfm_action act) ++{ ++ assert("edward-879", coa != NULL); ++ ++ switch (act) { ++ case TFMA_WRITE: /* compress */ ++ vfree(coa); ++ break; ++ case TFMA_READ: /* decompress */ ++ impossible("edward-1304", ++ "trying to free non-allocated workspace"); ++ default: ++ impossible("edward-880", "unknown tfm action"); ++ } ++ return; ++} ++ ++static int lzo1_min_size_deflate(void) ++{ ++ return 256; ++} ++ ++static void ++lzo1_compress(coa_t coa, __u8 * src_first, unsigned src_len, ++ __u8 * dst_first, unsigned *dst_len) ++{ ++ int result; ++ ++ assert("edward-846", coa != NULL); ++ assert("edward-847", src_len != 0); ++ ++ result = lzo1x_1_compress(src_first, src_len, dst_first, dst_len, coa); ++ if (result != LZO_E_OK) { ++ warning("edward-849", "lzo1x_1_compress failed\n"); ++ goto out; ++ } ++ if (*dst_len >= src_len) { ++ //warning("edward-850", "lzo1x_1_compress: incompressible data\n"); ++ goto out; ++ } ++ return; ++ out: ++ *dst_len = src_len; ++ return; ++} ++ ++static void ++lzo1_decompress(coa_t coa, __u8 * src_first, unsigned src_len, ++ __u8 * dst_first, unsigned *dst_len) ++{ ++ int result; ++ ++ assert("edward-851", coa == NULL); ++ assert("edward-852", src_len != 0); ++ ++ result = lzo1x_decompress(src_first, src_len, dst_first, dst_len, NULL); ++ if (result != LZO_E_OK) ++ warning("edward-853", "lzo1x_1_decompress failed\n"); ++ return; ++} ++ ++compression_plugin compression_plugins[LAST_COMPRESSION_ID] = { ++ [LZO1_COMPRESSION_ID] = { ++ .h = { ++ .type_id = REISER4_COMPRESSION_PLUGIN_TYPE, ++ .id = LZO1_COMPRESSION_ID, ++ .pops = &compression_plugin_ops, ++ .label = "lzo1", ++ .desc = "lzo1 compression transform", ++ .linkage = {NULL, NULL} ++ }, ++ .init = 
lzo1_init, ++ .overrun = lzo1_overrun, ++ .alloc = lzo1_alloc, ++ .free = lzo1_free, ++ .min_size_deflate = lzo1_min_size_deflate, ++ .checksum = reiser4_adler32, ++ .compress = lzo1_compress, ++ .decompress = lzo1_decompress ++ }, ++ [GZIP1_COMPRESSION_ID] = { ++ .h = { ++ .type_id = REISER4_COMPRESSION_PLUGIN_TYPE, ++ .id = GZIP1_COMPRESSION_ID, ++ .pops = &compression_plugin_ops, ++ .label = "gzip1", ++ .desc = "gzip1 compression transform", ++ .linkage = {NULL, NULL} ++ }, ++ .init = gzip1_init, ++ .overrun = gzip1_overrun, ++ .alloc = gzip1_alloc, ++ .free = gzip1_free, ++ .min_size_deflate = gzip1_min_size_deflate, ++ .checksum = reiser4_adler32, ++ .compress = gzip1_compress, ++ .decompress = gzip1_decompress ++ } ++}; ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/compress/compress.h b/fs/reiser4/plugin/compress/compress.h +new file mode 100644 +index 0000000..922ca0b +--- /dev/null ++++ b/fs/reiser4/plugin/compress/compress.h +@@ -0,0 +1,38 @@ ++#if !defined( __FS_REISER4_COMPRESS_H__ ) ++#define __FS_REISER4_COMPRESS_H__ ++ ++#include ++#include ++ ++typedef enum { ++ TFMA_READ, ++ TFMA_WRITE, ++ TFMA_LAST ++} tfm_action; ++ ++/* builtin compression plugins */ ++ ++typedef enum { ++ LZO1_COMPRESSION_ID, ++ GZIP1_COMPRESSION_ID, ++ LAST_COMPRESSION_ID, ++} reiser4_compression_id; ++ ++typedef unsigned long cloff_t; ++typedef void *coa_t; ++typedef coa_t coa_set[LAST_COMPRESSION_ID][TFMA_LAST]; ++ ++__u32 reiser4_adler32(char *data, __u32 len); ++ ++#endif /* __FS_REISER4_COMPRESS_H__ */ ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/compress/compress_mode.c b/fs/reiser4/plugin/compress/compress_mode.c +new file mode 100644 +index 0000000..2ae7856 +--- /dev/null ++++ b/fs/reiser4/plugin/compress/compress_mode.c +@@ -0,0 +1,162 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++/* This file contains Reiser4 compression mode plugins. ++ ++ Compression mode plugin is a set of handlers called by compressor ++ at flush time and represent some heuristics including the ones ++ which are to avoid compression of incompressible data, see ++ http://www.namesys.com/cryptcompress_design.html for more details. ++*/ ++#include "../../inode.h" ++#include "../plugin.h" ++ ++static int should_deflate_none(struct inode * inode, cloff_t index) ++{ ++ return 0; ++} ++ ++static int should_deflate_common(struct inode * inode, cloff_t index) ++{ ++ return compression_is_on(cryptcompress_inode_data(inode)); ++} ++ ++static int discard_hook_ultim(struct inode *inode, cloff_t index) ++{ ++ turn_off_compression(cryptcompress_inode_data(inode)); ++ return 0; ++} ++ ++static int discard_hook_lattd(struct inode *inode, cloff_t index) ++{ ++ cryptcompress_info_t * info = cryptcompress_inode_data(inode); ++ ++ assert("edward-1462", ++ get_lattice_factor(info) >= MIN_LATTICE_FACTOR && ++ get_lattice_factor(info) <= MAX_LATTICE_FACTOR); ++ ++ turn_off_compression(info); ++ if (get_lattice_factor(info) < MAX_LATTICE_FACTOR) ++ set_lattice_factor(info, get_lattice_factor(info) << 1); ++ return 0; ++} ++ ++static int accept_hook_lattd(struct inode *inode, cloff_t index) ++{ ++ turn_on_compression(cryptcompress_inode_data(inode)); ++ set_lattice_factor(cryptcompress_inode_data(inode), MIN_LATTICE_FACTOR); ++ return 0; ++} ++ ++/* Check on dynamic lattice, the adaptive compression modes which ++ defines the 
following behavior: ++ ++ Compression is on: try to compress everything and turn ++ it off, whenever cluster is incompressible. ++ ++ Compression is off: try to compress clusters of indexes ++ k * FACTOR (k = 0, 1, 2, ...) and turn it on, if some of ++ them is compressible. If incompressible, then increase FACTOR */ ++ ++/* check if @index belongs to one-dimensional lattice ++ of sparce factor @factor */ ++static int is_on_lattice(cloff_t index, int factor) ++{ ++ return (factor ? index % factor == 0: index == 0); ++} ++ ++static int should_deflate_lattd(struct inode * inode, cloff_t index) ++{ ++ return should_deflate_common(inode, index) || ++ is_on_lattice(index, ++ get_lattice_factor ++ (cryptcompress_inode_data(inode))); ++} ++ ++/* compression mode_plugins */ ++compression_mode_plugin compression_mode_plugins[LAST_COMPRESSION_MODE_ID] = { ++ [NONE_COMPRESSION_MODE_ID] = { ++ .h = { ++ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE, ++ .id = NONE_COMPRESSION_MODE_ID, ++ .pops = NULL, ++ .label = "none", ++ .desc = "Compress nothing", ++ .linkage = {NULL, NULL} ++ }, ++ .should_deflate = should_deflate_none, ++ .accept_hook = NULL, ++ .discard_hook = NULL ++ }, ++ /* Check-on-dynamic-lattice adaptive compression mode */ ++ [LATTD_COMPRESSION_MODE_ID] = { ++ .h = { ++ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE, ++ .id = LATTD_COMPRESSION_MODE_ID, ++ .pops = NULL, ++ .label = "lattd", ++ .desc = "Check on dynamic lattice", ++ .linkage = {NULL, NULL} ++ }, ++ .should_deflate = should_deflate_lattd, ++ .accept_hook = accept_hook_lattd, ++ .discard_hook = discard_hook_lattd ++ }, ++ /* Check-ultimately compression mode: ++ Turn off compression forever as soon as we meet ++ incompressible data */ ++ [ULTIM_COMPRESSION_MODE_ID] = { ++ .h = { ++ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE, ++ .id = ULTIM_COMPRESSION_MODE_ID, ++ .pops = NULL, ++ .label = "ultim", ++ .desc = "Check ultimately", ++ .linkage = {NULL, NULL} ++ }, ++ .should_deflate = 
should_deflate_common, ++ .accept_hook = NULL, ++ .discard_hook = discard_hook_ultim ++ }, ++ /* Force-to-compress-everything compression mode */ ++ [FORCE_COMPRESSION_MODE_ID] = { ++ .h = { ++ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE, ++ .id = FORCE_COMPRESSION_MODE_ID, ++ .pops = NULL, ++ .label = "force", ++ .desc = "Force to compress everything", ++ .linkage = {NULL, NULL} ++ }, ++ .should_deflate = NULL, ++ .accept_hook = NULL, ++ .discard_hook = NULL ++ }, ++ /* Convert-to-extent compression mode. ++ In this mode items will be converted to extents and management ++ will be passed to (classic) unix file plugin as soon as ->write() ++ detects that the first complete logical cluster (of index #0) is ++ incompressible. */ ++ [CONVX_COMPRESSION_MODE_ID] = { ++ .h = { ++ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE, ++ .id = CONVX_COMPRESSION_MODE_ID, ++ .pops = NULL, ++ .label = "conv", ++ .desc = "Convert to extent", ++ .linkage = {NULL, NULL} ++ }, ++ .should_deflate = should_deflate_common, ++ .accept_hook = NULL, ++ .discard_hook = NULL ++ } ++}; ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/compress/lzoconf.h b/fs/reiser4/plugin/compress/lzoconf.h +new file mode 100644 +index 0000000..cc0fa4d +--- /dev/null ++++ b/fs/reiser4/plugin/compress/lzoconf.h +@@ -0,0 +1,216 @@ ++/* lzoconf.h -- configuration for the LZO real-time data compression library ++ adopted for reiser4 compression transform plugin. ++ ++ This file is part of the LZO real-time data compression library ++ and not included in any proprietary licenses of reiser4. 
++ ++ Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer ++ All Rights Reserved. ++ ++ The LZO library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of ++ the License, or (at your option) any later version. ++ ++ The LZO library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with the LZO library; see the file COPYING. ++ If not, write to the Free Software Foundation, Inc., ++ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ Markus F.X.J. 
Oberhumer ++ ++ http://www.oberhumer.com/opensource/lzo/ ++ */ ++ ++#include /* for UINT_MAX, ULONG_MAX - edward */ ++ ++#ifndef __LZOCONF_H ++#define __LZOCONF_H ++ ++#define LZO_VERSION 0x1080 ++#define LZO_VERSION_STRING "1.08" ++#define LZO_VERSION_DATE "Jul 12 2002" ++ ++/* internal Autoconf configuration file - only used when building LZO */ ++ ++/*********************************************************************** ++// LZO requires a conforming ++************************************************************************/ ++ ++#define CHAR_BIT 8 ++#define USHRT_MAX 0xffff ++ ++/* workaround a cpp bug under hpux 10.20 */ ++#define LZO_0xffffffffL 4294967295ul ++ ++/*********************************************************************** ++// architecture defines ++************************************************************************/ ++ ++#if !defined(__LZO_i386) ++# if defined(__i386__) || defined(__386__) || defined(_M_IX86) ++# define __LZO_i386 ++# endif ++#endif ++ ++/* memory checkers */ ++#if !defined(__LZO_CHECKER) ++# if defined(__BOUNDS_CHECKING_ON) ++# define __LZO_CHECKER ++# elif defined(__CHECKER__) ++# define __LZO_CHECKER ++# elif defined(__INSURE__) ++# define __LZO_CHECKER ++# elif defined(__PURIFY__) ++# define __LZO_CHECKER ++# endif ++#endif ++ ++/*********************************************************************** ++// integral and pointer types ++************************************************************************/ ++ ++/* Integral types with 32 bits or more */ ++#if !defined(LZO_UINT32_MAX) ++# if (UINT_MAX >= LZO_0xffffffffL) ++ typedef unsigned int lzo_uint32; ++ typedef int lzo_int32; ++# define LZO_UINT32_MAX UINT_MAX ++# define LZO_INT32_MAX INT_MAX ++# define LZO_INT32_MIN INT_MIN ++# elif (ULONG_MAX >= LZO_0xffffffffL) ++ typedef unsigned long lzo_uint32; ++ typedef long lzo_int32; ++# define LZO_UINT32_MAX ULONG_MAX ++# define LZO_INT32_MAX LONG_MAX ++# define LZO_INT32_MIN LONG_MIN ++# else ++# error "lzo_uint32" ++# 
endif ++#endif ++ ++/* lzo_uint is used like size_t */ ++#if !defined(LZO_UINT_MAX) ++# if (UINT_MAX >= LZO_0xffffffffL) ++ typedef unsigned int lzo_uint; ++ typedef int lzo_int; ++# define LZO_UINT_MAX UINT_MAX ++# define LZO_INT_MAX INT_MAX ++# define LZO_INT_MIN INT_MIN ++# elif (ULONG_MAX >= LZO_0xffffffffL) ++ typedef unsigned long lzo_uint; ++ typedef long lzo_int; ++# define LZO_UINT_MAX ULONG_MAX ++# define LZO_INT_MAX LONG_MAX ++# define LZO_INT_MIN LONG_MIN ++# else ++# error "lzo_uint" ++# endif ++#endif ++ ++ typedef int lzo_bool; ++ ++/*********************************************************************** ++// memory models ++************************************************************************/ ++ ++/* Memory model that allows to access memory at offsets of lzo_uint. */ ++#if !defined(__LZO_MMODEL) ++# if (LZO_UINT_MAX <= UINT_MAX) ++# define __LZO_MMODEL ++# else ++# error "__LZO_MMODEL" ++# endif ++#endif ++ ++/* no typedef here because of const-pointer issues */ ++#define lzo_byte unsigned char __LZO_MMODEL ++#define lzo_bytep unsigned char __LZO_MMODEL * ++#define lzo_charp char __LZO_MMODEL * ++#define lzo_voidp void __LZO_MMODEL * ++#define lzo_shortp short __LZO_MMODEL * ++#define lzo_ushortp unsigned short __LZO_MMODEL * ++#define lzo_uint32p lzo_uint32 __LZO_MMODEL * ++#define lzo_int32p lzo_int32 __LZO_MMODEL * ++#define lzo_uintp lzo_uint __LZO_MMODEL * ++#define lzo_intp lzo_int __LZO_MMODEL * ++#define lzo_voidpp lzo_voidp __LZO_MMODEL * ++#define lzo_bytepp lzo_bytep __LZO_MMODEL * ++ ++#ifndef lzo_sizeof_dict_t ++# define lzo_sizeof_dict_t sizeof(lzo_bytep) ++#endif ++ ++typedef int (*lzo_compress_t) (const lzo_byte * src, lzo_uint src_len, ++ lzo_byte * dst, lzo_uintp dst_len, ++ lzo_voidp wrkmem); ++ ++ ++/*********************************************************************** ++// error codes and prototypes ++************************************************************************/ ++ ++/* Error codes for the 
compression/decompression functions. Negative ++ * values are errors, positive values will be used for special but ++ * normal events. ++ */ ++#define LZO_E_OK 0 ++#define LZO_E_ERROR (-1) ++#define LZO_E_OUT_OF_MEMORY (-2) /* not used right now */ ++#define LZO_E_NOT_COMPRESSIBLE (-3) /* not used right now */ ++#define LZO_E_INPUT_OVERRUN (-4) ++#define LZO_E_OUTPUT_OVERRUN (-5) ++#define LZO_E_LOOKBEHIND_OVERRUN (-6) ++#define LZO_E_EOF_NOT_FOUND (-7) ++#define LZO_E_INPUT_NOT_CONSUMED (-8) ++ ++/* lzo_init() should be the first function you call. ++ * Check the return code ! ++ * ++ * lzo_init() is a macro to allow checking that the library and the ++ * compiler's view of various types are consistent. ++ */ ++#define lzo_init() __lzo_init2(LZO_VERSION,(int)sizeof(short),(int)sizeof(int),\ ++ (int)sizeof(long),(int)sizeof(lzo_uint32),(int)sizeof(lzo_uint),\ ++ (int)lzo_sizeof_dict_t,(int)sizeof(char *),(int)sizeof(lzo_voidp),\ ++ (int)sizeof(lzo_compress_t)) ++ extern int __lzo_init2(unsigned, int, int, int, int, int, int, ++ int, int, int); ++ ++/* checksum functions */ ++extern lzo_uint32 lzo_crc32(lzo_uint32 _c, const lzo_byte * _buf, ++ lzo_uint _len); ++/* misc. 
*/ ++ typedef union { ++ lzo_bytep p; ++ lzo_uint u; ++ } __lzo_pu_u; ++ typedef union { ++ lzo_bytep p; ++ lzo_uint32 u32; ++ } __lzo_pu32_u; ++ typedef union { ++ void *vp; ++ lzo_bytep bp; ++ lzo_uint32 u32; ++ long l; ++ } lzo_align_t; ++ ++#define LZO_PTR_ALIGN_UP(_ptr,_size) \ ++ ((_ptr) + (lzo_uint) __lzo_align_gap((const lzo_voidp)(_ptr),(lzo_uint)(_size))) ++ ++/* deprecated - only for backward compatibility */ ++#define LZO_ALIGN(_ptr,_size) LZO_PTR_ALIGN_UP(_ptr,_size) ++ ++#endif /* already included */ +diff --git a/fs/reiser4/plugin/compress/minilzo.c b/fs/reiser4/plugin/compress/minilzo.c +new file mode 100644 +index 0000000..2dba187 +--- /dev/null ++++ b/fs/reiser4/plugin/compress/minilzo.c +@@ -0,0 +1,1967 @@ ++/* minilzo.c -- mini subset of the LZO real-time data compression library ++ adopted for reiser4 compression transform plugin. ++ ++ This file is part of the LZO real-time data compression library ++ and not included in any proprietary licenses of reiser4. ++ ++ Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer ++ All Rights Reserved. ++ ++ The LZO library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of ++ the License, or (at your option) any later version. ++ ++ The LZO library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. 
++ ++ You should have received a copy of the GNU General Public License ++ along with the LZO library; see the file COPYING. ++ If not, write to the Free Software Foundation, Inc., ++ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ Markus F.X.J. Oberhumer ++ ++ http://www.oberhumer.com/opensource/lzo/ ++ */ ++ ++/* ++ * NOTE: ++ * the full LZO package can be found at ++ * http://www.oberhumer.com/opensource/lzo/ ++ */ ++ ++#include "../../debug.h" /* for reiser4 assert macro -edward */ ++ ++#define __LZO_IN_MINILZO ++#define LZO_BUILD ++ ++#include "minilzo.h" ++ ++#if !defined(MINILZO_VERSION) || (MINILZO_VERSION != 0x1080) ++# error "version mismatch in miniLZO source files" ++#endif ++ ++#ifndef __LZO_CONF_H ++#define __LZO_CONF_H ++ ++# define BOUNDS_CHECKING_OFF_DURING(stmt) stmt ++# define BOUNDS_CHECKING_OFF_IN_EXPR(expr) (expr) ++ ++# define HAVE_MEMCMP ++# define HAVE_MEMCPY ++# define HAVE_MEMMOVE ++# define HAVE_MEMSET ++ ++#undef NDEBUG ++#if !defined(LZO_DEBUG) ++# define NDEBUG ++#endif ++#if defined(LZO_DEBUG) || !defined(NDEBUG) ++# if !defined(NO_STDIO_H) ++# include ++# endif ++#endif ++ ++#if !defined(LZO_COMPILE_TIME_ASSERT) ++# define LZO_COMPILE_TIME_ASSERT(expr) \ ++ { typedef int __lzo_compile_time_assert_fail[1 - 2 * !(expr)]; } ++#endif ++ ++#if !defined(LZO_UNUSED) ++# if 1 ++# define LZO_UNUSED(var) ((void)&var) ++# elif 0 ++# define LZO_UNUSED(var) { typedef int __lzo_unused[sizeof(var) ? 2 : 1]; } ++# else ++# define LZO_UNUSED(parm) (parm = parm) ++# endif ++#endif ++ ++#if defined(NO_MEMCMP) ++# undef HAVE_MEMCMP ++#endif ++ ++#if !defined(HAVE_MEMSET) ++# undef memset ++# define memset lzo_memset ++#endif ++ ++# define LZO_BYTE(x) ((unsigned char) ((x) & 0xff)) ++ ++#define LZO_MAX(a,b) ((a) >= (b) ? (a) : (b)) ++#define LZO_MIN(a,b) ((a) <= (b) ? (a) : (b)) ++#define LZO_MAX3(a,b,c) ((a) >= (b) ? LZO_MAX(a,c) : LZO_MAX(b,c)) ++#define LZO_MIN3(a,b,c) ((a) <= (b) ? 
LZO_MIN(a,c) : LZO_MIN(b,c)) ++ ++#define lzo_sizeof(type) ((lzo_uint) (sizeof(type))) ++ ++#define LZO_HIGH(array) ((lzo_uint) (sizeof(array)/sizeof(*(array)))) ++ ++#define LZO_SIZE(bits) (1u << (bits)) ++#define LZO_MASK(bits) (LZO_SIZE(bits) - 1) ++ ++#define LZO_LSIZE(bits) (1ul << (bits)) ++#define LZO_LMASK(bits) (LZO_LSIZE(bits) - 1) ++ ++#define LZO_USIZE(bits) ((lzo_uint) 1 << (bits)) ++#define LZO_UMASK(bits) (LZO_USIZE(bits) - 1) ++ ++#define LZO_STYPE_MAX(b) (((1l << (8*(b)-2)) - 1l) + (1l << (8*(b)-2))) ++#define LZO_UTYPE_MAX(b) (((1ul << (8*(b)-1)) - 1ul) + (1ul << (8*(b)-1))) ++ ++#if !defined(SIZEOF_UNSIGNED) ++# if (UINT_MAX == 0xffff) ++# define SIZEOF_UNSIGNED 2 ++# elif (UINT_MAX == LZO_0xffffffffL) ++# define SIZEOF_UNSIGNED 4 ++# elif (UINT_MAX >= LZO_0xffffffffL) ++# define SIZEOF_UNSIGNED 8 ++# else ++# error "SIZEOF_UNSIGNED" ++# endif ++#endif ++ ++#if !defined(SIZEOF_UNSIGNED_LONG) ++# if (ULONG_MAX == LZO_0xffffffffL) ++# define SIZEOF_UNSIGNED_LONG 4 ++# elif (ULONG_MAX >= LZO_0xffffffffL) ++# define SIZEOF_UNSIGNED_LONG 8 ++# else ++# error "SIZEOF_UNSIGNED_LONG" ++# endif ++#endif ++ ++#if !defined(SIZEOF_SIZE_T) ++# define SIZEOF_SIZE_T SIZEOF_UNSIGNED ++#endif ++#if !defined(SIZE_T_MAX) ++# define SIZE_T_MAX LZO_UTYPE_MAX(SIZEOF_SIZE_T) ++#endif ++ ++#if 1 && defined(__LZO_i386) && (UINT_MAX == LZO_0xffffffffL) ++# if !defined(LZO_UNALIGNED_OK_2) && (USHRT_MAX == 0xffff) ++# define LZO_UNALIGNED_OK_2 ++# endif ++# if !defined(LZO_UNALIGNED_OK_4) && (LZO_UINT32_MAX == LZO_0xffffffffL) ++# define LZO_UNALIGNED_OK_4 ++# endif ++#endif ++ ++#if defined(LZO_UNALIGNED_OK_2) || defined(LZO_UNALIGNED_OK_4) ++# if !defined(LZO_UNALIGNED_OK) ++# define LZO_UNALIGNED_OK ++# endif ++#endif ++ ++#if defined(__LZO_NO_UNALIGNED) ++# undef LZO_UNALIGNED_OK ++# undef LZO_UNALIGNED_OK_2 ++# undef LZO_UNALIGNED_OK_4 ++#endif ++ ++#if defined(LZO_UNALIGNED_OK_2) && (USHRT_MAX != 0xffff) ++# error "LZO_UNALIGNED_OK_2 must not be defined on this 
system" ++#endif ++#if defined(LZO_UNALIGNED_OK_4) && (LZO_UINT32_MAX != LZO_0xffffffffL) ++# error "LZO_UNALIGNED_OK_4 must not be defined on this system" ++#endif ++ ++#if defined(__LZO_NO_ALIGNED) ++# undef LZO_ALIGNED_OK_4 ++#endif ++ ++#if defined(LZO_ALIGNED_OK_4) && (LZO_UINT32_MAX != LZO_0xffffffffL) ++# error "LZO_ALIGNED_OK_4 must not be defined on this system" ++#endif ++ ++#define LZO_LITTLE_ENDIAN 1234 ++#define LZO_BIG_ENDIAN 4321 ++#define LZO_PDP_ENDIAN 3412 ++ ++#if !defined(LZO_BYTE_ORDER) ++# if defined(MFX_BYTE_ORDER) ++# define LZO_BYTE_ORDER MFX_BYTE_ORDER ++# elif defined(__LZO_i386) ++# define LZO_BYTE_ORDER LZO_LITTLE_ENDIAN ++# elif defined(BYTE_ORDER) ++# define LZO_BYTE_ORDER BYTE_ORDER ++# elif defined(__BYTE_ORDER) ++# define LZO_BYTE_ORDER __BYTE_ORDER ++# endif ++#endif ++ ++#if defined(LZO_BYTE_ORDER) ++# if (LZO_BYTE_ORDER != LZO_LITTLE_ENDIAN) && \ ++ (LZO_BYTE_ORDER != LZO_BIG_ENDIAN) ++# error "invalid LZO_BYTE_ORDER" ++# endif ++#endif ++ ++#if defined(LZO_UNALIGNED_OK) && !defined(LZO_BYTE_ORDER) ++# error "LZO_BYTE_ORDER is not defined" ++#endif ++ ++#define LZO_OPTIMIZE_GNUC_i386_IS_BUGGY ++ ++#if defined(NDEBUG) && !defined(LZO_DEBUG) && !defined(__LZO_CHECKER) ++# if defined(__GNUC__) && defined(__i386__) ++# if !defined(LZO_OPTIMIZE_GNUC_i386_IS_BUGGY) ++# define LZO_OPTIMIZE_GNUC_i386 ++# endif ++# endif ++#endif ++ ++extern const lzo_uint32 _lzo_crc32_table[256]; ++ ++#define _LZO_STRINGIZE(x) #x ++#define _LZO_MEXPAND(x) _LZO_STRINGIZE(x) ++ ++#define _LZO_CONCAT2(a,b) a ## b ++#define _LZO_CONCAT3(a,b,c) a ## b ## c ++#define _LZO_CONCAT4(a,b,c,d) a ## b ## c ## d ++#define _LZO_CONCAT5(a,b,c,d,e) a ## b ## c ## d ## e ++ ++#define _LZO_ECONCAT2(a,b) _LZO_CONCAT2(a,b) ++#define _LZO_ECONCAT3(a,b,c) _LZO_CONCAT3(a,b,c) ++#define _LZO_ECONCAT4(a,b,c,d) _LZO_CONCAT4(a,b,c,d) ++#define _LZO_ECONCAT5(a,b,c,d,e) _LZO_CONCAT5(a,b,c,d,e) ++ ++#ifndef __LZO_PTR_H ++#define __LZO_PTR_H ++ ++#if !defined(lzo_ptrdiff_t) ++# if 
(UINT_MAX >= LZO_0xffffffffL) ++typedef ptrdiff_t lzo_ptrdiff_t; ++# else ++typedef long lzo_ptrdiff_t; ++# endif ++#endif ++ ++#if !defined(__LZO_HAVE_PTR_T) ++# if defined(lzo_ptr_t) ++# define __LZO_HAVE_PTR_T ++# endif ++#endif ++#if !defined(__LZO_HAVE_PTR_T) ++# if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED_LONG) ++# if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED_LONG) ++typedef unsigned long lzo_ptr_t; ++typedef long lzo_sptr_t; ++# define __LZO_HAVE_PTR_T ++# endif ++# endif ++#endif ++#if !defined(__LZO_HAVE_PTR_T) ++# if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED) ++# if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED) ++typedef unsigned int lzo_ptr_t; ++typedef int lzo_sptr_t; ++# define __LZO_HAVE_PTR_T ++# endif ++# endif ++#endif ++#if !defined(__LZO_HAVE_PTR_T) ++# if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED_SHORT) ++# if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED_SHORT) ++typedef unsigned short lzo_ptr_t; ++typedef short lzo_sptr_t; ++# define __LZO_HAVE_PTR_T ++# endif ++# endif ++#endif ++#if !defined(__LZO_HAVE_PTR_T) ++# if defined(LZO_HAVE_CONFIG_H) || defined(SIZEOF_CHAR_P) ++# error "no suitable type for lzo_ptr_t" ++# else ++typedef unsigned long lzo_ptr_t; ++typedef long lzo_sptr_t; ++# define __LZO_HAVE_PTR_T ++# endif ++#endif ++ ++#define PTR(a) ((lzo_ptr_t) (a)) ++#define PTR_LINEAR(a) PTR(a) ++#define PTR_ALIGNED_4(a) ((PTR_LINEAR(a) & 3) == 0) ++#define PTR_ALIGNED_8(a) ((PTR_LINEAR(a) & 7) == 0) ++#define PTR_ALIGNED2_4(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 3) == 0) ++#define PTR_ALIGNED2_8(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 7) == 0) ++ ++#define PTR_LT(a,b) (PTR(a) < PTR(b)) ++#define PTR_GE(a,b) (PTR(a) >= PTR(b)) ++#define PTR_DIFF(a,b) ((lzo_ptrdiff_t) (PTR(a) - PTR(b))) ++#define pd(a,b) ((lzo_uint) ((a)-(b))) ++ ++typedef union { ++ char a_char; ++ unsigned char a_uchar; ++ short a_short; ++ unsigned short a_ushort; ++ int a_int; ++ unsigned int a_uint; ++ long a_long; ++ unsigned long a_ulong; ++ lzo_int a_lzo_int; ++ lzo_uint 
a_lzo_uint; ++ lzo_int32 a_lzo_int32; ++ lzo_uint32 a_lzo_uint32; ++ ptrdiff_t a_ptrdiff_t; ++ lzo_ptrdiff_t a_lzo_ptrdiff_t; ++ lzo_ptr_t a_lzo_ptr_t; ++ lzo_voidp a_lzo_voidp; ++ void *a_void_p; ++ lzo_bytep a_lzo_bytep; ++ lzo_bytepp a_lzo_bytepp; ++ lzo_uintp a_lzo_uintp; ++ lzo_uint *a_lzo_uint_p; ++ lzo_uint32p a_lzo_uint32p; ++ lzo_uint32 *a_lzo_uint32_p; ++ unsigned char *a_uchar_p; ++ char *a_char_p; ++} lzo_full_align_t; ++ ++#endif ++#define LZO_DETERMINISTIC ++#define LZO_DICT_USE_PTR ++# define lzo_dict_t const lzo_bytep ++# define lzo_dict_p lzo_dict_t __LZO_MMODEL * ++#if !defined(lzo_moff_t) ++#define lzo_moff_t lzo_uint ++#endif ++#endif ++static lzo_ptr_t __lzo_ptr_linear(const lzo_voidp ptr) ++{ ++ return PTR_LINEAR(ptr); ++} ++ ++static unsigned __lzo_align_gap(const lzo_voidp ptr, lzo_uint size) ++{ ++ lzo_ptr_t p, s, n; ++ ++ assert("lzo-01", size > 0); ++ ++ p = __lzo_ptr_linear(ptr); ++ s = (lzo_ptr_t) (size - 1); ++ n = (((p + s) / size) * size) - p; ++ ++ assert("lzo-02", (long)n >= 0); ++ assert("lzo-03", n <= s); ++ ++ return (unsigned)n; ++} ++ ++#ifndef __LZO_UTIL_H ++#define __LZO_UTIL_H ++ ++#ifndef __LZO_CONF_H ++#endif ++ ++#if 1 && defined(HAVE_MEMCPY) ++#define MEMCPY8_DS(dest,src,len) \ ++ memcpy(dest,src,len); \ ++ dest += len; \ ++ src += len ++#endif ++ ++#if !defined(MEMCPY8_DS) ++ ++#define MEMCPY8_DS(dest,src,len) \ ++ { register lzo_uint __l = (len) / 8; \ ++ do { \ ++ *dest++ = *src++; \ ++ *dest++ = *src++; \ ++ *dest++ = *src++; \ ++ *dest++ = *src++; \ ++ *dest++ = *src++; \ ++ *dest++ = *src++; \ ++ *dest++ = *src++; \ ++ *dest++ = *src++; \ ++ } while (--__l > 0); } ++ ++#endif ++ ++#define MEMCPY_DS(dest,src,len) \ ++ do *dest++ = *src++; \ ++ while (--len > 0) ++ ++#define MEMMOVE_DS(dest,src,len) \ ++ do *dest++ = *src++; \ ++ while (--len > 0) ++ ++#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMSET) ++ ++#define BZERO8_PTR(s,l,n) memset((s),0,(lzo_uint)(l)*(n)) ++ ++#else ++ ++#define BZERO8_PTR(s,l,n) \ ++ 
lzo_memset((lzo_voidp)(s),0,(lzo_uint)(l)*(n)) ++ ++#endif ++#endif ++ ++/* If you use the LZO library in a product, you *must* keep this ++ * copyright string in the executable of your product. ++ */ ++ ++static const lzo_byte __lzo_copyright[] = ++#if !defined(__LZO_IN_MINLZO) ++ LZO_VERSION_STRING; ++#else ++ "\n\n\n" ++ "LZO real-time data compression library.\n" ++ "Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002 Markus Franz Xaver Johannes Oberhumer\n" ++ "\n" ++ "http://www.oberhumer.com/opensource/lzo/\n" ++ "\n" ++ "LZO version: v" LZO_VERSION_STRING ", " LZO_VERSION_DATE "\n" ++ "LZO build date: " __DATE__ " " __TIME__ "\n\n" ++ "LZO special compilation options:\n" ++#ifdef __cplusplus ++ " __cplusplus\n" ++#endif ++#if defined(__PIC__) ++ " __PIC__\n" ++#elif defined(__pic__) ++ " __pic__\n" ++#endif ++#if (UINT_MAX < LZO_0xffffffffL) ++ " 16BIT\n" ++#endif ++#if defined(__LZO_STRICT_16BIT) ++ " __LZO_STRICT_16BIT\n" ++#endif ++#if (UINT_MAX > LZO_0xffffffffL) ++ " UINT_MAX=" _LZO_MEXPAND(UINT_MAX) "\n" ++#endif ++#if (ULONG_MAX > LZO_0xffffffffL) ++ " ULONG_MAX=" _LZO_MEXPAND(ULONG_MAX) "\n" ++#endif ++#if defined(LZO_BYTE_ORDER) ++ " LZO_BYTE_ORDER=" _LZO_MEXPAND(LZO_BYTE_ORDER) "\n" ++#endif ++#if defined(LZO_UNALIGNED_OK_2) ++ " LZO_UNALIGNED_OK_2\n" ++#endif ++#if defined(LZO_UNALIGNED_OK_4) ++ " LZO_UNALIGNED_OK_4\n" ++#endif ++#if defined(LZO_ALIGNED_OK_4) ++ " LZO_ALIGNED_OK_4\n" ++#endif ++#if defined(LZO_DICT_USE_PTR) ++ " LZO_DICT_USE_PTR\n" ++#endif ++#if defined(__LZO_QUERY_COMPRESS) ++ " __LZO_QUERY_COMPRESS\n" ++#endif ++#if defined(__LZO_QUERY_DECOMPRESS) ++ " __LZO_QUERY_DECOMPRESS\n" ++#endif ++#if defined(__LZO_IN_MINILZO) ++ " __LZO_IN_MINILZO\n" ++#endif ++ "\n\n" "$Id: LZO " LZO_VERSION_STRING " built " __DATE__ " " __TIME__ ++#if defined(__GNUC__) && defined(__VERSION__) ++ " by gcc " __VERSION__ ++#elif defined(__BORLANDC__) ++ " by Borland C " _LZO_MEXPAND(__BORLANDC__) ++#elif defined(_MSC_VER) ++ " by Microsoft C " 
_LZO_MEXPAND(_MSC_VER) ++#elif defined(__PUREC__) ++ " by Pure C " _LZO_MEXPAND(__PUREC__) ++#elif defined(__SC__) ++ " by Symantec C " _LZO_MEXPAND(__SC__) ++#elif defined(__TURBOC__) ++ " by Turbo C " _LZO_MEXPAND(__TURBOC__) ++#elif defined(__WATCOMC__) ++ " by Watcom C " _LZO_MEXPAND(__WATCOMC__) ++#endif ++ " $\n" ++ "$Copyright: LZO (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002 Markus Franz Xaver Johannes Oberhumer $\n"; ++#endif ++ ++#define LZO_BASE 65521u ++#define LZO_NMAX 5552 ++ ++#define LZO_DO1(buf,i) {s1 += buf[i]; s2 += s1;} ++#define LZO_DO2(buf,i) LZO_DO1(buf,i); LZO_DO1(buf,i+1); ++#define LZO_DO4(buf,i) LZO_DO2(buf,i); LZO_DO2(buf,i+2); ++#define LZO_DO8(buf,i) LZO_DO4(buf,i); LZO_DO4(buf,i+4); ++#define LZO_DO16(buf,i) LZO_DO8(buf,i); LZO_DO8(buf,i+8); ++ ++# define IS_SIGNED(type) (((type) (-1)) < ((type) 0)) ++# define IS_UNSIGNED(type) (((type) (-1)) > ((type) 0)) ++ ++#define IS_POWER_OF_2(x) (((x) & ((x) - 1)) == 0) ++ ++static lzo_bool schedule_insns_bug(void); ++static lzo_bool strength_reduce_bug(int *); ++ ++# define __lzo_assert(x) ((x) ? 
1 : 0) ++ ++#undef COMPILE_TIME_ASSERT ++ ++# define COMPILE_TIME_ASSERT(expr) LZO_COMPILE_TIME_ASSERT(expr) ++ ++static lzo_bool basic_integral_check(void) ++{ ++ lzo_bool r = 1; ++ ++ COMPILE_TIME_ASSERT(CHAR_BIT == 8); ++ COMPILE_TIME_ASSERT(sizeof(char) == 1); ++ COMPILE_TIME_ASSERT(sizeof(short) >= 2); ++ COMPILE_TIME_ASSERT(sizeof(long) >= 4); ++ COMPILE_TIME_ASSERT(sizeof(int) >= sizeof(short)); ++ COMPILE_TIME_ASSERT(sizeof(long) >= sizeof(int)); ++ ++ COMPILE_TIME_ASSERT(sizeof(lzo_uint) == sizeof(lzo_int)); ++ COMPILE_TIME_ASSERT(sizeof(lzo_uint32) == sizeof(lzo_int32)); ++ ++ COMPILE_TIME_ASSERT(sizeof(lzo_uint32) >= 4); ++ COMPILE_TIME_ASSERT(sizeof(lzo_uint32) >= sizeof(unsigned)); ++#if defined(__LZO_STRICT_16BIT) ++ COMPILE_TIME_ASSERT(sizeof(lzo_uint) == 2); ++#else ++ COMPILE_TIME_ASSERT(sizeof(lzo_uint) >= 4); ++ COMPILE_TIME_ASSERT(sizeof(lzo_uint) >= sizeof(unsigned)); ++#endif ++ ++#if (USHRT_MAX == 65535u) ++ COMPILE_TIME_ASSERT(sizeof(short) == 2); ++#elif (USHRT_MAX == LZO_0xffffffffL) ++ COMPILE_TIME_ASSERT(sizeof(short) == 4); ++#elif (USHRT_MAX >= LZO_0xffffffffL) ++ COMPILE_TIME_ASSERT(sizeof(short) > 4); ++#endif ++ COMPILE_TIME_ASSERT(IS_UNSIGNED(unsigned char)); ++ COMPILE_TIME_ASSERT(IS_UNSIGNED(unsigned short)); ++ COMPILE_TIME_ASSERT(IS_UNSIGNED(unsigned)); ++ COMPILE_TIME_ASSERT(IS_UNSIGNED(unsigned long)); ++ COMPILE_TIME_ASSERT(IS_SIGNED(short)); ++ COMPILE_TIME_ASSERT(IS_SIGNED(int)); ++ COMPILE_TIME_ASSERT(IS_SIGNED(long)); ++ ++ COMPILE_TIME_ASSERT(IS_UNSIGNED(lzo_uint32)); ++ COMPILE_TIME_ASSERT(IS_UNSIGNED(lzo_uint)); ++ COMPILE_TIME_ASSERT(IS_SIGNED(lzo_int32)); ++ COMPILE_TIME_ASSERT(IS_SIGNED(lzo_int)); ++ ++ COMPILE_TIME_ASSERT(INT_MAX == LZO_STYPE_MAX(sizeof(int))); ++ COMPILE_TIME_ASSERT(UINT_MAX == LZO_UTYPE_MAX(sizeof(unsigned))); ++ COMPILE_TIME_ASSERT(LONG_MAX == LZO_STYPE_MAX(sizeof(long))); ++ COMPILE_TIME_ASSERT(ULONG_MAX == LZO_UTYPE_MAX(sizeof(unsigned long))); ++ COMPILE_TIME_ASSERT(USHRT_MAX == 
LZO_UTYPE_MAX(sizeof(unsigned short))); ++ COMPILE_TIME_ASSERT(LZO_UINT32_MAX == ++ LZO_UTYPE_MAX(sizeof(lzo_uint32))); ++ COMPILE_TIME_ASSERT(LZO_UINT_MAX == LZO_UTYPE_MAX(sizeof(lzo_uint))); ++ ++ r &= __lzo_assert(LZO_BYTE(257) == 1); ++ ++ return r; ++} ++ ++static lzo_bool basic_ptr_check(void) ++{ ++ lzo_bool r = 1; ++ ++ COMPILE_TIME_ASSERT(sizeof(char *) >= sizeof(int)); ++ COMPILE_TIME_ASSERT(sizeof(lzo_byte *) >= sizeof(char *)); ++ ++ COMPILE_TIME_ASSERT(sizeof(lzo_voidp) == sizeof(lzo_byte *)); ++ COMPILE_TIME_ASSERT(sizeof(lzo_voidp) == sizeof(lzo_voidpp)); ++ COMPILE_TIME_ASSERT(sizeof(lzo_voidp) == sizeof(lzo_bytepp)); ++ COMPILE_TIME_ASSERT(sizeof(lzo_voidp) >= sizeof(lzo_uint)); ++ ++ COMPILE_TIME_ASSERT(sizeof(lzo_ptr_t) == sizeof(lzo_voidp)); ++ COMPILE_TIME_ASSERT(sizeof(lzo_ptr_t) == sizeof(lzo_sptr_t)); ++ COMPILE_TIME_ASSERT(sizeof(lzo_ptr_t) >= sizeof(lzo_uint)); ++ ++ COMPILE_TIME_ASSERT(sizeof(lzo_ptrdiff_t) >= 4); ++ COMPILE_TIME_ASSERT(sizeof(lzo_ptrdiff_t) >= sizeof(ptrdiff_t)); ++ ++ COMPILE_TIME_ASSERT(sizeof(ptrdiff_t) >= sizeof(size_t)); ++ COMPILE_TIME_ASSERT(sizeof(lzo_ptrdiff_t) >= sizeof(lzo_uint)); ++ ++#if defined(SIZEOF_CHAR_P) ++ COMPILE_TIME_ASSERT(SIZEOF_CHAR_P == sizeof(char *)); ++#endif ++#if defined(SIZEOF_PTRDIFF_T) ++ COMPILE_TIME_ASSERT(SIZEOF_PTRDIFF_T == sizeof(ptrdiff_t)); ++#endif ++ ++ COMPILE_TIME_ASSERT(IS_SIGNED(ptrdiff_t)); ++ COMPILE_TIME_ASSERT(IS_UNSIGNED(size_t)); ++ COMPILE_TIME_ASSERT(IS_SIGNED(lzo_ptrdiff_t)); ++ COMPILE_TIME_ASSERT(IS_SIGNED(lzo_sptr_t)); ++ COMPILE_TIME_ASSERT(IS_UNSIGNED(lzo_ptr_t)); ++ COMPILE_TIME_ASSERT(IS_UNSIGNED(lzo_moff_t)); ++ ++ return r; ++} ++ ++static lzo_bool ptr_check(void) ++{ ++ lzo_bool r = 1; ++ int i; ++ char _wrkmem[10 * sizeof(lzo_byte *) + sizeof(lzo_full_align_t)]; ++ lzo_bytep wrkmem; ++ lzo_bytepp dict; ++ unsigned char x[4 * sizeof(lzo_full_align_t)]; ++ long d; ++ lzo_full_align_t a; ++ lzo_full_align_t u; ++ ++ for (i = 0; i < (int)sizeof(x); i++) ++ 
x[i] = LZO_BYTE(i); ++ ++ wrkmem = ++ LZO_PTR_ALIGN_UP((lzo_byte *) _wrkmem, sizeof(lzo_full_align_t)); ++ ++ u.a_lzo_bytep = wrkmem; ++ dict = u.a_lzo_bytepp; ++ ++ d = (long)((const lzo_bytep)dict - (const lzo_bytep)_wrkmem); ++ r &= __lzo_assert(d >= 0); ++ r &= __lzo_assert(d < (long)sizeof(lzo_full_align_t)); ++ ++ memset(&a, 0, sizeof(a)); ++ r &= __lzo_assert(a.a_lzo_voidp == NULL); ++ ++ memset(&a, 0xff, sizeof(a)); ++ r &= __lzo_assert(a.a_ushort == USHRT_MAX); ++ r &= __lzo_assert(a.a_uint == UINT_MAX); ++ r &= __lzo_assert(a.a_ulong == ULONG_MAX); ++ r &= __lzo_assert(a.a_lzo_uint == LZO_UINT_MAX); ++ r &= __lzo_assert(a.a_lzo_uint32 == LZO_UINT32_MAX); ++ ++ if (r == 1) { ++ for (i = 0; i < 8; i++) ++ r &= __lzo_assert((const lzo_voidp)(&dict[i]) == ++ (const ++ lzo_voidp)(&wrkmem[i * ++ sizeof(lzo_byte ++ *)])); ++ } ++ ++ memset(&a, 0, sizeof(a)); ++ r &= __lzo_assert(a.a_char_p == NULL); ++ r &= __lzo_assert(a.a_lzo_bytep == NULL); ++ r &= __lzo_assert(NULL == (void *)0); ++ if (r == 1) { ++ for (i = 0; i < 10; i++) ++ dict[i] = wrkmem; ++ BZERO8_PTR(dict + 1, sizeof(dict[0]), 8); ++ r &= __lzo_assert(dict[0] == wrkmem); ++ for (i = 1; i < 9; i++) ++ r &= __lzo_assert(dict[i] == NULL); ++ r &= __lzo_assert(dict[9] == wrkmem); ++ } ++ ++ if (r == 1) { ++ unsigned k = 1; ++ const unsigned n = (unsigned)sizeof(lzo_uint32); ++ lzo_byte *p0; ++ lzo_byte *p1; ++ ++ k += __lzo_align_gap(&x[k], n); ++ p0 = (lzo_bytep) & x[k]; ++#if defined(PTR_LINEAR) ++ r &= __lzo_assert((PTR_LINEAR(p0) & (n - 1)) == 0); ++#else ++ r &= __lzo_assert(n == 4); ++ r &= __lzo_assert(PTR_ALIGNED_4(p0)); ++#endif ++ ++ r &= __lzo_assert(k >= 1); ++ p1 = (lzo_bytep) & x[1]; ++ r &= __lzo_assert(PTR_GE(p0, p1)); ++ ++ r &= __lzo_assert(k < 1 + n); ++ p1 = (lzo_bytep) & x[1 + n]; ++ r &= __lzo_assert(PTR_LT(p0, p1)); ++ ++ if (r == 1) { ++ lzo_uint32 v0, v1; ++ ++ u.a_uchar_p = &x[k]; ++ v0 = *u.a_lzo_uint32_p; ++ u.a_uchar_p = &x[k + n]; ++ v1 = *u.a_lzo_uint32_p; ++ ++ r &= 
__lzo_assert(v0 > 0); ++ r &= __lzo_assert(v1 > 0); ++ } ++ } ++ ++ return r; ++} ++ ++static int _lzo_config_check(void) ++{ ++ lzo_bool r = 1; ++ int i; ++ union { ++ lzo_uint32 a; ++ unsigned short b; ++ lzo_uint32 aa[4]; ++ unsigned char x[4 * sizeof(lzo_full_align_t)]; ++ } u; ++ ++ COMPILE_TIME_ASSERT((int)((unsigned char)((signed char)-1)) == 255); ++ COMPILE_TIME_ASSERT((((unsigned char)128) << (int)(8 * sizeof(int) - 8)) ++ < 0); ++ ++ r &= basic_integral_check(); ++ r &= basic_ptr_check(); ++ if (r != 1) ++ return LZO_E_ERROR; ++ ++ u.a = 0; ++ u.b = 0; ++ for (i = 0; i < (int)sizeof(u.x); i++) ++ u.x[i] = LZO_BYTE(i); ++ ++#if defined(LZO_BYTE_ORDER) ++ if (r == 1) { ++# if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) ++ lzo_uint32 a = (lzo_uint32) (u.a & LZO_0xffffffffL); ++ unsigned short b = (unsigned short)(u.b & 0xffff); ++ r &= __lzo_assert(a == 0x03020100L); ++ r &= __lzo_assert(b == 0x0100); ++# elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN) ++ lzo_uint32 a = u.a >> (8 * sizeof(u.a) - 32); ++ unsigned short b = u.b >> (8 * sizeof(u.b) - 16); ++ r &= __lzo_assert(a == 0x00010203L); ++ r &= __lzo_assert(b == 0x0001); ++# else ++# error "invalid LZO_BYTE_ORDER" ++# endif ++ } ++#endif ++ ++#if defined(LZO_UNALIGNED_OK_2) ++ COMPILE_TIME_ASSERT(sizeof(short) == 2); ++ if (r == 1) { ++ unsigned short b[4]; ++ ++ for (i = 0; i < 4; i++) ++ b[i] = *(const unsigned short *)&u.x[i]; ++ ++# if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) ++ r &= __lzo_assert(b[0] == 0x0100); ++ r &= __lzo_assert(b[1] == 0x0201); ++ r &= __lzo_assert(b[2] == 0x0302); ++ r &= __lzo_assert(b[3] == 0x0403); ++# elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN) ++ r &= __lzo_assert(b[0] == 0x0001); ++ r &= __lzo_assert(b[1] == 0x0102); ++ r &= __lzo_assert(b[2] == 0x0203); ++ r &= __lzo_assert(b[3] == 0x0304); ++# endif ++ } ++#endif ++ ++#if defined(LZO_UNALIGNED_OK_4) ++ COMPILE_TIME_ASSERT(sizeof(lzo_uint32) == 4); ++ if (r == 1) { ++ lzo_uint32 a[4]; ++ ++ for (i = 0; i < 4; i++) ++ a[i] = *(const 
lzo_uint32 *)&u.x[i]; ++ ++# if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) ++ r &= __lzo_assert(a[0] == 0x03020100L); ++ r &= __lzo_assert(a[1] == 0x04030201L); ++ r &= __lzo_assert(a[2] == 0x05040302L); ++ r &= __lzo_assert(a[3] == 0x06050403L); ++# elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN) ++ r &= __lzo_assert(a[0] == 0x00010203L); ++ r &= __lzo_assert(a[1] == 0x01020304L); ++ r &= __lzo_assert(a[2] == 0x02030405L); ++ r &= __lzo_assert(a[3] == 0x03040506L); ++# endif ++ } ++#endif ++ ++#if defined(LZO_ALIGNED_OK_4) ++ COMPILE_TIME_ASSERT(sizeof(lzo_uint32) == 4); ++#endif ++ ++ COMPILE_TIME_ASSERT(lzo_sizeof_dict_t == sizeof(lzo_dict_t)); ++ ++ if (r == 1) { ++ r &= __lzo_assert(!schedule_insns_bug()); ++ } ++ ++ if (r == 1) { ++ static int x[3]; ++ static unsigned xn = 3; ++ register unsigned j; ++ ++ for (j = 0; j < xn; j++) ++ x[j] = (int)j - 3; ++ r &= __lzo_assert(!strength_reduce_bug(x)); ++ } ++ ++ if (r == 1) { ++ r &= ptr_check(); ++ } ++ ++ return r == 1 ? LZO_E_OK : LZO_E_ERROR; ++} ++ ++static lzo_bool schedule_insns_bug(void) ++{ ++#if defined(__LZO_CHECKER) ++ return 0; ++#else ++ const int clone[] = { 1, 2, 0 }; ++ const int *q; ++ q = clone; ++ return (*q) ? 
0 : 1; ++#endif ++} ++ ++static lzo_bool strength_reduce_bug(int *x) ++{ ++ return x[0] != -3 || x[1] != -2 || x[2] != -1; ++} ++ ++#undef COMPILE_TIME_ASSERT ++ ++int __lzo_init2(unsigned v, int s1, int s2, int s3, int s4, int s5, ++ int s6, int s7, int s8, int s9) ++{ ++ int r; ++ ++ if (v == 0) ++ return LZO_E_ERROR; ++ ++ r = (s1 == -1 || s1 == (int)sizeof(short)) && ++ (s2 == -1 || s2 == (int)sizeof(int)) && ++ (s3 == -1 || s3 == (int)sizeof(long)) && ++ (s4 == -1 || s4 == (int)sizeof(lzo_uint32)) && ++ (s5 == -1 || s5 == (int)sizeof(lzo_uint)) && ++ (s6 == -1 || s6 == (int)lzo_sizeof_dict_t) && ++ (s7 == -1 || s7 == (int)sizeof(char *)) && ++ (s8 == -1 || s8 == (int)sizeof(lzo_voidp)) && ++ (s9 == -1 || s9 == (int)sizeof(lzo_compress_t)); ++ if (!r) ++ return LZO_E_ERROR; ++ ++ r = _lzo_config_check(); ++ if (r != LZO_E_OK) ++ return r; ++ ++ return r; ++} ++ ++#define do_compress _lzo1x_1_do_compress ++ ++#define LZO_NEED_DICT_H ++#define D_BITS 14 ++#define D_INDEX1(d,p) d = DM((0x21*DX3(p,5,5,6)) >> 5) ++#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f) ++ ++#ifndef __LZO_CONFIG1X_H ++#define __LZO_CONFIG1X_H ++ ++#if !defined(LZO1X) && !defined(LZO1Y) && !defined(LZO1Z) ++# define LZO1X ++#endif ++ ++#define LZO_EOF_CODE ++#undef LZO_DETERMINISTIC ++ ++#define M1_MAX_OFFSET 0x0400 ++#ifndef M2_MAX_OFFSET ++#define M2_MAX_OFFSET 0x0800 ++#endif ++#define M3_MAX_OFFSET 0x4000 ++#define M4_MAX_OFFSET 0xbfff ++ ++#define MX_MAX_OFFSET (M1_MAX_OFFSET + M2_MAX_OFFSET) ++ ++#define M1_MIN_LEN 2 ++#define M1_MAX_LEN 2 ++#define M2_MIN_LEN 3 ++#ifndef M2_MAX_LEN ++#define M2_MAX_LEN 8 ++#endif ++#define M3_MIN_LEN 3 ++#define M3_MAX_LEN 33 ++#define M4_MIN_LEN 3 ++#define M4_MAX_LEN 9 ++ ++#define M1_MARKER 0 ++#define M2_MARKER 64 ++#define M3_MARKER 32 ++#define M4_MARKER 16 ++ ++#ifndef MIN_LOOKAHEAD ++#define MIN_LOOKAHEAD (M2_MAX_LEN + 1) ++#endif ++ ++#if defined(LZO_NEED_DICT_H) ++ ++#ifndef LZO_HASH ++#define LZO_HASH 
LZO_HASH_LZO_INCREMENTAL_B ++#endif ++#define DL_MIN_LEN M2_MIN_LEN ++ ++#ifndef __LZO_DICT_H ++#define __LZO_DICT_H ++ ++#if !defined(D_BITS) && defined(DBITS) ++# define D_BITS DBITS ++#endif ++#if !defined(D_BITS) ++# error "D_BITS is not defined" ++#endif ++#if (D_BITS < 16) ++# define D_SIZE LZO_SIZE(D_BITS) ++# define D_MASK LZO_MASK(D_BITS) ++#else ++# define D_SIZE LZO_USIZE(D_BITS) ++# define D_MASK LZO_UMASK(D_BITS) ++#endif ++#define D_HIGH ((D_MASK >> 1) + 1) ++ ++#if !defined(DD_BITS) ++# define DD_BITS 0 ++#endif ++#define DD_SIZE LZO_SIZE(DD_BITS) ++#define DD_MASK LZO_MASK(DD_BITS) ++ ++#if !defined(DL_BITS) ++# define DL_BITS (D_BITS - DD_BITS) ++#endif ++#if (DL_BITS < 16) ++# define DL_SIZE LZO_SIZE(DL_BITS) ++# define DL_MASK LZO_MASK(DL_BITS) ++#else ++# define DL_SIZE LZO_USIZE(DL_BITS) ++# define DL_MASK LZO_UMASK(DL_BITS) ++#endif ++ ++#if (D_BITS != DL_BITS + DD_BITS) ++# error "D_BITS does not match" ++#endif ++#if (D_BITS < 8 || D_BITS > 18) ++# error "invalid D_BITS" ++#endif ++#if (DL_BITS < 8 || DL_BITS > 20) ++# error "invalid DL_BITS" ++#endif ++#if (DD_BITS < 0 || DD_BITS > 6) ++# error "invalid DD_BITS" ++#endif ++ ++#if !defined(DL_MIN_LEN) ++# define DL_MIN_LEN 3 ++#endif ++#if !defined(DL_SHIFT) ++# define DL_SHIFT ((DL_BITS + (DL_MIN_LEN - 1)) / DL_MIN_LEN) ++#endif ++ ++#define LZO_HASH_GZIP 1 ++#define LZO_HASH_GZIP_INCREMENTAL 2 ++#define LZO_HASH_LZO_INCREMENTAL_A 3 ++#define LZO_HASH_LZO_INCREMENTAL_B 4 ++ ++#if !defined(LZO_HASH) ++# error "choose a hashing strategy" ++#endif ++ ++#if (DL_MIN_LEN == 3) ++# define _DV2_A(p,shift1,shift2) \ ++ (((( (lzo_uint32)((p)[0]) << shift1) ^ (p)[1]) << shift2) ^ (p)[2]) ++# define _DV2_B(p,shift1,shift2) \ ++ (((( (lzo_uint32)((p)[2]) << shift1) ^ (p)[1]) << shift2) ^ (p)[0]) ++# define _DV3_B(p,shift1,shift2,shift3) \ ++ ((_DV2_B((p)+1,shift1,shift2) << (shift3)) ^ (p)[0]) ++#elif (DL_MIN_LEN == 2) ++# define _DV2_A(p,shift1,shift2) \ ++ (( (lzo_uint32)(p[0]) << shift1) ^ p[1]) ++# 
define _DV2_B(p,shift1,shift2) \ ++ (( (lzo_uint32)(p[1]) << shift1) ^ p[2]) ++#else ++# error "invalid DL_MIN_LEN" ++#endif ++#define _DV_A(p,shift) _DV2_A(p,shift,shift) ++#define _DV_B(p,shift) _DV2_B(p,shift,shift) ++#define DA2(p,s1,s2) \ ++ (((((lzo_uint32)((p)[2]) << (s2)) + (p)[1]) << (s1)) + (p)[0]) ++#define DS2(p,s1,s2) \ ++ (((((lzo_uint32)((p)[2]) << (s2)) - (p)[1]) << (s1)) - (p)[0]) ++#define DX2(p,s1,s2) \ ++ (((((lzo_uint32)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0]) ++#define DA3(p,s1,s2,s3) ((DA2((p)+1,s2,s3) << (s1)) + (p)[0]) ++#define DS3(p,s1,s2,s3) ((DS2((p)+1,s2,s3) << (s1)) - (p)[0]) ++#define DX3(p,s1,s2,s3) ((DX2((p)+1,s2,s3) << (s1)) ^ (p)[0]) ++#define DMS(v,s) ((lzo_uint) (((v) & (D_MASK >> (s))) << (s))) ++#define DM(v) DMS(v,0) ++ ++#if (LZO_HASH == LZO_HASH_GZIP) ++# define _DINDEX(dv,p) (_DV_A((p),DL_SHIFT)) ++ ++#elif (LZO_HASH == LZO_HASH_GZIP_INCREMENTAL) ++# define __LZO_HASH_INCREMENTAL ++# define DVAL_FIRST(dv,p) dv = _DV_A((p),DL_SHIFT) ++# define DVAL_NEXT(dv,p) dv = (((dv) << DL_SHIFT) ^ p[2]) ++# define _DINDEX(dv,p) (dv) ++# define DVAL_LOOKAHEAD DL_MIN_LEN ++ ++#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_A) ++# define __LZO_HASH_INCREMENTAL ++# define DVAL_FIRST(dv,p) dv = _DV_A((p),5) ++# define DVAL_NEXT(dv,p) \ ++ dv ^= (lzo_uint32)(p[-1]) << (2*5); dv = (((dv) << 5) ^ p[2]) ++# define _DINDEX(dv,p) ((0x9f5f * (dv)) >> 5) ++# define DVAL_LOOKAHEAD DL_MIN_LEN ++ ++#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_B) ++# define __LZO_HASH_INCREMENTAL ++# define DVAL_FIRST(dv,p) dv = _DV_B((p),5) ++# define DVAL_NEXT(dv,p) \ ++ dv ^= p[-1]; dv = (((dv) >> 5) ^ ((lzo_uint32)(p[2]) << (2*5))) ++# define _DINDEX(dv,p) ((0x9f5f * (dv)) >> 5) ++# define DVAL_LOOKAHEAD DL_MIN_LEN ++ ++#else ++# error "choose a hashing strategy" ++#endif ++ ++#ifndef DINDEX ++#define DINDEX(dv,p) ((lzo_uint)((_DINDEX(dv,p)) & DL_MASK) << DD_BITS) ++#endif ++#if !defined(DINDEX1) && defined(D_INDEX1) ++#define DINDEX1 D_INDEX1 ++#endif ++#if 
!defined(DINDEX2) && defined(D_INDEX2) ++#define DINDEX2 D_INDEX2 ++#endif ++ ++#if !defined(__LZO_HASH_INCREMENTAL) ++# define DVAL_FIRST(dv,p) ((void) 0) ++# define DVAL_NEXT(dv,p) ((void) 0) ++# define DVAL_LOOKAHEAD 0 ++#endif ++ ++#if !defined(DVAL_ASSERT) ++#if defined(__LZO_HASH_INCREMENTAL) && !defined(NDEBUG) ++static void DVAL_ASSERT(lzo_uint32 dv, const lzo_byte * p) ++{ ++ lzo_uint32 df; ++ DVAL_FIRST(df, (p)); ++ assert(DINDEX(dv, p) == DINDEX(df, p)); ++} ++#else ++# define DVAL_ASSERT(dv,p) ((void) 0) ++#endif ++#endif ++ ++# define DENTRY(p,in) (p) ++# define GINDEX(m_pos,m_off,dict,dindex,in) m_pos = dict[dindex] ++ ++#if (DD_BITS == 0) ++ ++# define UPDATE_D(dict,drun,dv,p,in) dict[ DINDEX(dv,p) ] = DENTRY(p,in) ++# define UPDATE_I(dict,drun,index,p,in) dict[index] = DENTRY(p,in) ++# define UPDATE_P(ptr,drun,p,in) (ptr)[0] = DENTRY(p,in) ++ ++#else ++ ++# define UPDATE_D(dict,drun,dv,p,in) \ ++ dict[ DINDEX(dv,p) + drun++ ] = DENTRY(p,in); drun &= DD_MASK ++# define UPDATE_I(dict,drun,index,p,in) \ ++ dict[ (index) + drun++ ] = DENTRY(p,in); drun &= DD_MASK ++# define UPDATE_P(ptr,drun,p,in) \ ++ (ptr) [ drun++ ] = DENTRY(p,in); drun &= DD_MASK ++ ++#endif ++ ++#define LZO_CHECK_MPOS_DET(m_pos,m_off,in,ip,max_offset) \ ++ (m_pos == NULL || (m_off = (lzo_moff_t) (ip - m_pos)) > max_offset) ++ ++#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \ ++ (BOUNDS_CHECKING_OFF_IN_EXPR( \ ++ (PTR_LT(m_pos,in) || \ ++ (m_off = (lzo_moff_t) PTR_DIFF(ip,m_pos)) <= 0 || \ ++ m_off > max_offset) )) ++ ++#if defined(LZO_DETERMINISTIC) ++# define LZO_CHECK_MPOS LZO_CHECK_MPOS_DET ++#else ++# define LZO_CHECK_MPOS LZO_CHECK_MPOS_NON_DET ++#endif ++#endif ++#endif ++#endif ++#define DO_COMPRESS lzo1x_1_compress ++static ++lzo_uint do_compress(const lzo_byte * in, lzo_uint in_len, ++ lzo_byte * out, lzo_uintp out_len, lzo_voidp wrkmem) ++{ ++ register const lzo_byte *ip; ++ lzo_byte *op; ++ const lzo_byte *const in_end = in + in_len; ++ const lzo_byte 
*const ip_end = in + in_len - M2_MAX_LEN - 5; ++ const lzo_byte *ii; ++ lzo_dict_p const dict = (lzo_dict_p) wrkmem; ++ ++ op = out; ++ ip = in; ++ ii = ip; ++ ++ ip += 4; ++ for (;;) { ++ register const lzo_byte *m_pos; ++ ++ lzo_moff_t m_off; ++ lzo_uint m_len; ++ lzo_uint dindex; ++ ++ DINDEX1(dindex, ip); ++ GINDEX(m_pos, m_off, dict, dindex, in); ++ if (LZO_CHECK_MPOS_NON_DET(m_pos, m_off, in, ip, M4_MAX_OFFSET)) ++ goto literal; ++#if 1 ++ if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) ++ goto try_match; ++ DINDEX2(dindex, ip); ++#endif ++ GINDEX(m_pos, m_off, dict, dindex, in); ++ if (LZO_CHECK_MPOS_NON_DET(m_pos, m_off, in, ip, M4_MAX_OFFSET)) ++ goto literal; ++ if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) ++ goto try_match; ++ goto literal; ++ ++ try_match: ++#if 1 && defined(LZO_UNALIGNED_OK_2) ++ if (*(const lzo_ushortp)m_pos != *(const lzo_ushortp)ip) { ++#else ++ if (m_pos[0] != ip[0] || m_pos[1] != ip[1]) { ++#endif ++ ; ++ } else { ++ if (m_pos[2] == ip[2]) { ++ goto match; ++ } else { ++ ; ++ } ++ } ++ ++ literal: ++ UPDATE_I(dict, 0, dindex, ip, in); ++ ++ip; ++ if (ip >= ip_end) ++ break; ++ continue; ++ ++ match: ++ UPDATE_I(dict, 0, dindex, ip, in); ++ if (pd(ip, ii) > 0) { ++ register lzo_uint t = pd(ip, ii); ++ ++ if (t <= 3) { ++ assert("lzo-04", op - 2 > out); ++ op[-2] |= LZO_BYTE(t); ++ } else if (t <= 18) ++ *op++ = LZO_BYTE(t - 3); ++ else { ++ register lzo_uint tt = t - 18; ++ ++ *op++ = 0; ++ while (tt > 255) { ++ tt -= 255; ++ *op++ = 0; ++ } ++ assert("lzo-05", tt > 0); ++ *op++ = LZO_BYTE(tt); ++ } ++ do ++ *op++ = *ii++; ++ while (--t > 0); ++ } ++ ++ assert("lzo-06", ii == ip); ++ ip += 3; ++ if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || m_pos[5] != *ip++ ++ || m_pos[6] != *ip++ || m_pos[7] != *ip++ ++ || m_pos[8] != *ip++ ++#ifdef LZO1Y ++ || m_pos[9] != *ip++ || m_pos[10] != *ip++ ++ || m_pos[11] != *ip++ || m_pos[12] != *ip++ ++ || m_pos[13] != *ip++ || m_pos[14] != *ip++ ++#endif ++ ) { ++ --ip; ++ m_len = ip - ii; ++ 
assert("lzo-07", m_len >= 3); ++ assert("lzo-08", m_len <= M2_MAX_LEN); ++ ++ if (m_off <= M2_MAX_OFFSET) { ++ m_off -= 1; ++#if defined(LZO1X) ++ *op++ = ++ LZO_BYTE(((m_len - ++ 1) << 5) | ((m_off & 7) << 2)); ++ *op++ = LZO_BYTE(m_off >> 3); ++#elif defined(LZO1Y) ++ *op++ = ++ LZO_BYTE(((m_len + ++ 1) << 4) | ((m_off & 3) << 2)); ++ *op++ = LZO_BYTE(m_off >> 2); ++#endif ++ } else if (m_off <= M3_MAX_OFFSET) { ++ m_off -= 1; ++ *op++ = LZO_BYTE(M3_MARKER | (m_len - 2)); ++ goto m3_m4_offset; ++ } else ++#if defined(LZO1X) ++ { ++ m_off -= 0x4000; ++ assert("lzo-09", m_off > 0); ++ assert("lzo-10", m_off <= 0x7fff); ++ *op++ = LZO_BYTE(M4_MARKER | ++ ((m_off & 0x4000) >> 11) | ++ (m_len - 2)); ++ goto m3_m4_offset; ++ } ++#elif defined(LZO1Y) ++ goto m4_match; ++#endif ++ } else { ++ { ++ const lzo_byte *end = in_end; ++ const lzo_byte *m = m_pos + M2_MAX_LEN + 1; ++ while (ip < end && *m == *ip) ++ m++, ip++; ++ m_len = (ip - ii); ++ } ++ assert("lzo-11", m_len > M2_MAX_LEN); ++ ++ if (m_off <= M3_MAX_OFFSET) { ++ m_off -= 1; ++ if (m_len <= 33) ++ *op++ = ++ LZO_BYTE(M3_MARKER | (m_len - 2)); ++ else { ++ m_len -= 33; ++ *op++ = M3_MARKER | 0; ++ goto m3_m4_len; ++ } ++ } else { ++#if defined(LZO1Y) ++ m4_match: ++#endif ++ m_off -= 0x4000; ++ assert("lzo-12", m_off > 0); ++ assert("lzo-13", m_off <= 0x7fff); ++ if (m_len <= M4_MAX_LEN) ++ *op++ = LZO_BYTE(M4_MARKER | ++ ((m_off & 0x4000) >> ++ 11) | (m_len - 2)); ++ else { ++ m_len -= M4_MAX_LEN; ++ *op++ = ++ LZO_BYTE(M4_MARKER | ++ ((m_off & 0x4000) >> 11)); ++ m3_m4_len: ++ while (m_len > 255) { ++ m_len -= 255; ++ *op++ = 0; ++ } ++ assert("lzo-14", m_len > 0); ++ *op++ = LZO_BYTE(m_len); ++ } ++ } ++ ++ m3_m4_offset: ++ *op++ = LZO_BYTE((m_off & 63) << 2); ++ *op++ = LZO_BYTE(m_off >> 6); ++ } ++ ++ ii = ip; ++ if (ip >= ip_end) ++ break; ++ } ++ ++ *out_len = op - out; ++ return pd(in_end, ii); ++} ++ ++int DO_COMPRESS(const lzo_byte * in, lzo_uint in_len, ++ lzo_byte * out, lzo_uintp out_len, lzo_voidp 
wrkmem) ++{ ++ lzo_byte *op = out; ++ lzo_uint t; ++ ++#if defined(__LZO_QUERY_COMPRESS) ++ if (__LZO_IS_COMPRESS_QUERY(in, in_len, out, out_len, wrkmem)) ++ return __LZO_QUERY_COMPRESS(in, in_len, out, out_len, wrkmem, ++ D_SIZE, lzo_sizeof(lzo_dict_t)); ++#endif ++ ++ if (in_len <= M2_MAX_LEN + 5) ++ t = in_len; ++ else { ++ t = do_compress(in, in_len, op, out_len, wrkmem); ++ op += *out_len; ++ } ++ ++ if (t > 0) { ++ const lzo_byte *ii = in + in_len - t; ++ ++ if (op == out && t <= 238) ++ *op++ = LZO_BYTE(17 + t); ++ else if (t <= 3) ++ op[-2] |= LZO_BYTE(t); ++ else if (t <= 18) ++ *op++ = LZO_BYTE(t - 3); ++ else { ++ lzo_uint tt = t - 18; ++ ++ *op++ = 0; ++ while (tt > 255) { ++ tt -= 255; ++ *op++ = 0; ++ } ++ assert("lzo-15", tt > 0); ++ *op++ = LZO_BYTE(tt); ++ } ++ do ++ *op++ = *ii++; ++ while (--t > 0); ++ } ++ ++ *op++ = M4_MARKER | 1; ++ *op++ = 0; ++ *op++ = 0; ++ ++ *out_len = op - out; ++ return LZO_E_OK; ++} ++ ++#undef do_compress ++#undef DO_COMPRESS ++#undef LZO_HASH ++ ++#undef LZO_TEST_DECOMPRESS_OVERRUN ++#undef LZO_TEST_DECOMPRESS_OVERRUN_INPUT ++#undef LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT ++#undef LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND ++#undef DO_DECOMPRESS ++#define DO_DECOMPRESS lzo1x_decompress ++ ++#if defined(LZO_TEST_DECOMPRESS_OVERRUN) ++# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT) ++# define LZO_TEST_DECOMPRESS_OVERRUN_INPUT 2 ++# endif ++# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT) ++# define LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT 2 ++# endif ++# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) ++# define LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND ++# endif ++#endif ++ ++#undef TEST_IP ++#undef TEST_OP ++#undef TEST_LOOKBEHIND ++#undef NEED_IP ++#undef NEED_OP ++#undef HAVE_TEST_IP ++#undef HAVE_TEST_OP ++#undef HAVE_NEED_IP ++#undef HAVE_NEED_OP ++#undef HAVE_ANY_IP ++#undef HAVE_ANY_OP ++ ++#if defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT) ++# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 1) ++# define TEST_IP (ip < ip_end) ++# 
endif ++# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 2) ++# define NEED_IP(x) \ ++ if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x)) goto input_overrun ++# endif ++#endif ++ ++#if defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT) ++# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 1) ++# define TEST_OP (op <= op_end) ++# endif ++# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 2) ++# undef TEST_OP ++# define NEED_OP(x) \ ++ if ((lzo_uint)(op_end - op) < (lzo_uint)(x)) goto output_overrun ++# endif ++#endif ++ ++#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) ++# define TEST_LOOKBEHIND(m_pos,out) if (m_pos < out) goto lookbehind_overrun ++#else ++# define TEST_LOOKBEHIND(m_pos,op) ((void) 0) ++#endif ++ ++#if !defined(LZO_EOF_CODE) && !defined(TEST_IP) ++# define TEST_IP (ip < ip_end) ++#endif ++ ++#if defined(TEST_IP) ++# define HAVE_TEST_IP ++#else ++# define TEST_IP 1 ++#endif ++#if defined(TEST_OP) ++# define HAVE_TEST_OP ++#else ++# define TEST_OP 1 ++#endif ++ ++#if defined(NEED_IP) ++# define HAVE_NEED_IP ++#else ++# define NEED_IP(x) ((void) 0) ++#endif ++#if defined(NEED_OP) ++# define HAVE_NEED_OP ++#else ++# define NEED_OP(x) ((void) 0) ++#endif ++ ++#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP) ++# define HAVE_ANY_IP ++#endif ++#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP) ++# define HAVE_ANY_OP ++#endif ++ ++#undef __COPY4 ++#define __COPY4(dst,src) * (lzo_uint32p)(dst) = * (const lzo_uint32p)(src) ++ ++#undef COPY4 ++#if defined(LZO_UNALIGNED_OK_4) ++# define COPY4(dst,src) __COPY4(dst,src) ++#elif defined(LZO_ALIGNED_OK_4) ++# define COPY4(dst,src) __COPY4((lzo_ptr_t)(dst),(lzo_ptr_t)(src)) ++#endif ++ ++#if defined(DO_DECOMPRESS) ++int DO_DECOMPRESS(const lzo_byte * in, lzo_uint in_len, ++ lzo_byte * out, lzo_uintp out_len, lzo_voidp wrkmem) ++#endif ++{ ++ register lzo_byte *op; ++ register const lzo_byte *ip; ++ register lzo_uint t; ++#if defined(COPY_DICT) ++ lzo_uint m_off; ++ const lzo_byte *dict_end; ++#else ++ register const lzo_byte *m_pos; ++#endif ++ 
++ const lzo_byte *const ip_end = in + in_len; ++#if defined(HAVE_ANY_OP) ++ lzo_byte *const op_end = out + *out_len; ++#endif ++#if defined(LZO1Z) ++ lzo_uint last_m_off = 0; ++#endif ++ ++ LZO_UNUSED(wrkmem); ++ ++#if defined(__LZO_QUERY_DECOMPRESS) ++ if (__LZO_IS_DECOMPRESS_QUERY(in, in_len, out, out_len, wrkmem)) ++ return __LZO_QUERY_DECOMPRESS(in, in_len, out, out_len, wrkmem, ++ 0, 0); ++#endif ++ ++#if defined(COPY_DICT) ++ if (dict) { ++ if (dict_len > M4_MAX_OFFSET) { ++ dict += dict_len - M4_MAX_OFFSET; ++ dict_len = M4_MAX_OFFSET; ++ } ++ dict_end = dict + dict_len; ++ } else { ++ dict_len = 0; ++ dict_end = NULL; ++ } ++#endif ++ ++ *out_len = 0; ++ ++ op = out; ++ ip = in; ++ ++ if (*ip > 17) { ++ t = *ip++ - 17; ++ if (t < 4) ++ goto match_next; ++ assert("lzo-16", t > 0); ++ NEED_OP(t); ++ NEED_IP(t + 1); ++ do ++ *op++ = *ip++; ++ while (--t > 0); ++ goto first_literal_run; ++ } ++ ++ while (TEST_IP && TEST_OP) { ++ t = *ip++; ++ if (t >= 16) ++ goto match; ++ if (t == 0) { ++ NEED_IP(1); ++ while (*ip == 0) { ++ t += 255; ++ ip++; ++ NEED_IP(1); ++ } ++ t += 15 + *ip++; ++ } ++ assert("lzo-17", t > 0); ++ NEED_OP(t + 3); ++ NEED_IP(t + 4); ++#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) ++#if !defined(LZO_UNALIGNED_OK_4) ++ if (PTR_ALIGNED2_4(op, ip)) { ++#endif ++ COPY4(op, ip); ++ op += 4; ++ ip += 4; ++ if (--t > 0) { ++ if (t >= 4) { ++ do { ++ COPY4(op, ip); ++ op += 4; ++ ip += 4; ++ t -= 4; ++ } while (t >= 4); ++ if (t > 0) ++ do ++ *op++ = *ip++; ++ while (--t > 0); ++ } else ++ do ++ *op++ = *ip++; ++ while (--t > 0); ++ } ++#if !defined(LZO_UNALIGNED_OK_4) ++ } else ++#endif ++#endif ++#if !defined(LZO_UNALIGNED_OK_4) ++ { ++ *op++ = *ip++; ++ *op++ = *ip++; ++ *op++ = *ip++; ++ do ++ *op++ = *ip++; ++ while (--t > 0); ++ } ++#endif ++ ++ first_literal_run: ++ ++ t = *ip++; ++ if (t >= 16) ++ goto match; ++#if defined(COPY_DICT) ++#if defined(LZO1Z) ++ m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); ++ last_m_off 
= m_off; ++#else ++ m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2); ++#endif ++ NEED_OP(3); ++ t = 3; ++ COPY_DICT(t, m_off) ++#else ++#if defined(LZO1Z) ++ t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); ++ m_pos = op - t; ++ last_m_off = t; ++#else ++ m_pos = op - (1 + M2_MAX_OFFSET); ++ m_pos -= t >> 2; ++ m_pos -= *ip++ << 2; ++#endif ++ TEST_LOOKBEHIND(m_pos, out); ++ NEED_OP(3); ++ *op++ = *m_pos++; ++ *op++ = *m_pos++; ++ *op++ = *m_pos; ++#endif ++ goto match_done; ++ ++ while (TEST_IP && TEST_OP) { ++ match: ++ if (t >= 64) { ++#if defined(COPY_DICT) ++#if defined(LZO1X) ++ m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3); ++ t = (t >> 5) - 1; ++#elif defined(LZO1Y) ++ m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2); ++ t = (t >> 4) - 3; ++#elif defined(LZO1Z) ++ m_off = t & 0x1f; ++ if (m_off >= 0x1c) ++ m_off = last_m_off; ++ else { ++ m_off = 1 + (m_off << 6) + (*ip++ >> 2); ++ last_m_off = m_off; ++ } ++ t = (t >> 5) - 1; ++#endif ++#else ++#if defined(LZO1X) ++ m_pos = op - 1; ++ m_pos -= (t >> 2) & 7; ++ m_pos -= *ip++ << 3; ++ t = (t >> 5) - 1; ++#elif defined(LZO1Y) ++ m_pos = op - 1; ++ m_pos -= (t >> 2) & 3; ++ m_pos -= *ip++ << 2; ++ t = (t >> 4) - 3; ++#elif defined(LZO1Z) ++ { ++ lzo_uint off = t & 0x1f; ++ m_pos = op; ++ if (off >= 0x1c) { ++ assert(last_m_off > 0); ++ m_pos -= last_m_off; ++ } else { ++ off = ++ 1 + (off << 6) + ++ (*ip++ >> 2); ++ m_pos -= off; ++ last_m_off = off; ++ } ++ } ++ t = (t >> 5) - 1; ++#endif ++ TEST_LOOKBEHIND(m_pos, out); ++ assert("lzo-18", t > 0); ++ NEED_OP(t + 3 - 1); ++ goto copy_match; ++#endif ++ } else if (t >= 32) { ++ t &= 31; ++ if (t == 0) { ++ NEED_IP(1); ++ while (*ip == 0) { ++ t += 255; ++ ip++; ++ NEED_IP(1); ++ } ++ t += 31 + *ip++; ++ } ++#if defined(COPY_DICT) ++#if defined(LZO1Z) ++ m_off = 1 + (ip[0] << 6) + (ip[1] >> 2); ++ last_m_off = m_off; ++#else ++ m_off = 1 + (ip[0] >> 2) + (ip[1] << 6); ++#endif ++#else ++#if defined(LZO1Z) ++ { ++ lzo_uint off = ++ 1 + (ip[0] << 6) + (ip[1] >> 2); 
++ m_pos = op - off; ++ last_m_off = off; ++ } ++#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) ++ m_pos = op - 1; ++ m_pos -= (*(const lzo_ushortp)ip) >> 2; ++#else ++ m_pos = op - 1; ++ m_pos -= (ip[0] >> 2) + (ip[1] << 6); ++#endif ++#endif ++ ip += 2; ++ } else if (t >= 16) { ++#if defined(COPY_DICT) ++ m_off = (t & 8) << 11; ++#else ++ m_pos = op; ++ m_pos -= (t & 8) << 11; ++#endif ++ t &= 7; ++ if (t == 0) { ++ NEED_IP(1); ++ while (*ip == 0) { ++ t += 255; ++ ip++; ++ NEED_IP(1); ++ } ++ t += 7 + *ip++; ++ } ++#if defined(COPY_DICT) ++#if defined(LZO1Z) ++ m_off += (ip[0] << 6) + (ip[1] >> 2); ++#else ++ m_off += (ip[0] >> 2) + (ip[1] << 6); ++#endif ++ ip += 2; ++ if (m_off == 0) ++ goto eof_found; ++ m_off += 0x4000; ++#if defined(LZO1Z) ++ last_m_off = m_off; ++#endif ++#else ++#if defined(LZO1Z) ++ m_pos -= (ip[0] << 6) + (ip[1] >> 2); ++#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN) ++ m_pos -= (*(const lzo_ushortp)ip) >> 2; ++#else ++ m_pos -= (ip[0] >> 2) + (ip[1] << 6); ++#endif ++ ip += 2; ++ if (m_pos == op) ++ goto eof_found; ++ m_pos -= 0x4000; ++#if defined(LZO1Z) ++ last_m_off = op - m_pos; ++#endif ++#endif ++ } else { ++#if defined(COPY_DICT) ++#if defined(LZO1Z) ++ m_off = 1 + (t << 6) + (*ip++ >> 2); ++ last_m_off = m_off; ++#else ++ m_off = 1 + (t >> 2) + (*ip++ << 2); ++#endif ++ NEED_OP(2); ++ t = 2; ++ COPY_DICT(t, m_off) ++#else ++#if defined(LZO1Z) ++ t = 1 + (t << 6) + (*ip++ >> 2); ++ m_pos = op - t; ++ last_m_off = t; ++#else ++ m_pos = op - 1; ++ m_pos -= t >> 2; ++ m_pos -= *ip++ << 2; ++#endif ++ TEST_LOOKBEHIND(m_pos, out); ++ NEED_OP(2); ++ *op++ = *m_pos++; ++ *op++ = *m_pos; ++#endif ++ goto match_done; ++ } ++ ++#if defined(COPY_DICT) ++ ++ NEED_OP(t + 3 - 1); ++ t += 3 - 1; ++ COPY_DICT(t, m_off) ++#else ++ ++ TEST_LOOKBEHIND(m_pos, out); ++ assert("lzo-19", t > 0); ++ NEED_OP(t + 3 - 1); ++#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) ++#if 
!defined(LZO_UNALIGNED_OK_4) ++ if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op, m_pos)) { ++ assert((op - m_pos) >= 4); ++#else ++ if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { ++#endif ++ COPY4(op, m_pos); ++ op += 4; ++ m_pos += 4; ++ t -= 4 - (3 - 1); ++ do { ++ COPY4(op, m_pos); ++ op += 4; ++ m_pos += 4; ++ t -= 4; ++ } while (t >= 4); ++ if (t > 0) ++ do ++ *op++ = *m_pos++; ++ while (--t > 0); ++ } else ++#endif ++ { ++ copy_match: ++ *op++ = *m_pos++; ++ *op++ = *m_pos++; ++ do ++ *op++ = *m_pos++; ++ while (--t > 0); ++ } ++ ++#endif ++ ++ match_done: ++#if defined(LZO1Z) ++ t = ip[-1] & 3; ++#else ++ t = ip[-2] & 3; ++#endif ++ if (t == 0) ++ break; ++ ++ match_next: ++ assert("lzo-20", t > 0); ++ NEED_OP(t); ++ NEED_IP(t + 1); ++ do ++ *op++ = *ip++; ++ while (--t > 0); ++ t = *ip++; ++ } ++ } ++ ++#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP) ++ *out_len = op - out; ++ return LZO_E_EOF_NOT_FOUND; ++#endif ++ ++ eof_found: ++ assert("lzo-21", t == 1); ++ *out_len = op - out; ++ return (ip == ip_end ? LZO_E_OK : ++ (ip < ip_end ? 
LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); ++ ++#if defined(HAVE_NEED_IP) ++ input_overrun: ++ *out_len = op - out; ++ return LZO_E_INPUT_OVERRUN; ++#endif ++ ++#if defined(HAVE_NEED_OP) ++ output_overrun: ++ *out_len = op - out; ++ return LZO_E_OUTPUT_OVERRUN; ++#endif ++ ++#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) ++ lookbehind_overrun: ++ *out_len = op - out; ++ return LZO_E_LOOKBEHIND_OVERRUN; ++#endif ++} ++ ++#define LZO_TEST_DECOMPRESS_OVERRUN ++#undef DO_DECOMPRESS ++#define DO_DECOMPRESS lzo1x_decompress_safe ++ ++#if defined(LZO_TEST_DECOMPRESS_OVERRUN) ++# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT) ++# define LZO_TEST_DECOMPRESS_OVERRUN_INPUT 2 ++# endif ++# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT) ++# define LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT 2 ++# endif ++# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) ++# define LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND ++# endif ++#endif ++ ++#undef TEST_IP ++#undef TEST_OP ++#undef TEST_LOOKBEHIND ++#undef NEED_IP ++#undef NEED_OP ++#undef HAVE_TEST_IP ++#undef HAVE_TEST_OP ++#undef HAVE_NEED_IP ++#undef HAVE_NEED_OP ++#undef HAVE_ANY_IP ++#undef HAVE_ANY_OP ++ ++#if defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT) ++# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 1) ++# define TEST_IP (ip < ip_end) ++# endif ++# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 2) ++# define NEED_IP(x) \ ++ if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x)) goto input_overrun ++# endif ++#endif ++ ++#if defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT) ++# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 1) ++# define TEST_OP (op <= op_end) ++# endif ++# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 2) ++# undef TEST_OP ++# define NEED_OP(x) \ ++ if ((lzo_uint)(op_end - op) < (lzo_uint)(x)) goto output_overrun ++# endif ++#endif ++ ++#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND) ++# define TEST_LOOKBEHIND(m_pos,out) if (m_pos < out) goto lookbehind_overrun ++#else ++# define TEST_LOOKBEHIND(m_pos,op) ((void) 0) ++#endif ++ ++#if 
!defined(LZO_EOF_CODE) && !defined(TEST_IP) ++# define TEST_IP (ip < ip_end) ++#endif ++ ++#if defined(TEST_IP) ++# define HAVE_TEST_IP ++#else ++# define TEST_IP 1 ++#endif ++#if defined(TEST_OP) ++# define HAVE_TEST_OP ++#else ++# define TEST_OP 1 ++#endif ++ ++#if defined(NEED_IP) ++# define HAVE_NEED_IP ++#else ++# define NEED_IP(x) ((void) 0) ++#endif ++#if defined(NEED_OP) ++# define HAVE_NEED_OP ++#else ++# define NEED_OP(x) ((void) 0) ++#endif ++ ++#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP) ++# define HAVE_ANY_IP ++#endif ++#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP) ++# define HAVE_ANY_OP ++#endif ++ ++#undef __COPY4 ++#define __COPY4(dst,src) * (lzo_uint32p)(dst) = * (const lzo_uint32p)(src) ++ ++#undef COPY4 ++#if defined(LZO_UNALIGNED_OK_4) ++# define COPY4(dst,src) __COPY4(dst,src) ++#elif defined(LZO_ALIGNED_OK_4) ++# define COPY4(dst,src) __COPY4((lzo_ptr_t)(dst),(lzo_ptr_t)(src)) ++#endif ++ ++/***** End of minilzo.c *****/ +diff --git a/fs/reiser4/plugin/compress/minilzo.h b/fs/reiser4/plugin/compress/minilzo.h +new file mode 100644 +index 0000000..6a47001 +--- /dev/null ++++ b/fs/reiser4/plugin/compress/minilzo.h +@@ -0,0 +1,70 @@ ++/* minilzo.h -- mini subset of the LZO real-time data compression library ++ adopted for reiser4 compression transform plugin. ++ ++ This file is part of the LZO real-time data compression library ++ and not included in any proprietary licenses of reiser4. ++ ++ Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer ++ Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer ++ All Rights Reserved. 
++ ++ The LZO library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of ++ the License, or (at your option) any later version. ++ ++ The LZO library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with the LZO library; see the file COPYING. ++ If not, write to the Free Software Foundation, Inc., ++ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ Markus F.X.J. Oberhumer ++ ++ http://www.oberhumer.com/opensource/lzo/ ++ */ ++ ++/* ++ * NOTE: ++ * the full LZO package can be found at ++ * http://www.oberhumer.com/opensource/lzo/ ++ */ ++ ++#ifndef __MINILZO_H ++#define __MINILZO_H ++ ++#define MINILZO_VERSION 0x1080 ++ ++#include "lzoconf.h" ++ ++/* Memory required for the wrkmem parameter. ++ * When the required size is 0, you can also pass a NULL pointer. 
++ */ ++ ++#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS ++#define LZO1X_1_MEM_COMPRESS ((lzo_uint32) (16384L * lzo_sizeof_dict_t)) ++#define LZO1X_MEM_DECOMPRESS (0) ++ ++/* compression */ ++extern int lzo1x_1_compress(const lzo_byte * src, lzo_uint src_len, ++ lzo_byte * dst, lzo_uintp dst_len, ++ lzo_voidp wrkmem); ++/* decompression */ ++extern int lzo1x_decompress(const lzo_byte * src, lzo_uint src_len, ++ lzo_byte * dst, lzo_uintp dst_len, ++ lzo_voidp wrkmem /* NOT USED */); ++/* safe decompression with overrun testing */ ++extern int lzo1x_decompress_safe(const lzo_byte * src, lzo_uint src_len, ++ lzo_byte * dst, lzo_uintp dst_len, ++ lzo_voidp wrkmem /* NOT USED */ ); ++ ++#endif /* already included */ +diff --git a/fs/reiser4/plugin/crypto/cipher.c b/fs/reiser4/plugin/crypto/cipher.c +new file mode 100644 +index 0000000..e918154 +--- /dev/null ++++ b/fs/reiser4/plugin/crypto/cipher.c +@@ -0,0 +1,37 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, ++ licensing governed by reiser4/README */ ++/* Reiser4 cipher transform plugins */ ++ ++#include "../../debug.h" ++#include "../plugin.h" ++ ++cipher_plugin cipher_plugins[LAST_CIPHER_ID] = { ++ [NONE_CIPHER_ID] = { ++ .h = { ++ .type_id = REISER4_CIPHER_PLUGIN_TYPE, ++ .id = NONE_CIPHER_ID, ++ .pops = NULL, ++ .label = "none", ++ .desc = "no cipher transform", ++ .linkage = {NULL, NULL} ++ }, ++ .alloc = NULL, ++ .free = NULL, ++ .scale = NULL, ++ .align_stream = NULL, ++ .setkey = NULL, ++ .encrypt = NULL, ++ .decrypt = NULL ++ } ++}; ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/crypto/cipher.h b/fs/reiser4/plugin/crypto/cipher.h +new file mode 100644 +index 0000000..e896c67 +--- /dev/null ++++ b/fs/reiser4/plugin/crypto/cipher.h +@@ -0,0 +1,55 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++/* This file contains definitions for the objects operated ++ by reiser4 key manager, which is something like keyring ++ wrapped by appropriate reiser4 plugin */ ++ ++#if !defined( __FS_REISER4_CRYPT_H__ ) ++#define __FS_REISER4_CRYPT_H__ ++ ++#include ++ ++/* key info imported from user space */ ++typedef struct crypto_data { ++ int keysize; /* uninstantiated key size */ ++ __u8 * key; /* uninstantiated key */ ++ int keyid_size; /* size of passphrase */ ++ __u8 * keyid; /* passphrase */ ++} crypto_data_t; ++ ++/* This object contains all needed infrastructure to implement ++ cipher transform. This is operated (allocating, inheriting, ++ validating, binding to host inode, etc..) by reiser4 key manager. ++ ++ This info can be allocated in two cases: ++ 1. importing a key from user space. ++ 2. reading inode from disk */ ++typedef struct crypto_stat { ++ struct inode * host; ++ struct crypto_hash * digest; ++ struct crypto_blkcipher * cipher; ++#if 0 ++ cipher_key_plugin * kplug; /* key manager */ ++#endif ++ __u8 * keyid; /* key fingerprint, created by digest plugin, ++ using uninstantiated key and passphrase. 
++ supposed to be stored in disk stat-data */ ++ int inst; /* this indicates if the cipher key is ++ instantiated (case 1 above) */ ++ int keysize; /* uninstantiated key size (bytes), supposed ++ to be stored in disk stat-data */ ++ int keyload_count; /* number of the objects which has this ++ crypto-stat attached */ ++} crypto_stat_t; ++ ++#endif /* __FS_REISER4_CRYPT_H__ */ ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/crypto/digest.c b/fs/reiser4/plugin/crypto/digest.c +new file mode 100644 +index 0000000..7508917 +--- /dev/null ++++ b/fs/reiser4/plugin/crypto/digest.c +@@ -0,0 +1,58 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* reiser4 digest transform plugin (is used by cryptcompress object plugin) */ ++/* EDWARD-FIXME-HANS: and it does what? a digest is a what? */ ++#include "../../debug.h" ++#include "../plugin_header.h" ++#include "../plugin.h" ++#include "../file/cryptcompress.h" ++ ++#include ++ ++extern digest_plugin digest_plugins[LAST_DIGEST_ID]; ++ ++static struct crypto_hash * alloc_sha256 (void) ++{ ++#if REISER4_SHA256 ++ return crypto_alloc_hash ("sha256", 0, CRYPTO_ALG_ASYNC); ++#else ++ warning("edward-1418", "sha256 unsupported"); ++ return ERR_PTR(-EINVAL); ++#endif ++} ++ ++static void free_sha256 (struct crypto_hash * tfm) ++{ ++#if REISER4_SHA256 ++ crypto_free_hash(tfm); ++#endif ++ return; ++} ++ ++/* digest plugins */ ++digest_plugin digest_plugins[LAST_DIGEST_ID] = { ++ [SHA256_32_DIGEST_ID] = { ++ .h = { ++ .type_id = REISER4_DIGEST_PLUGIN_TYPE, ++ .id = SHA256_32_DIGEST_ID, ++ .pops = NULL, ++ .label = "sha256_32", ++ .desc = "sha256_32 digest transform", ++ .linkage = {NULL, NULL} ++ }, ++ .fipsize = sizeof(__u32), ++ .alloc = alloc_sha256, ++ .free = free_sha256 ++ } ++}; ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ 
mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/dir/Makefile b/fs/reiser4/plugin/dir/Makefile +new file mode 100644 +index 0000000..ed370b1 +--- /dev/null ++++ b/fs/reiser4/plugin/dir/Makefile +@@ -0,0 +1,5 @@ ++obj-$(CONFIG_REISER4_FS) += dir_plugins.o ++ ++dir_plugins-objs := \ ++ hashed_dir.o \ ++ seekable_dir.o +diff --git a/fs/reiser4/plugin/dir/dir.h b/fs/reiser4/plugin/dir/dir.h +new file mode 100644 +index 0000000..4a91ebe +--- /dev/null ++++ b/fs/reiser4/plugin/dir/dir.h +@@ -0,0 +1,36 @@ ++/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* this file contains declarations of methods implementing directory plugins */ ++ ++#if !defined( __REISER4_DIR_H__ ) ++#define __REISER4_DIR_H__ ++ ++/*#include "../../key.h" ++ ++#include */ ++ ++/* declarations of functions implementing HASHED_DIR_PLUGIN_ID dir plugin */ ++ ++/* "hashed" directory methods of dir plugin */ ++void build_entry_key_hashed(const struct inode *, const struct qstr *, ++ reiser4_key *); ++ ++/* declarations of functions implementing SEEKABLE_HASHED_DIR_PLUGIN_ID dir plugin */ ++ ++/* "seekable" directory methods of dir plugin */ ++void build_entry_key_seekable(const struct inode *, const struct qstr *, ++ reiser4_key *); ++ ++/* __REISER4_DIR_H__ */ ++#endif ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/dir/hashed_dir.c b/fs/reiser4/plugin/dir/hashed_dir.c +new file mode 100644 +index 0000000..0f34824 +--- /dev/null ++++ b/fs/reiser4/plugin/dir/hashed_dir.c +@@ -0,0 +1,81 @@ ++/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Directory plugin using hashes (see fs/reiser4/plugin/hash.c) to map file ++ names to the files. 
*/ ++ ++/* ++ * Hashed directory logically consists of persistent directory ++ * entries. Directory entry is a pair of a file name and a key of stat-data of ++ * a file that has this name in the given directory. ++ * ++ * Directory entries are stored in the tree in the form of directory ++ * items. Directory item should implement dir_entry_ops portion of item plugin ++ * interface (see plugin/item/item.h). Hashed directory interacts with ++ * directory item plugin exclusively through dir_entry_ops operations. ++ * ++ * Currently there are two implementations of directory items: "simple ++ * directory item" (plugin/item/sde.[ch]), and "compound directory item" ++ * (plugin/item/cde.[ch]) with the latter being the default. ++ * ++ * There is, however some delicate way through which directory code interferes ++ * with item plugin: key assignment policy. A key for a directory item is ++ * chosen by directory code, and as described in kassign.c, this key contains ++ * a portion of file name. Directory item uses this knowledge to avoid storing ++ * this portion of file name twice: in the key and in the directory item body. 
++ * ++ */ ++ ++#include "../../inode.h" ++ ++void complete_entry_key(const struct inode *, const char *name, ++ int len, reiser4_key * result); ++ ++/* this is implementation of build_entry_key method of dir ++ plugin for HASHED_DIR_PLUGIN_ID ++ */ ++void build_entry_key_hashed(const struct inode *dir, /* directory where entry is ++ * (or will be) in.*/ ++ const struct qstr *qname, /* name of file referenced ++ * by this entry */ ++ reiser4_key * result /* resulting key of directory ++ * entry */ ) ++{ ++ const char *name; ++ int len; ++ ++ assert("nikita-1139", dir != NULL); ++ assert("nikita-1140", qname != NULL); ++ assert("nikita-1141", qname->name != NULL); ++ assert("nikita-1142", result != NULL); ++ ++ name = qname->name; ++ len = qname->len; ++ ++ assert("nikita-2867", strlen(name) == len); ++ ++ reiser4_key_init(result); ++ /* locality of directory entry's key is objectid of parent ++ directory */ ++ set_key_locality(result, get_inode_oid(dir)); ++ /* minor packing locality is constant */ ++ set_key_type(result, KEY_FILE_NAME_MINOR); ++ /* dot is special case---we always want it to be first entry in ++ a directory. Actually, we just want to have smallest ++ directory entry. 
++ */ ++ if (len == 1 && name[0] == '.') ++ return; ++ ++ /* initialize part of entry key which depends on file name */ ++ complete_entry_key(dir, name, len, result); ++} ++ ++/* Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/dir/seekable_dir.c b/fs/reiser4/plugin/dir/seekable_dir.c +new file mode 100644 +index 0000000..c1c6c4c +--- /dev/null ++++ b/fs/reiser4/plugin/dir/seekable_dir.c +@@ -0,0 +1,46 @@ ++/* Copyright 2005 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++#include "../../inode.h" ++ ++/* this is implementation of build_entry_key method of dir ++ plugin for SEEKABLE_HASHED_DIR_PLUGIN_ID ++ This is for directories where we want repeatable and restartable readdir() ++ even in case 32bit user level struct dirent (readdir(3)). ++*/ ++void ++build_entry_key_seekable(const struct inode *dir, const struct qstr *name, ++ reiser4_key * result) ++{ ++ oid_t objectid; ++ ++ assert("nikita-2283", dir != NULL); ++ assert("nikita-2284", name != NULL); ++ assert("nikita-2285", name->name != NULL); ++ assert("nikita-2286", result != NULL); ++ ++ reiser4_key_init(result); ++ /* locality of directory entry's key is objectid of parent ++ directory */ ++ set_key_locality(result, get_inode_oid(dir)); ++ /* minor packing locality is constant */ ++ set_key_type(result, KEY_FILE_NAME_MINOR); ++ /* dot is special case---we always want it to be first entry in ++ a directory. Actually, we just want to have smallest ++ directory entry. ++ */ ++ if ((name->len == 1) && (name->name[0] == '.')) ++ return; ++ ++ /* objectid of key is 31 lowest bits of hash. */ ++ objectid = ++ inode_hash_plugin(dir)->hash(name->name, ++ (int)name->len) & 0x7fffffff; ++ ++ assert("nikita-2303", !(objectid & ~KEY_OBJECTID_MASK)); ++ set_key_objectid(result, objectid); ++ ++ /* offset is always 0. 
*/ ++ set_key_offset(result, (__u64) 0); ++ return; ++} +diff --git a/fs/reiser4/plugin/dir_plugin_common.c b/fs/reiser4/plugin/dir_plugin_common.c +new file mode 100644 +index 0000000..f5e1028 +--- /dev/null ++++ b/fs/reiser4/plugin/dir_plugin_common.c +@@ -0,0 +1,872 @@ ++/* Copyright 2005 by Hans Reiser, licensing governed by ++ reiser4/README */ ++ ++/* this file contains typical implementations for most of methods of ++ directory plugin ++*/ ++ ++#include "../inode.h" ++ ++int reiser4_find_entry(struct inode *dir, struct dentry *name, ++ lock_handle *, znode_lock_mode, reiser4_dir_entry_desc *); ++int reiser4_lookup_name(struct inode *parent, struct dentry *dentry, reiser4_key * key); ++void check_light_weight(struct inode *inode, struct inode *parent); ++ ++/* this is common implementation of get_parent method of dir plugin ++ this is used by NFS kernel server to "climb" up directory tree to ++ check permissions ++ */ ++struct dentry *get_parent_common(struct inode *child) ++{ ++ struct super_block *s; ++ struct inode *parent; ++ struct dentry dotdot; ++ struct dentry *dentry; ++ reiser4_key key; ++ int result; ++ ++ /* ++ * lookup dotdot entry. ++ */ ++ ++ s = child->i_sb; ++ memset(&dotdot, 0, sizeof(dotdot)); ++ dotdot.d_name.name = ".."; ++ dotdot.d_name.len = 2; ++ dotdot.d_op = &get_super_private(s)->ops.dentry; ++ ++ result = reiser4_lookup_name(child, &dotdot, &key); ++ if (result != 0) ++ return ERR_PTR(result); ++ ++ parent = reiser4_iget(s, &key, 1); ++ if (!IS_ERR(parent)) { ++ /* ++ * FIXME-NIKITA dubious: attributes are inherited from @child ++ * to @parent. But: ++ * ++ * (*) this is the only this we can do ++ * ++ * (*) attributes of light-weight object are inherited ++ * from a parent through which object was looked up first, ++ * so it is ambiguous anyway. 
++ * ++ */ ++ check_light_weight(parent, child); ++ reiser4_iget_complete(parent); ++ dentry = d_alloc_anon(parent); ++ if (dentry == NULL) { ++ iput(parent); ++ dentry = ERR_PTR(RETERR(-ENOMEM)); ++ } else ++ dentry->d_op = &get_super_private(s)->ops.dentry; ++ } else if (PTR_ERR(parent) == -ENOENT) ++ dentry = ERR_PTR(RETERR(-ESTALE)); ++ else ++ dentry = (void *)parent; ++ return dentry; ++} ++ ++/* this is common implementation of is_name_acceptable method of dir ++ plugin ++ */ ++int is_name_acceptable_common(const struct inode *inode, /* directory to check */ ++ const char *name UNUSED_ARG, /* name to check */ ++ int len /* @name's length */ ) ++{ ++ assert("nikita-733", inode != NULL); ++ assert("nikita-734", name != NULL); ++ assert("nikita-735", len > 0); ++ ++ return len <= reiser4_max_filename_len(inode); ++} ++ ++/* there is no common implementation of build_entry_key method of dir ++ plugin. See plugin/dir/hashed_dir.c:build_entry_key_hashed() or ++ plugin/dir/seekable.c:build_entry_key_seekable() for example ++*/ ++ ++/* this is common implementation of build_readdir_key method of dir ++ plugin ++ see reiser4_readdir_common for more details ++*/ ++int build_readdir_key_common(struct file *dir /* directory being read */ , ++ reiser4_key * result /* where to store key */ ) ++{ ++ reiser4_file_fsdata *fdata; ++ struct inode *inode; ++ ++ assert("nikita-1361", dir != NULL); ++ assert("nikita-1362", result != NULL); ++ assert("nikita-1363", dir->f_dentry != NULL); ++ inode = dir->f_dentry->d_inode; ++ assert("nikita-1373", inode != NULL); ++ ++ fdata = reiser4_get_file_fsdata(dir); ++ if (IS_ERR(fdata)) ++ return PTR_ERR(fdata); ++ assert("nikita-1364", fdata != NULL); ++ return extract_key_from_de_id(get_inode_oid(inode), ++ &fdata->dir.readdir.position. 
++ dir_entry_key, result); ++ ++} ++ ++void reiser4_adjust_dir_file(struct inode *, const struct dentry *, int offset, ++ int adj); ++ ++/* this is common implementation of add_entry method of dir plugin ++*/ ++int reiser4_add_entry_common(struct inode *object, /* directory to add new name ++ * in */ ++ struct dentry *where, /* new name */ ++ reiser4_object_create_data * data, /* parameters of ++ * new object */ ++ reiser4_dir_entry_desc * entry /* parameters of ++ * new directory ++ * entry */) ++{ ++ int result; ++ coord_t *coord; ++ lock_handle lh; ++ reiser4_dentry_fsdata *fsdata; ++ reiser4_block_nr reserve; ++ ++ assert("nikita-1114", object != NULL); ++ assert("nikita-1250", where != NULL); ++ ++ fsdata = reiser4_get_dentry_fsdata(where); ++ if (unlikely(IS_ERR(fsdata))) ++ return PTR_ERR(fsdata); ++ ++ reserve = inode_dir_plugin(object)->estimate.add_entry(object); ++ if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) ++ return RETERR(-ENOSPC); ++ ++ init_lh(&lh); ++ coord = &fsdata->dec.entry_coord; ++ coord_clear_iplug(coord); ++ ++ /* check for this entry in a directory. This is plugin method. */ ++ result = reiser4_find_entry(object, where, &lh, ZNODE_WRITE_LOCK, ++ entry); ++ if (likely(result == -ENOENT)) { ++ /* add new entry. Just pass control to the directory ++ item plugin. 
*/ ++ assert("nikita-1709", inode_dir_item_plugin(object)); ++ assert("nikita-2230", coord->node == lh.node); ++ reiser4_seal_done(&fsdata->dec.entry_seal); ++ result = ++ inode_dir_item_plugin(object)->s.dir.add_entry(object, ++ coord, &lh, ++ where, ++ entry); ++ if (result == 0) { ++ reiser4_adjust_dir_file(object, where, ++ fsdata->dec.pos + 1, +1); ++ INODE_INC_FIELD(object, i_size); ++ } ++ } else if (result == 0) { ++ assert("nikita-2232", coord->node == lh.node); ++ result = RETERR(-EEXIST); ++ } ++ done_lh(&lh); ++ ++ return result; ++} ++ ++/** ++ * rem_entry - remove entry from directory item ++ * @dir: ++ * @dentry: ++ * @entry: ++ * @coord: ++ * @lh: ++ * ++ * Checks that coordinate @coord is set properly and calls item plugin ++ * method to cut entry. ++ */ ++static int ++rem_entry(struct inode *dir, struct dentry *dentry, ++ reiser4_dir_entry_desc * entry, coord_t * coord, lock_handle * lh) ++{ ++ item_plugin *iplug; ++ struct inode *child; ++ ++ iplug = inode_dir_item_plugin(dir); ++ child = dentry->d_inode; ++ assert("nikita-3399", child != NULL); ++ ++ /* check that we are really destroying an entry for @child */ ++ if (REISER4_DEBUG) { ++ int result; ++ reiser4_key key; ++ ++ result = iplug->s.dir.extract_key(coord, &key); ++ if (result != 0) ++ return result; ++ if (get_key_objectid(&key) != get_inode_oid(child)) { ++ warning("nikita-3397", ++ "rem_entry: %#llx != %#llx\n", ++ get_key_objectid(&key), ++ (unsigned long long)get_inode_oid(child)); ++ return RETERR(-EIO); ++ } ++ } ++ return iplug->s.dir.rem_entry(dir, &dentry->d_name, coord, lh, entry); ++} ++ ++/** ++ * reiser4_rem_entry_common - remove entry from a directory ++ * @dir: directory to remove entry from ++ * @where: name that is being removed ++ * @entry: description of entry being removed ++ * ++ * This is common implementation of rem_entry method of dir plugin. 
++ */ ++int reiser4_rem_entry_common(struct inode *dir, ++ struct dentry *dentry, ++ reiser4_dir_entry_desc *entry) ++{ ++ int result; ++ coord_t *coord; ++ lock_handle lh; ++ reiser4_dentry_fsdata *fsdata; ++ __u64 tograb; ++ ++ assert("nikita-1124", dir != NULL); ++ assert("nikita-1125", dentry != NULL); ++ ++ tograb = inode_dir_plugin(dir)->estimate.rem_entry(dir); ++ result = reiser4_grab_space(tograb, BA_CAN_COMMIT | BA_RESERVED); ++ if (result != 0) ++ return RETERR(-ENOSPC); ++ ++ init_lh(&lh); ++ ++ /* check for this entry in a directory. This is plugin method. */ ++ result = reiser4_find_entry(dir, dentry, &lh, ZNODE_WRITE_LOCK, entry); ++ fsdata = reiser4_get_dentry_fsdata(dentry); ++ if (IS_ERR(fsdata)) { ++ done_lh(&lh); ++ return PTR_ERR(fsdata); ++ } ++ ++ coord = &fsdata->dec.entry_coord; ++ ++ assert("nikita-3404", ++ get_inode_oid(dentry->d_inode) != get_inode_oid(dir) || ++ dir->i_size <= 1); ++ ++ coord_clear_iplug(coord); ++ if (result == 0) { ++ /* remove entry. Just pass control to the directory item ++ plugin. */ ++ assert("vs-542", inode_dir_item_plugin(dir)); ++ reiser4_seal_done(&fsdata->dec.entry_seal); ++ reiser4_adjust_dir_file(dir, dentry, fsdata->dec.pos, -1); ++ result = ++ WITH_COORD(coord, ++ rem_entry(dir, dentry, entry, coord, &lh)); ++ if (result == 0) { ++ if (dir->i_size >= 1) ++ INODE_DEC_FIELD(dir, i_size); ++ else { ++ warning("nikita-2509", "Dir %llu is runt", ++ (unsigned long long) ++ get_inode_oid(dir)); ++ result = RETERR(-EIO); ++ } ++ ++ assert("nikita-3405", dentry->d_inode->i_nlink != 1 || ++ dentry->d_inode->i_size != 2 || ++ inode_dir_plugin(dentry->d_inode) == NULL); ++ } ++ } ++ done_lh(&lh); ++ ++ return result; ++} ++ ++static reiser4_block_nr estimate_init(struct inode *parent, ++ struct inode *object); ++static int create_dot_dotdot(struct inode *object, struct inode *parent); ++ ++/* this is common implementation of init method of dir plugin ++ create "." and ".." 
entries ++*/ ++int reiser4_dir_init_common(struct inode *object, /* new directory */ ++ struct inode *parent, /* parent directory */ ++ reiser4_object_create_data * data /* info passed ++ * to us, this ++ * is filled by ++ * reiser4() ++ * syscall in ++ * particular */) ++{ ++ reiser4_block_nr reserve; ++ ++ assert("nikita-680", object != NULL); ++ assert("nikita-681", S_ISDIR(object->i_mode)); ++ assert("nikita-682", parent != NULL); ++ assert("nikita-684", data != NULL); ++ assert("nikita-686", data->id == DIRECTORY_FILE_PLUGIN_ID); ++ assert("nikita-687", object->i_mode & S_IFDIR); ++ ++ reserve = estimate_init(parent, object); ++ if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) ++ return RETERR(-ENOSPC); ++ ++ return create_dot_dotdot(object, parent); ++} ++ ++/* this is common implementation of done method of dir plugin ++ remove "." entry ++*/ ++int reiser4_dir_done_common(struct inode *object /* object being deleted */ ) ++{ ++ int result; ++ reiser4_block_nr reserve; ++ struct dentry goodby_dots; ++ reiser4_dir_entry_desc entry; ++ ++ assert("nikita-1449", object != NULL); ++ ++ if (reiser4_inode_get_flag(object, REISER4_NO_SD)) ++ return 0; ++ ++ /* of course, this can be rewritten to sweep everything in one ++ reiser4_cut_tree(). 
*/ ++ memset(&entry, 0, sizeof entry); ++ ++ /* FIXME: this done method is called from reiser4_delete_dir_common which ++ * reserved space already */ ++ reserve = inode_dir_plugin(object)->estimate.rem_entry(object); ++ if (reiser4_grab_space(reserve, BA_CAN_COMMIT | BA_RESERVED)) ++ return RETERR(-ENOSPC); ++ ++ memset(&goodby_dots, 0, sizeof goodby_dots); ++ entry.obj = goodby_dots.d_inode = object; ++ goodby_dots.d_name.name = "."; ++ goodby_dots.d_name.len = 1; ++ result = reiser4_rem_entry_common(object, &goodby_dots, &entry); ++ reiser4_free_dentry_fsdata(&goodby_dots); ++ if (unlikely(result != 0 && result != -ENOMEM && result != -ENOENT)) ++ /* only worth a warning ++ ++ "values of B will give rise to dom!\n" ++ -- v6src/s2/mv.c:89 ++ */ ++ warning("nikita-2252", "Cannot remove dot of %lli: %i", ++ (unsigned long long)get_inode_oid(object), result); ++ return 0; ++} ++ ++/* this is common implementation of attach method of dir plugin ++*/ ++int reiser4_attach_common(struct inode *child UNUSED_ARG, ++ struct inode *parent UNUSED_ARG) ++{ ++ assert("nikita-2647", child != NULL); ++ assert("nikita-2648", parent != NULL); ++ ++ return 0; ++} ++ ++/* this is common implementation of detach method of dir plugin ++ remove "..", decrease nlink on parent ++*/ ++int reiser4_detach_common(struct inode *object, struct inode *parent) ++{ ++ int result; ++ struct dentry goodby_dots; ++ reiser4_dir_entry_desc entry; ++ ++ assert("nikita-2885", object != NULL); ++ assert("nikita-2886", !reiser4_inode_get_flag(object, REISER4_NO_SD)); ++ ++ memset(&entry, 0, sizeof entry); ++ ++ /* NOTE-NIKITA this only works if @parent is -the- parent of ++ @object, viz. object whose key is stored in dotdot ++ entry. Wouldn't work with hard-links on directories. 
*/ ++ memset(&goodby_dots, 0, sizeof goodby_dots); ++ entry.obj = goodby_dots.d_inode = parent; ++ goodby_dots.d_name.name = ".."; ++ goodby_dots.d_name.len = 2; ++ result = reiser4_rem_entry_common(object, &goodby_dots, &entry); ++ reiser4_free_dentry_fsdata(&goodby_dots); ++ if (result == 0) { ++ /* the dot should be the only entry remaining at this time... */ ++ assert("nikita-3400", ++ object->i_size == 1 && object->i_nlink <= 2); ++#if 0 ++ /* and, together with the only name directory can have, they ++ * provides for the last 2 remaining references. If we get ++ * here as part of error handling during mkdir, @object ++ * possibly has no name yet, so its nlink == 1. If we get here ++ * from rename (targeting empty directory), it has no name ++ * already, so its nlink == 1. */ ++ assert("nikita-3401", ++ object->i_nlink == 2 || object->i_nlink == 1); ++#endif ++ ++ /* decrement nlink of directory removed ".." pointed ++ to */ ++ reiser4_del_nlink(parent, NULL, 0); ++ } ++ return result; ++} ++ ++/* this is common implementation of estimate.add_entry method of ++ dir plugin ++ estimation of adding entry which supposes that entry is inserting a ++ unit into item ++*/ ++reiser4_block_nr estimate_add_entry_common(const struct inode * inode) ++{ ++ return estimate_one_insert_into_item(reiser4_tree_by_inode(inode)); ++} ++ ++/* this is common implementation of estimate.rem_entry method of dir ++ plugin ++*/ ++reiser4_block_nr estimate_rem_entry_common(const struct inode * inode) ++{ ++ return estimate_one_item_removal(reiser4_tree_by_inode(inode)); ++} ++ ++/* this is common implementation of estimate.unlink method of dir ++ plugin ++*/ ++reiser4_block_nr ++dir_estimate_unlink_common(const struct inode * parent, ++ const struct inode * object) ++{ ++ reiser4_block_nr res; ++ ++ /* hashed_rem_entry(object) */ ++ res = inode_dir_plugin(object)->estimate.rem_entry(object); ++ /* del_nlink(parent) */ ++ res += 2 * inode_file_plugin(parent)->estimate.update(parent); ++ ++ 
return res; ++} ++ ++/* ++ * helper for inode_ops ->lookup() and dir plugin's ->get_parent() ++ * methods: if @inode is a light-weight file, setup its credentials ++ * that are not stored in the stat-data in this case ++ */ ++void check_light_weight(struct inode *inode, struct inode *parent) ++{ ++ if (reiser4_inode_get_flag(inode, REISER4_LIGHT_WEIGHT)) { ++ inode->i_uid = parent->i_uid; ++ inode->i_gid = parent->i_gid; ++ /* clear light-weight flag. If inode would be read by any ++ other name, [ug]id wouldn't change. */ ++ reiser4_inode_clr_flag(inode, REISER4_LIGHT_WEIGHT); ++ } ++} ++ ++/* looks for name specified in @dentry in directory @parent and if name is ++ found - key of object found entry points to is stored in @entry->key */ ++int reiser4_lookup_name(struct inode *parent, /* inode of directory to lookup for ++ * name in */ ++ struct dentry *dentry, /* name to look for */ ++ reiser4_key * key /* place to store key */ ) ++{ ++ int result; ++ coord_t *coord; ++ lock_handle lh; ++ const char *name; ++ int len; ++ reiser4_dir_entry_desc entry; ++ reiser4_dentry_fsdata *fsdata; ++ ++ assert("nikita-1247", parent != NULL); ++ assert("nikita-1248", dentry != NULL); ++ assert("nikita-1123", dentry->d_name.name != NULL); ++ assert("vs-1486", ++ dentry->d_op == &get_super_private(parent->i_sb)->ops.dentry); ++ ++ name = dentry->d_name.name; ++ len = dentry->d_name.len; ++ ++ if (!inode_dir_plugin(parent)->is_name_acceptable(parent, name, len)) ++ /* some arbitrary error code to return */ ++ return RETERR(-ENAMETOOLONG); ++ ++ fsdata = reiser4_get_dentry_fsdata(dentry); ++ if (IS_ERR(fsdata)) ++ return PTR_ERR(fsdata); ++ ++ coord = &fsdata->dec.entry_coord; ++ coord_clear_iplug(coord); ++ init_lh(&lh); ++ ++ /* find entry in a directory. This is plugin method. */ ++ result = reiser4_find_entry(parent, dentry, &lh, ZNODE_READ_LOCK, ++ &entry); ++ if (result == 0) { ++ /* entry was found, extract object key from it. 
*/ ++ result = ++ WITH_COORD(coord, ++ item_plugin_by_coord(coord)->s.dir. ++ extract_key(coord, key)); ++ } ++ done_lh(&lh); ++ return result; ++ ++} ++ ++/* helper for reiser4_dir_init_common(): estimate number of blocks to reserve */ ++static reiser4_block_nr ++estimate_init(struct inode *parent, struct inode *object) ++{ ++ reiser4_block_nr res = 0; ++ ++ assert("vpf-321", parent != NULL); ++ assert("vpf-322", object != NULL); ++ ++ /* hashed_add_entry(object) */ ++ res += inode_dir_plugin(object)->estimate.add_entry(object); ++ /* reiser4_add_nlink(object) */ ++ res += inode_file_plugin(object)->estimate.update(object); ++ /* hashed_add_entry(object) */ ++ res += inode_dir_plugin(object)->estimate.add_entry(object); ++ /* reiser4_add_nlink(parent) */ ++ res += inode_file_plugin(parent)->estimate.update(parent); ++ ++ return 0; ++} ++ ++/* helper function for reiser4_dir_init_common(). Create "." and ".." */ ++static int create_dot_dotdot(struct inode *object /* object to create dot and ++ * dotdot for */ , ++ struct inode *parent /* parent of @object */) ++{ ++ int result; ++ struct dentry dots_entry; ++ reiser4_dir_entry_desc entry; ++ ++ assert("nikita-688", object != NULL); ++ assert("nikita-689", S_ISDIR(object->i_mode)); ++ assert("nikita-691", parent != NULL); ++ ++ /* We store dot and dotdot as normal directory entries. This is ++ not necessary, because almost all information stored in them ++ is already in the stat-data of directory, the only thing ++ being missed is objectid of grand-parent directory that can ++ easily be added there as extension. ++ ++ But it is done the way it is done, because not storing dot ++ and dotdot will lead to the following complications: ++ ++ . special case handling in ->lookup(). ++ . addition of another extension to the sd. ++ . dependency on key allocation policy for stat data. 
++ ++ */ ++ ++ memset(&entry, 0, sizeof entry); ++ memset(&dots_entry, 0, sizeof dots_entry); ++ entry.obj = dots_entry.d_inode = object; ++ dots_entry.d_name.name = "."; ++ dots_entry.d_name.len = 1; ++ result = reiser4_add_entry_common(object, &dots_entry, NULL, &entry); ++ reiser4_free_dentry_fsdata(&dots_entry); ++ ++ if (result == 0) { ++ result = reiser4_add_nlink(object, object, 0); ++ if (result == 0) { ++ entry.obj = dots_entry.d_inode = parent; ++ dots_entry.d_name.name = ".."; ++ dots_entry.d_name.len = 2; ++ result = reiser4_add_entry_common(object, ++ &dots_entry, NULL, &entry); ++ reiser4_free_dentry_fsdata(&dots_entry); ++ /* if creation of ".." failed, iput() will delete ++ object with ".". */ ++ if (result == 0) { ++ result = reiser4_add_nlink(parent, object, 0); ++ if (result != 0) ++ /* ++ * if we failed to bump i_nlink, try ++ * to remove ".." ++ */ ++ reiser4_detach_common(object, parent); ++ } ++ } ++ } ++ ++ if (result != 0) { ++ /* ++ * in the case of error, at least update stat-data so that, ++ * ->i_nlink updates are not lingering. ++ */ ++ reiser4_update_sd(object); ++ reiser4_update_sd(parent); ++ } ++ ++ return result; ++} ++ ++/* ++ * return 0 iff @coord contains a directory entry for the file with the name ++ * @name. 
++ */ ++static int ++check_item(const struct inode *dir, const coord_t * coord, const char *name) ++{ ++ item_plugin *iplug; ++ char buf[DE_NAME_BUF_LEN]; ++ ++ iplug = item_plugin_by_coord(coord); ++ if (iplug == NULL) { ++ warning("nikita-1135", "Cannot get item plugin"); ++ print_coord("coord", coord, 1); ++ return RETERR(-EIO); ++ } else if (item_id_by_coord(coord) != ++ item_id_by_plugin(inode_dir_item_plugin(dir))) { ++ /* item id of current item does not match to id of items a ++ directory is built of */ ++ warning("nikita-1136", "Wrong item plugin"); ++ print_coord("coord", coord, 1); ++ return RETERR(-EIO); ++ } ++ assert("nikita-1137", iplug->s.dir.extract_name); ++ ++ /* Compare name stored in this entry with name we are looking for. ++ ++ NOTE-NIKITA Here should go code for support of something like ++ unicode, code tables, etc. ++ */ ++ return !!strcmp(name, iplug->s.dir.extract_name(coord, buf)); ++} ++ ++static int ++check_entry(const struct inode *dir, coord_t * coord, const struct qstr *name) ++{ ++ return WITH_COORD(coord, check_item(dir, coord, name->name)); ++} ++ ++/* ++ * argument package used by entry_actor to scan entries with identical keys. ++ */ ++typedef struct entry_actor_args { ++ /* name we are looking for */ ++ const char *name; ++ /* key of directory entry. entry_actor() scans through sequence of ++ * items/units having the same key */ ++ reiser4_key *key; ++ /* how many entries with duplicate key was scanned so far. 
*/ ++ int non_uniq; ++#if REISER4_USE_COLLISION_LIMIT ++ /* scan limit */ ++ int max_non_uniq; ++#endif ++ /* return parameter: set to true, if ->name wasn't found */ ++ int not_found; ++ /* what type of lock to take when moving to the next node during ++ * scan */ ++ znode_lock_mode mode; ++ ++ /* last coord that was visited during scan */ ++ coord_t last_coord; ++ /* last node locked during scan */ ++ lock_handle last_lh; ++ /* inode of directory */ ++ const struct inode *inode; ++} entry_actor_args; ++ ++/* Function called by reiser4_find_entry() to look for given name ++ in the directory. */ ++static int entry_actor(reiser4_tree * tree UNUSED_ARG /* tree being scanned */ , ++ coord_t * coord /* current coord */ , ++ lock_handle * lh /* current lock handle */ , ++ void *entry_actor_arg /* argument to scan */ ) ++{ ++ reiser4_key unit_key; ++ entry_actor_args *args; ++ ++ assert("nikita-1131", tree != NULL); ++ assert("nikita-1132", coord != NULL); ++ assert("nikita-1133", entry_actor_arg != NULL); ++ ++ args = entry_actor_arg; ++ ++args->non_uniq; ++#if REISER4_USE_COLLISION_LIMIT ++ if (args->non_uniq > args->max_non_uniq) { ++ args->not_found = 1; ++ /* hash collision overflow. */ ++ return RETERR(-EBUSY); ++ } ++#endif ++ ++ /* ++ * did we just reach the end of the sequence of items/units with ++ * identical keys? ++ */ ++ if (!keyeq(args->key, unit_key_by_coord(coord, &unit_key))) { ++ assert("nikita-1791", ++ keylt(args->key, unit_key_by_coord(coord, &unit_key))); ++ args->not_found = 1; ++ args->last_coord.between = AFTER_UNIT; ++ return 0; ++ } ++ ++ coord_dup(&args->last_coord, coord); ++ /* ++ * did scan just moved to the next node? 
++ */ ++ if (args->last_lh.node != lh->node) { ++ int lock_result; ++ ++ /* ++ * if so, lock new node with the mode requested by the caller ++ */ ++ done_lh(&args->last_lh); ++ assert("nikita-1896", znode_is_any_locked(lh->node)); ++ lock_result = longterm_lock_znode(&args->last_lh, lh->node, ++ args->mode, ZNODE_LOCK_HIPRI); ++ if (lock_result != 0) ++ return lock_result; ++ } ++ return check_item(args->inode, coord, args->name); ++} ++ ++/* Look for given @name within directory @dir. ++ ++ This is called during lookup, creation and removal of directory ++ entries and on reiser4_rename_common ++ ++ First calculate key that directory entry for @name would have. Search ++ for this key in the tree. If such key is found, scan all items with ++ the same key, checking name in each directory entry along the way. ++*/ ++int reiser4_find_entry(struct inode *dir, /* directory to scan */ ++ struct dentry *de, /* name to search for */ ++ lock_handle * lh, /* resulting lock handle */ ++ znode_lock_mode mode, /* required lock mode */ ++ reiser4_dir_entry_desc * entry /* parameters of found ++ directory entry */) ++{ ++ const struct qstr *name; ++ seal_t *seal; ++ coord_t *coord; ++ int result; ++ __u32 flags; ++ de_location *dec; ++ reiser4_dentry_fsdata *fsdata; ++ ++ assert("nikita-1130", lh != NULL); ++ assert("nikita-1128", dir != NULL); ++ ++ name = &de->d_name; ++ assert("nikita-1129", name != NULL); ++ ++ /* dentry private data don't require lock, because dentry ++ manipulations are protected by i_mutex on parent. ++ ++ This is not so for inodes, because there is no -the- parent in ++ inode case. 
++ */ ++ fsdata = reiser4_get_dentry_fsdata(de); ++ if (IS_ERR(fsdata)) ++ return PTR_ERR(fsdata); ++ dec = &fsdata->dec; ++ ++ coord = &dec->entry_coord; ++ coord_clear_iplug(coord); ++ seal = &dec->entry_seal; ++ /* compose key of directory entry for @name */ ++ inode_dir_plugin(dir)->build_entry_key(dir, name, &entry->key); ++ ++ if (reiser4_seal_is_set(seal)) { ++ /* check seal */ ++ result = reiser4_seal_validate(seal, coord, &entry->key, ++ lh, mode, ZNODE_LOCK_LOPRI); ++ if (result == 0) { ++ /* key was found. Check that it is really item we are ++ looking for. */ ++ result = check_entry(dir, coord, name); ++ if (result == 0) ++ return 0; ++ } ++ } ++ flags = (mode == ZNODE_WRITE_LOCK) ? CBK_FOR_INSERT : 0; ++ /* ++ * find place in the tree where directory item should be located. ++ */ ++ result = reiser4_object_lookup(dir, &entry->key, coord, lh, mode, ++ FIND_EXACT, LEAF_LEVEL, LEAF_LEVEL, ++ flags, NULL /*ra_info */ ); ++ if (result == CBK_COORD_FOUND) { ++ entry_actor_args arg; ++ ++ /* fast path: no hash collisions */ ++ result = check_entry(dir, coord, name); ++ if (result == 0) { ++ reiser4_seal_init(seal, coord, &entry->key); ++ dec->pos = 0; ++ } else if (result > 0) { ++ /* Iterate through all units with the same keys. */ ++ arg.name = name->name; ++ arg.key = &entry->key; ++ arg.not_found = 0; ++ arg.non_uniq = 0; ++#if REISER4_USE_COLLISION_LIMIT ++ arg.max_non_uniq = max_hash_collisions(dir); ++ assert("nikita-2851", arg.max_non_uniq > 1); ++#endif ++ arg.mode = mode; ++ arg.inode = dir; ++ coord_init_zero(&arg.last_coord); ++ init_lh(&arg.last_lh); ++ ++ result = reiser4_iterate_tree ++ (reiser4_tree_by_inode(dir), ++ coord, lh, ++ entry_actor, &arg, mode, 1); ++ /* if end of the tree or extent was reached during ++ scanning. 
*/ ++ if (arg.not_found || (result == -E_NO_NEIGHBOR)) { ++ /* step back */ ++ done_lh(lh); ++ ++ result = zload(arg.last_coord.node); ++ if (result == 0) { ++ coord_clear_iplug(&arg.last_coord); ++ coord_dup(coord, &arg.last_coord); ++ move_lh(lh, &arg.last_lh); ++ result = RETERR(-ENOENT); ++ zrelse(arg.last_coord.node); ++ --arg.non_uniq; ++ } ++ } ++ ++ done_lh(&arg.last_lh); ++ if (result == 0) ++ reiser4_seal_init(seal, coord, &entry->key); ++ ++ if (result == 0 || result == -ENOENT) { ++ assert("nikita-2580", arg.non_uniq > 0); ++ dec->pos = arg.non_uniq - 1; ++ } ++ } ++ } else ++ dec->pos = -1; ++ return result; ++} ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/disk_format/Makefile b/fs/reiser4/plugin/disk_format/Makefile +new file mode 100644 +index 0000000..e4e9e54 +--- /dev/null ++++ b/fs/reiser4/plugin/disk_format/Makefile +@@ -0,0 +1,5 @@ ++obj-$(CONFIG_REISER4_FS) += df_plugins.o ++ ++df_plugins-objs := \ ++ disk_format40.o \ ++ disk_format.o +diff --git a/fs/reiser4/plugin/disk_format/disk_format.c b/fs/reiser4/plugin/disk_format/disk_format.c +new file mode 100644 +index 0000000..d785106 +--- /dev/null ++++ b/fs/reiser4/plugin/disk_format/disk_format.c +@@ -0,0 +1,38 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "../../debug.h" ++#include "../plugin_header.h" ++#include "disk_format40.h" ++#include "disk_format.h" ++#include "../plugin.h" ++ ++/* initialization of disk layout plugins */ ++disk_format_plugin format_plugins[LAST_FORMAT_ID] = { ++ [FORMAT40_ID] = { ++ .h = { ++ .type_id = REISER4_FORMAT_PLUGIN_TYPE, ++ .id = FORMAT40_ID, ++ .pops = NULL, ++ .label = "reiser40", ++ .desc = "standard disk layout for reiser40", ++ .linkage = {NULL, NULL} ++ }, ++ .init_format = init_format_format40, ++ .root_dir_key = root_dir_key_format40, ++ .release = 
release_format40, ++ .log_super = log_super_format40, ++ .check_open = check_open_format40, ++ .version_update = version_update_format40 ++ } ++}; ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/disk_format/disk_format.h b/fs/reiser4/plugin/disk_format/disk_format.h +new file mode 100644 +index 0000000..b9c53ac +--- /dev/null ++++ b/fs/reiser4/plugin/disk_format/disk_format.h +@@ -0,0 +1,27 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* identifiers for disk layouts, they are also used as indexes in array of disk ++ plugins */ ++ ++#if !defined( __REISER4_DISK_FORMAT_H__ ) ++#define __REISER4_DISK_FORMAT_H__ ++ ++typedef enum { ++ /* standard reiser4 disk layout plugin id */ ++ FORMAT40_ID, ++ LAST_FORMAT_ID ++} disk_format_id; ++ ++/* __REISER4_DISK_FORMAT_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/disk_format/disk_format40.c b/fs/reiser4/plugin/disk_format/disk_format40.c +new file mode 100644 +index 0000000..17718f0 +--- /dev/null ++++ b/fs/reiser4/plugin/disk_format/disk_format40.c +@@ -0,0 +1,655 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "../../debug.h" ++#include "../../dformat.h" ++#include "../../key.h" ++#include "../node/node.h" ++#include "../space/space_allocator.h" ++#include "disk_format40.h" ++#include "../plugin.h" ++#include "../../txnmgr.h" ++#include "../../jnode.h" ++#include "../../tree.h" ++#include "../../super.h" ++#include "../../wander.h" ++#include "../../inode.h" ++#include "../../ktxnmgrd.h" ++#include "../../status_flags.h" ++ ++#include /* for __u?? 
*/ ++#include /* for struct super_block */ ++#include ++ ++/* reiser 4.0 default disk layout */ ++ ++/* Amount of free blocks needed to perform release_format40 when fs gets ++ mounted RW: 1 for SB, 1 for non-leaves in overwrite set, 2 for tx header ++ & tx record. */ ++#define RELEASE_RESERVED 4 ++ ++/* The greatest supported format40 version number */ ++#define FORMAT40_VERSION PLUGIN_LIBRARY_VERSION ++ ++/* This flag indicates that backup should be updated ++ (the update is performed by fsck) */ ++#define FORMAT40_UPDATE_BACKUP (1 << 31) ++ ++/* functions to access fields of format40_disk_super_block */ ++static __u64 get_format40_block_count(const format40_disk_super_block * sb) ++{ ++ return le64_to_cpu(get_unaligned(&sb->block_count)); ++} ++ ++static __u64 get_format40_free_blocks(const format40_disk_super_block * sb) ++{ ++ return le64_to_cpu(get_unaligned(&sb->free_blocks)); ++} ++ ++static __u64 get_format40_root_block(const format40_disk_super_block * sb) ++{ ++ return le64_to_cpu(get_unaligned(&sb->root_block)); ++} ++ ++static __u16 get_format40_tree_height(const format40_disk_super_block * sb) ++{ ++ return le16_to_cpu(get_unaligned(&sb->tree_height)); ++} ++ ++static __u64 get_format40_file_count(const format40_disk_super_block * sb) ++{ ++ return le64_to_cpu(get_unaligned(&sb->file_count)); ++} ++ ++static __u64 get_format40_oid(const format40_disk_super_block * sb) ++{ ++ return le64_to_cpu(get_unaligned(&sb->oid)); ++} ++ ++static __u32 get_format40_mkfs_id(const format40_disk_super_block * sb) ++{ ++ return le32_to_cpu(get_unaligned(&sb->mkfs_id)); ++} ++ ++static __u64 get_format40_flags(const format40_disk_super_block * sb) ++{ ++ return le64_to_cpu(get_unaligned(&sb->flags)); ++} ++ ++static __u32 get_format40_version(const format40_disk_super_block * sb) ++{ ++ return le32_to_cpu(get_unaligned(&sb->version)) & ++ ~FORMAT40_UPDATE_BACKUP; ++} ++ ++static int update_backup_version(const format40_disk_super_block * sb) ++{ ++ return 
(le32_to_cpu(get_unaligned(&sb->version)) & ++ FORMAT40_UPDATE_BACKUP); ++} ++ ++static int update_disk_version(const format40_disk_super_block * sb) ++{ ++ return (get_format40_version(sb) < FORMAT40_VERSION); ++} ++ ++static int incomplete_compatibility(const format40_disk_super_block * sb) ++{ ++ return (get_format40_version(sb) > FORMAT40_VERSION); ++} ++ ++static format40_super_info *get_sb_info(struct super_block *super) ++{ ++ return &get_super_private(super)->u.format40; ++} ++ ++static int consult_diskmap(struct super_block *s) ++{ ++ format40_super_info *info; ++ journal_location *jloc; ++ ++ info = get_sb_info(s); ++ jloc = &get_super_private(s)->jloc; ++ /* Default format-specific locations, if there is nothing in ++ * diskmap */ ++ jloc->footer = FORMAT40_JOURNAL_FOOTER_BLOCKNR; ++ jloc->header = FORMAT40_JOURNAL_HEADER_BLOCKNR; ++ info->loc.super = FORMAT40_OFFSET / s->s_blocksize; ++#ifdef CONFIG_REISER4_BADBLOCKS ++ reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_JF, ++ &jloc->footer); ++ reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_JH, ++ &jloc->header); ++ reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_SUPER, ++ &info->loc.super); ++#endif ++ return 0; ++} ++ ++/* find any valid super block of disk_format40 (even if the first ++ super block is destroyed), will change block numbers of actual journal header/footer (jf/jh) ++ if needed */ ++static struct buffer_head *find_a_disk_format40_super_block(struct super_block ++ *s) ++{ ++ struct buffer_head *super_bh; ++ format40_disk_super_block *disk_sb; ++ format40_super_info *info; ++ ++ assert("umka-487", s != NULL); ++ ++ info = get_sb_info(s); ++ ++ super_bh = sb_bread(s, info->loc.super); ++ if (super_bh == NULL) ++ return ERR_PTR(RETERR(-EIO)); ++ ++ disk_sb = (format40_disk_super_block *) super_bh->b_data; ++ if (strncmp(disk_sb->magic, FORMAT40_MAGIC, sizeof(FORMAT40_MAGIC))) { ++ brelse(super_bh); ++ return ERR_PTR(RETERR(-EINVAL)); ++ } ++ ++ 
reiser4_set_block_count(s, le64_to_cpu(get_unaligned(&disk_sb->block_count))); ++ reiser4_set_data_blocks(s, le64_to_cpu(get_unaligned(&disk_sb->block_count)) - ++ le64_to_cpu(get_unaligned(&disk_sb->free_blocks))); ++ reiser4_set_free_blocks(s, le64_to_cpu(get_unaligned(&disk_sb->free_blocks))); ++ ++ return super_bh; ++} ++ ++/* find the most recent version of super block. This is called after journal is ++ replayed */ ++static struct buffer_head *read_super_block(struct super_block *s UNUSED_ARG) ++{ ++ /* Here the most recent superblock copy has to be read. However, as ++ journal replay isn't complete, we are using ++ find_a_disk_format40_super_block() function. */ ++ return find_a_disk_format40_super_block(s); ++} ++ ++static int get_super_jnode(struct super_block *s) ++{ ++ reiser4_super_info_data *sbinfo = get_super_private(s); ++ jnode *sb_jnode; ++ int ret; ++ ++ sb_jnode = reiser4_alloc_io_head(&get_sb_info(s)->loc.super); ++ ++ ret = jload(sb_jnode); ++ ++ if (ret) { ++ reiser4_drop_io_head(sb_jnode); ++ return ret; ++ } ++ ++ pin_jnode_data(sb_jnode); ++ jrelse(sb_jnode); ++ ++ sbinfo->u.format40.sb_jnode = sb_jnode; ++ ++ return 0; ++} ++ ++static void done_super_jnode(struct super_block *s) ++{ ++ jnode *sb_jnode = get_super_private(s)->u.format40.sb_jnode; ++ ++ if (sb_jnode) { ++ unpin_jnode_data(sb_jnode); ++ reiser4_drop_io_head(sb_jnode); ++ } ++} ++ ++typedef enum format40_init_stage { ++ NONE_DONE = 0, ++ CONSULT_DISKMAP, ++ FIND_A_SUPER, ++ INIT_JOURNAL_INFO, ++ INIT_STATUS, ++ JOURNAL_REPLAY, ++ READ_SUPER, ++ KEY_CHECK, ++ INIT_OID, ++ INIT_TREE, ++ JOURNAL_RECOVER, ++ INIT_SA, ++ INIT_JNODE, ++ ALL_DONE ++} format40_init_stage; ++ ++static format40_disk_super_block *copy_sb(const struct buffer_head *super_bh) ++{ ++ format40_disk_super_block *sb_copy; ++ ++ sb_copy = kmalloc(sizeof(format40_disk_super_block), ++ reiser4_ctx_gfp_mask_get()); ++ if (sb_copy == NULL) ++ return ERR_PTR(RETERR(-ENOMEM)); ++ memcpy(sb_copy, 
((format40_disk_super_block *) super_bh->b_data), ++ sizeof(format40_disk_super_block)); ++ return sb_copy; ++} ++ ++static int check_key_format(const format40_disk_super_block *sb_copy) ++{ ++ if (!equi(REISER4_LARGE_KEY, ++ get_format40_flags(sb_copy) & (1 << FORMAT40_LARGE_KEYS))) { ++ warning("nikita-3228", "Key format mismatch. " ++ "Only %s keys are supported.", ++ REISER4_LARGE_KEY ? "large" : "small"); ++ return RETERR(-EINVAL); ++ } ++ return 0; ++} ++ ++/** ++ * try_init_format40 ++ * @super: ++ * @stage: ++ * ++ */ ++static int try_init_format40(struct super_block *super, ++ format40_init_stage *stage) ++{ ++ int result; ++ struct buffer_head *super_bh; ++ reiser4_super_info_data *sbinfo; ++ format40_disk_super_block *sb_copy; ++ tree_level height; ++ reiser4_block_nr root_block; ++ node_plugin *nplug; ++ ++ assert("vs-475", super != NULL); ++ assert("vs-474", get_super_private(super)); ++ ++ *stage = NONE_DONE; ++ ++ result = consult_diskmap(super); ++ if (result) ++ return result; ++ *stage = CONSULT_DISKMAP; ++ ++ super_bh = find_a_disk_format40_super_block(super); ++ if (IS_ERR(super_bh)) ++ return PTR_ERR(super_bh); ++ brelse(super_bh); ++ *stage = FIND_A_SUPER; ++ ++ /* ok, we are sure that filesystem format is a format40 format */ ++ ++ /* map jnodes for journal control blocks (header, footer) to disk */ ++ result = reiser4_init_journal_info(super); ++ if (result) ++ return result; ++ *stage = INIT_JOURNAL_INFO; ++ ++ /* ok, we are sure that filesystem format is a format40 format */ ++ /* Now check it's state */ ++ result = reiser4_status_init(FORMAT40_STATUS_BLOCKNR); ++ if (result != 0 && result != -EINVAL) ++ /* -EINVAL means there is no magic, so probably just old ++ * fs. 
*/ ++ return result; ++ *stage = INIT_STATUS; ++ ++ result = reiser4_status_query(NULL, NULL); ++ if (result == REISER4_STATUS_MOUNT_WARN) ++ notice("vpf-1363", "Warning: mounting %s with errors.", ++ super->s_id); ++ if (result == REISER4_STATUS_MOUNT_RO) ++ notice("vpf-1364", "Warning: mounting %s with fatal errors," ++ " forcing read-only mount.", super->s_id); ++ result = reiser4_journal_replay(super); ++ if (result) ++ return result; ++ *stage = JOURNAL_REPLAY; ++ ++ super_bh = read_super_block(super); ++ if (IS_ERR(super_bh)) ++ return PTR_ERR(super_bh); ++ *stage = READ_SUPER; ++ ++ /* allocate and make a copy of format40_disk_super_block */ ++ sb_copy = copy_sb(super_bh); ++ brelse(super_bh); ++ ++ if (IS_ERR(sb_copy)) ++ return PTR_ERR(sb_copy); ++ printk("reiser4: %s: found disk format 4.0.%u.\n", ++ super->s_id, ++ get_format40_version(sb_copy)); ++ if (incomplete_compatibility(sb_copy)) ++ printk("reiser4: Warning: The last completely supported " ++ "version of disk format40 is %u. 
Some objects of " ++ "the semantic tree can be unaccessible.\n", ++ FORMAT40_VERSION); ++ /* make sure that key format of kernel and filesystem match */ ++ result = check_key_format(sb_copy); ++ if (result) { ++ kfree(sb_copy); ++ return result; ++ } ++ *stage = KEY_CHECK; ++ ++ result = oid_init_allocator(super, get_format40_file_count(sb_copy), ++ get_format40_oid(sb_copy)); ++ if (result) { ++ kfree(sb_copy); ++ return result; ++ } ++ *stage = INIT_OID; ++ ++ /* get things necessary to init reiser4_tree */ ++ root_block = get_format40_root_block(sb_copy); ++ height = get_format40_tree_height(sb_copy); ++ nplug = node_plugin_by_id(NODE40_ID); ++ ++ /* initialize reiser4_super_info_data */ ++ sbinfo = get_super_private(super); ++ assert("", sbinfo->tree.super == super); ++ /* init reiser4_tree for the filesystem */ ++ result = reiser4_init_tree(&sbinfo->tree, &root_block, height, nplug); ++ if (result) { ++ kfree(sb_copy); ++ return result; ++ } ++ *stage = INIT_TREE; ++ ++ /* ++ * initialize reiser4_super_info_data with data from format40 super ++ * block ++ */ ++ sbinfo->default_uid = 0; ++ sbinfo->default_gid = 0; ++ sbinfo->mkfs_id = get_format40_mkfs_id(sb_copy); ++ /* number of blocks in filesystem and reserved space */ ++ reiser4_set_block_count(super, get_format40_block_count(sb_copy)); ++ sbinfo->blocks_free = get_format40_free_blocks(sb_copy); ++ sbinfo->version = get_format40_version(sb_copy); ++ kfree(sb_copy); ++ ++ if (update_backup_version(sb_copy)) ++ printk("reiser4: Warning: metadata backup is not updated. 
" ++ "Please run 'fsck.reiser4 --fix' on %s.\n", ++ super->s_id); ++ ++ sbinfo->fsuid = 0; ++ sbinfo->fs_flags |= (1 << REISER4_ADG); /* hard links for directories ++ * are not supported */ ++ sbinfo->fs_flags |= (1 << REISER4_ONE_NODE_PLUGIN); /* all nodes in ++ * layout 40 are ++ * of one ++ * plugin */ ++ /* sbinfo->tmgr is initialized already */ ++ ++ /* recover sb data which were logged separately from sb block */ ++ ++ /* NOTE-NIKITA: reiser4_journal_recover_sb_data() calls ++ * oid_init_allocator() and reiser4_set_free_blocks() with new ++ * data. What's the reason to call them above? */ ++ result = reiser4_journal_recover_sb_data(super); ++ if (result != 0) ++ return result; ++ *stage = JOURNAL_RECOVER; ++ ++ /* ++ * Set number of used blocks. The number of used blocks is not stored ++ * neither in on-disk super block nor in the journal footer blocks. At ++ * this moment actual values of total blocks and free block counters ++ * are set in the reiser4 super block (in-memory structure) and we can ++ * calculate number of used blocks from them. 
++ */ ++ reiser4_set_data_blocks(super, ++ reiser4_block_count(super) - ++ reiser4_free_blocks(super)); ++ ++#if REISER4_DEBUG ++ sbinfo->min_blocks_used = 16 /* reserved area */ + ++ 2 /* super blocks */ + ++ 2 /* journal footer and header */ ; ++#endif ++ ++ /* init disk space allocator */ ++ result = sa_init_allocator(reiser4_get_space_allocator(super), ++ super, NULL); ++ if (result) ++ return result; ++ *stage = INIT_SA; ++ ++ result = get_super_jnode(super); ++ if (result == 0) ++ *stage = ALL_DONE; ++ return result; ++} ++ ++/* plugin->u.format.get_ready */ ++int init_format_format40(struct super_block *s, void *data UNUSED_ARG) ++{ ++ int result; ++ format40_init_stage stage; ++ ++ result = try_init_format40(s, &stage); ++ switch (stage) { ++ case ALL_DONE: ++ assert("nikita-3458", result == 0); ++ break; ++ case INIT_JNODE: ++ done_super_jnode(s); ++ case INIT_SA: ++ sa_destroy_allocator(reiser4_get_space_allocator(s), s); ++ case JOURNAL_RECOVER: ++ case INIT_TREE: ++ reiser4_done_tree(&get_super_private(s)->tree); ++ case INIT_OID: ++ case KEY_CHECK: ++ case READ_SUPER: ++ case JOURNAL_REPLAY: ++ case INIT_STATUS: ++ reiser4_status_finish(); ++ case INIT_JOURNAL_INFO: ++ reiser4_done_journal_info(s); ++ case FIND_A_SUPER: ++ case CONSULT_DISKMAP: ++ case NONE_DONE: ++ break; ++ default: ++ impossible("nikita-3457", "init stage: %i", stage); ++ } ++ ++ if (!rofs_super(s) && reiser4_free_blocks(s) < RELEASE_RESERVED) ++ return RETERR(-ENOSPC); ++ ++ return result; ++} ++ ++static void pack_format40_super(const struct super_block *s, char *data) ++{ ++ format40_disk_super_block *super_data = ++ (format40_disk_super_block *) data; ++ ++ reiser4_super_info_data *sbinfo = get_super_private(s); ++ ++ assert("zam-591", data != NULL); ++ ++ put_unaligned(cpu_to_le64(reiser4_free_committed_blocks(s)), ++ &super_data->free_blocks); ++ ++ put_unaligned(cpu_to_le64(sbinfo->tree.root_block), ++ &super_data->root_block); ++ ++ put_unaligned(cpu_to_le64(oid_next(s)), ++ 
&super_data->oid); ++ ++ put_unaligned(cpu_to_le64(oids_used(s)), ++ &super_data->file_count); ++ ++ put_unaligned(cpu_to_le16(sbinfo->tree.height), ++ &super_data->tree_height); ++ ++ if (update_disk_version(super_data)) { ++ __u32 version = FORMAT40_VERSION | FORMAT40_UPDATE_BACKUP; ++ ++ put_unaligned(cpu_to_le32(version), &super_data->version); ++ } ++} ++ ++/* plugin->u.format.log_super ++ return a jnode which should be added to transaction when the super block ++ gets logged */ ++jnode *log_super_format40(struct super_block *s) ++{ ++ jnode *sb_jnode; ++ ++ sb_jnode = get_super_private(s)->u.format40.sb_jnode; ++ ++ jload(sb_jnode); ++ ++ pack_format40_super(s, jdata(sb_jnode)); ++ ++ jrelse(sb_jnode); ++ ++ return sb_jnode; ++} ++ ++/* plugin->u.format.release */ ++int release_format40(struct super_block *s) ++{ ++ int ret; ++ reiser4_super_info_data *sbinfo; ++ ++ sbinfo = get_super_private(s); ++ assert("zam-579", sbinfo != NULL); ++ ++ if (!rofs_super(s)) { ++ ret = reiser4_capture_super_block(s); ++ if (ret != 0) ++ warning("vs-898", ++ "reiser4_capture_super_block failed: %d", ++ ret); ++ ++ ret = txnmgr_force_commit_all(s, 1); ++ if (ret != 0) ++ warning("jmacd-74438", "txn_force failed: %d", ret); ++ ++ all_grabbed2free(); ++ } ++ ++ sa_destroy_allocator(&sbinfo->space_allocator, s); ++ reiser4_done_journal_info(s); ++ done_super_jnode(s); ++ ++ rcu_barrier(); ++ reiser4_done_tree(&sbinfo->tree); ++ /* call finish_rcu(), because some znode were "released" in ++ * reiser4_done_tree(). 
*/ ++ rcu_barrier(); ++ ++ return 0; ++} ++ ++#define FORMAT40_ROOT_LOCALITY 41 ++#define FORMAT40_ROOT_OBJECTID 42 ++ ++/* plugin->u.format.root_dir_key */ ++const reiser4_key *root_dir_key_format40(const struct super_block *super ++ UNUSED_ARG) ++{ ++ static const reiser4_key FORMAT40_ROOT_DIR_KEY = { ++ .el = { ++ __constant_cpu_to_le64((FORMAT40_ROOT_LOCALITY << 4) | KEY_SD_MINOR), ++#if REISER4_LARGE_KEY ++ ON_LARGE_KEY(0ull,) ++#endif ++ __constant_cpu_to_le64(FORMAT40_ROOT_OBJECTID), ++ 0ull ++ } ++ }; ++ ++ return &FORMAT40_ROOT_DIR_KEY; ++} ++ ++/* plugin->u.format.check_open. ++ Check the opened object for validness. For now it checks for the valid oid & ++ locality only, can be improved later and it its work may depend on the mount ++ options. */ ++int check_open_format40(const struct inode *object) ++{ ++ oid_t max, oid; ++ ++ max = oid_next(object->i_sb) - 1; ++ ++ /* Check the oid. */ ++ oid = get_inode_oid(object); ++ if (oid > max) { ++ warning("vpf-1360", "The object with the oid %llu " ++ "greater then the max used oid %llu found.", ++ (unsigned long long)oid, (unsigned long long)max); ++ ++ return RETERR(-EIO); ++ } ++ ++ /* Check the locality. */ ++ oid = reiser4_inode_data(object)->locality_id; ++ if (oid > max) { ++ warning("vpf-1361", "The object with the locality %llu " ++ "greater then the max used oid %llu found.", ++ (unsigned long long)oid, (unsigned long long)max); ++ ++ return RETERR(-EIO); ++ } ++ ++ return 0; ++} ++ ++/* plugin->u.format.version_update. ++ Perform all version update operations from the on-disk ++ format40_disk_super_block.version on disk to FORMAT40_VERSION. ++ */ ++int version_update_format40(struct super_block *super) { ++ txn_handle * trans; ++ lock_handle lh; ++ txn_atom *atom; ++ int ret; ++ ++ /* Nothing to do if RO mount or the on-disk version is not less. 
*/ ++ if (super->s_flags & MS_RDONLY) ++ return 0; ++ ++ if (get_super_private(super)->version >= FORMAT40_VERSION) ++ return 0; ++ ++ printk("reiser4: Updating disk format to 4.0.%u. The reiser4 metadata " ++ "backup is left unchanged. Please run 'fsck.reiser4 --fix' " ++ "on %s to update it too.\n", FORMAT40_VERSION, super->s_id); ++ ++ /* Mark the uber znode dirty to call log_super on write_logs. */ ++ init_lh(&lh); ++ ret = get_uber_znode(reiser4_get_tree(super), ZNODE_WRITE_LOCK, ++ ZNODE_LOCK_HIPRI, &lh); ++ if (ret != 0) ++ return ret; ++ ++ znode_make_dirty(lh.node); ++ done_lh(&lh); ++ ++ /* Update the backup blocks. */ ++ ++ /* Force write_logs immediately. */ ++ trans = get_current_context()->trans; ++ atom = get_current_atom_locked(); ++ assert("vpf-1906", atom != NULL); ++ ++ spin_lock_txnh(trans); ++ return force_commit_atom(trans); ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/disk_format/disk_format40.h b/fs/reiser4/plugin/disk_format/disk_format40.h +new file mode 100644 +index 0000000..7fc1772 +--- /dev/null ++++ b/fs/reiser4/plugin/disk_format/disk_format40.h +@@ -0,0 +1,109 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* this file contains: ++ - definition of ondisk super block of standart disk layout for ++ reiser 4.0 (layout 40) ++ - definition of layout 40 specific portion of in-core super block ++ - declarations of functions implementing methods of layout plugin ++ for layout 40 ++ - declarations of functions used to get/set fields in layout 40 super block ++*/ ++ ++#ifndef __DISK_FORMAT40_H__ ++#define __DISK_FORMAT40_H__ ++ ++/* magic for default reiser4 layout */ ++#define FORMAT40_MAGIC "ReIsEr40FoRmAt" ++#define FORMAT40_OFFSET (REISER4_MASTER_OFFSET + PAGE_CACHE_SIZE) ++ ++#include "../../dformat.h" ++ ++#include /* for 
struct super_block */ ++ ++typedef enum { ++ FORMAT40_LARGE_KEYS ++} format40_flags; ++ ++/* ondisk super block for format 40. It is 512 bytes long */ ++typedef struct format40_disk_super_block { ++ /* 0 */ d64 block_count; ++ /* number of block in a filesystem */ ++ /* 8 */ d64 free_blocks; ++ /* number of free blocks */ ++ /* 16 */ d64 root_block; ++ /* filesystem tree root block */ ++ /* 24 */ d64 oid; ++ /* smallest free objectid */ ++ /* 32 */ d64 file_count; ++ /* number of files in a filesystem */ ++ /* 40 */ d64 flushes; ++ /* number of times super block was ++ flushed. Needed if format 40 ++ will have few super blocks */ ++ /* 48 */ d32 mkfs_id; ++ /* unique identifier of fs */ ++ /* 52 */ char magic[16]; ++ /* magic string ReIsEr40FoRmAt */ ++ /* 68 */ d16 tree_height; ++ /* height of filesystem tree */ ++ /* 70 */ d16 formatting_policy; ++ /* not used anymore */ ++ /* 72 */ d64 flags; ++ /* 80 */ d32 version; ++ /* on-disk format version number ++ initially assigned by mkfs as the greatest format40 ++ version number supported by reiser4progs and updated ++ in mount time in accordance with the greatest format40 ++ version number supported by kernel. ++ Is used by fsck to catch possible corruption and ++ for various compatibility issues */ ++ /* 84 */ char not_used[428]; ++} format40_disk_super_block; ++ ++/* format 40 specific part of reiser4_super_info_data */ ++typedef struct format40_super_info { ++/* format40_disk_super_block actual_sb; */ ++ jnode *sb_jnode; ++ struct { ++ reiser4_block_nr super; ++ } loc; ++} format40_super_info; ++ ++/* Defines for journal header and footer respectively. 
*/ ++#define FORMAT40_JOURNAL_HEADER_BLOCKNR \ ++ ((REISER4_MASTER_OFFSET / PAGE_CACHE_SIZE) + 3) ++ ++#define FORMAT40_JOURNAL_FOOTER_BLOCKNR \ ++ ((REISER4_MASTER_OFFSET / PAGE_CACHE_SIZE) + 4) ++ ++#define FORMAT40_STATUS_BLOCKNR \ ++ ((REISER4_MASTER_OFFSET / PAGE_CACHE_SIZE) + 5) ++ ++/* Diskmap declarations */ ++#define FORMAT40_PLUGIN_DISKMAP_ID ((REISER4_FORMAT_PLUGIN_TYPE<<16) | (FORMAT40_ID)) ++#define FORMAT40_SUPER 1 ++#define FORMAT40_JH 2 ++#define FORMAT40_JF 3 ++ ++/* declarations of functions implementing methods of layout plugin for ++ format 40. The functions theirself are in disk_format40.c */ ++extern int init_format_format40(struct super_block *, void *data); ++extern const reiser4_key *root_dir_key_format40(const struct super_block *); ++extern int release_format40(struct super_block *s); ++extern jnode *log_super_format40(struct super_block *s); ++extern int check_open_format40(const struct inode *object); ++extern int version_update_format40(struct super_block *super); ++ ++/* __DISK_FORMAT40_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/fibration.c b/fs/reiser4/plugin/fibration.c +new file mode 100644 +index 0000000..690dac4 +--- /dev/null ++++ b/fs/reiser4/plugin/fibration.c +@@ -0,0 +1,175 @@ ++/* Copyright 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Directory fibrations */ ++ ++/* ++ * Suppose we have a directory tree with sources of some project. During ++ * compilation .o files are created within this tree. This makes access ++ * to the original source files less efficient, because source files are ++ * now "diluted" by object files: default directory plugin uses prefix ++ * of a file name as a part of the key for directory entry (and this ++ * part is also inherited by the key of file body). 
This means that ++ * foo.o will be located close to foo.c and foo.h in the tree. ++ * ++ * To avoid this effect directory plugin fill highest 7 (unused ++ * originally) bits of the second component of the directory entry key ++ * by bit-pattern depending on the file name (see ++ * fs/reiser4/kassign.c:build_entry_key_common()). These bits are called ++ * "fibre". Fibre of the file name key is inherited by key of stat data ++ * and keys of file body (in the case of REISER4_LARGE_KEY). ++ * ++ * Fibre for a given file is chosen by per-directory fibration ++ * plugin. Names within given fibre are ordered lexicographically. ++ */ ++ ++#include "../debug.h" ++#include "plugin_header.h" ++#include "plugin.h" ++#include "../super.h" ++#include "../inode.h" ++ ++#include ++ ++static const int fibre_shift = 57; ++ ++#define FIBRE_NO(n) (((__u64)(n)) << fibre_shift) ++ ++/* ++ * Trivial fibration: all files of directory are just ordered ++ * lexicographically. ++ */ ++static __u64 fibre_trivial(const struct inode *dir, const char *name, int len) ++{ ++ return FIBRE_NO(0); ++} ++ ++/* ++ * dot-o fibration: place .o files after all others. ++ */ ++static __u64 fibre_dot_o(const struct inode *dir, const char *name, int len) ++{ ++ /* special treatment for .*\.o */ ++ if (len > 2 && name[len - 1] == 'o' && name[len - 2] == '.') ++ return FIBRE_NO(1); ++ else ++ return FIBRE_NO(0); ++} ++ ++/* ++ * ext.1 fibration: subdivide directory into 128 fibrations one for each ++ * 7bit extension character (file "foo.h" goes into fibre "h"), plus ++ * default fibre for the rest. ++ */ ++static __u64 fibre_ext_1(const struct inode *dir, const char *name, int len) ++{ ++ if (len > 2 && name[len - 2] == '.') ++ return FIBRE_NO(name[len - 1]); ++ else ++ return FIBRE_NO(0); ++} ++ ++/* ++ * ext.3 fibration: try to separate files with different 3-character ++ * extensions from each other. 
++ */ ++static __u64 fibre_ext_3(const struct inode *dir, const char *name, int len) ++{ ++ if (len > 4 && name[len - 4] == '.') ++ return FIBRE_NO(name[len - 3] + name[len - 2] + name[len - 1]); ++ else ++ return FIBRE_NO(0); ++} ++ ++static int change_fibration(struct inode *inode, ++ reiser4_plugin * plugin, ++ pset_member memb) ++{ ++ int result; ++ ++ assert("nikita-3503", inode != NULL); ++ assert("nikita-3504", plugin != NULL); ++ ++ assert("nikita-3505", is_reiser4_inode(inode)); ++ assert("nikita-3506", inode_dir_plugin(inode) != NULL); ++ assert("nikita-3507", ++ plugin->h.type_id == REISER4_FIBRATION_PLUGIN_TYPE); ++ ++ result = 0; ++ if (inode_fibration_plugin(inode) == NULL || ++ inode_fibration_plugin(inode)->h.id != plugin->h.id) { ++ if (is_dir_empty(inode) == 0) ++ result = aset_set_unsafe(&reiser4_inode_data(inode)->pset, ++ PSET_FIBRATION, plugin); ++ else ++ result = RETERR(-ENOTEMPTY); ++ ++ } ++ return result; ++} ++ ++static reiser4_plugin_ops fibration_plugin_ops = { ++ .init = NULL, ++ .load = NULL, ++ .save_len = NULL, ++ .save = NULL, ++ .change = change_fibration ++}; ++ ++/* fibration plugins */ ++fibration_plugin fibration_plugins[LAST_FIBRATION_ID] = { ++ [FIBRATION_LEXICOGRAPHIC] = { ++ .h = { ++ .type_id = REISER4_FIBRATION_PLUGIN_TYPE, ++ .id = FIBRATION_LEXICOGRAPHIC, ++ .pops = &fibration_plugin_ops, ++ .label = "lexicographic", ++ .desc = "no fibration", ++ .linkage = {NULL, NULL} ++ }, ++ .fibre = fibre_trivial ++ }, ++ [FIBRATION_DOT_O] = { ++ .h = { ++ .type_id = REISER4_FIBRATION_PLUGIN_TYPE, ++ .id = FIBRATION_DOT_O, ++ .pops = &fibration_plugin_ops, ++ .label = "dot-o", ++ .desc = "fibrate .o files separately", ++ .linkage = {NULL, NULL} ++ }, ++ .fibre = fibre_dot_o ++ }, ++ [FIBRATION_EXT_1] = { ++ .h = { ++ .type_id = REISER4_FIBRATION_PLUGIN_TYPE, ++ .id = FIBRATION_EXT_1, ++ .pops = &fibration_plugin_ops, ++ .label = "ext-1", ++ .desc = "fibrate file by single character extension", ++ .linkage = {NULL, NULL} ++ }, ++ 
.fibre = fibre_ext_1 ++ }, ++ [FIBRATION_EXT_3] = { ++ .h = { ++ .type_id = REISER4_FIBRATION_PLUGIN_TYPE, ++ .id = FIBRATION_EXT_3, ++ .pops = &fibration_plugin_ops, ++ .label = "ext-3", ++ .desc = "fibrate file by three character extension", ++ .linkage = {NULL, NULL} ++ }, ++ .fibre = fibre_ext_3 ++ } ++}; ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * End: ++ */ +diff --git a/fs/reiser4/plugin/fibration.h b/fs/reiser4/plugin/fibration.h +new file mode 100644 +index 0000000..0723cad +--- /dev/null ++++ b/fs/reiser4/plugin/fibration.h +@@ -0,0 +1,37 @@ ++/* Copyright 2004 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Fibration plugin used by hashed directory plugin to segment content ++ * of directory. See fs/reiser4/plugin/fibration.c for more on this. */ ++ ++#if !defined( __FS_REISER4_PLUGIN_FIBRATION_H__ ) ++#define __FS_REISER4_PLUGIN_FIBRATION_H__ ++ ++#include "plugin_header.h" ++ ++typedef struct fibration_plugin { ++ /* generic fields */ ++ plugin_header h; ++ ++ __u64(*fibre) (const struct inode * dir, const char *name, int len); ++} fibration_plugin; ++ ++typedef enum { ++ FIBRATION_LEXICOGRAPHIC, ++ FIBRATION_DOT_O, ++ FIBRATION_EXT_1, ++ FIBRATION_EXT_3, ++ LAST_FIBRATION_ID ++} reiser4_fibration_id; ++ ++/* __FS_REISER4_PLUGIN_FIBRATION_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/file/Makefile b/fs/reiser4/plugin/file/Makefile +new file mode 100644 +index 0000000..134fa7a +--- /dev/null ++++ b/fs/reiser4/plugin/file/Makefile +@@ -0,0 +1,7 @@ ++obj-$(CONFIG_REISER4_FS) += file_plugins.o ++ ++file_plugins-objs := \ ++ file.o \ ++ tail_conversion.o \ ++ symlink.o \ ++ cryptcompress.o +diff --git a/fs/reiser4/plugin/file/cryptcompress.c b/fs/reiser4/plugin/file/cryptcompress.c +new file mode 100644 +index 0000000..2876e31 +--- /dev/null ++++ b/fs/reiser4/plugin/file/cryptcompress.c +@@ -0,0 +1,3760 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ reiser4/README */ ++ ++/* This file contains implementations of inode/file/address_space/file plugin ++ * operations specific for cryptcompress file plugin which manages files with ++ * compressed and encrypted bodies. "Cryptcompress file" is built of items of ++ * CTAIL_ID (see http://www.namesys.com/cryptcompress_design.html for details). 
++ */ ++ ++#include "../../inode.h" ++#include "../cluster.h" ++#include "../object.h" ++#include "../../tree_walk.h" ++#include "cryptcompress.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* get cryptcompress specific portion of inode */ ++cryptcompress_info_t *cryptcompress_inode_data(const struct inode *inode) ++{ ++ return &reiser4_inode_data(inode)->file_plugin_data.cryptcompress_info; ++} ++ ++/* plugin->u.file.init_inode_data */ ++void init_inode_data_cryptcompress(struct inode *inode, ++ reiser4_object_create_data * crd, ++ int create) ++{ ++ cryptcompress_info_t *data; ++ ++ data = cryptcompress_inode_data(inode); ++ assert("edward-685", data != NULL); ++ ++ memset(data, 0, sizeof(*data)); ++ ++ turn_on_compression(data); ++ set_lattice_factor(data, MIN_LATTICE_FACTOR); ++ init_inode_ordering(inode, crd, create); ++} ++ ++#if REISER4_DEBUG ++int cryptcompress_inode_ok(struct inode *inode) ++{ ++ if (!(reiser4_inode_data(inode)->plugin_mask & (1 << PSET_FILE))) ++ return 0; ++ if (!cluster_shift_ok(inode_cluster_shift(inode))) ++ return 0; ++ return 1; ++} ++#endif ++ ++/* The following is a part of reiser4 cipher key manager ++ which is called when opening/creating a cryptcompress file */ ++ ++/* get/set cipher key info */ ++crypto_stat_t * inode_crypto_stat (struct inode * inode) ++{ ++ assert("edward-90", inode != NULL); ++ assert("edward-91", reiser4_inode_data(inode) != NULL); ++ return cryptcompress_inode_data(inode)->crypt; ++} ++ ++static void set_inode_crypto_stat (struct inode * inode, crypto_stat_t * stat) ++{ ++ cryptcompress_inode_data(inode)->crypt = stat; ++} ++ ++/* allocate a cipher key info */ ++crypto_stat_t * reiser4_alloc_crypto_stat (struct inode * inode) ++{ ++ crypto_stat_t * info; ++ int fipsize; ++ ++ info = kmalloc(sizeof(*info), reiser4_ctx_gfp_mask_get()); ++ if (!info) ++ return ERR_PTR(-ENOMEM); ++ memset(info, 0, sizeof (*info)); ++ fipsize = inode_digest_plugin(inode)->fipsize; ++ info->keyid 
= kmalloc(fipsize, reiser4_ctx_gfp_mask_get()); ++ if (!info->keyid) { ++ kfree(info); ++ return ERR_PTR(-ENOMEM); ++ } ++ info->host = inode; ++ return info; ++} ++ ++#if 0 ++/* allocate/free low-level info for cipher and digest ++ transforms */ ++static int alloc_crypto_tfms(crypto_stat_t * info) ++{ ++ struct crypto_blkcipher * ctfm = NULL; ++ struct crypto_hash * dtfm = NULL; ++ cipher_plugin * cplug = inode_cipher_plugin(info->host); ++ digest_plugin * dplug = inode_digest_plugin(info->host); ++ ++ if (cplug->alloc) { ++ ctfm = cplug->alloc(); ++ if (IS_ERR(ctfm)) { ++ warning("edward-1364", ++ "Can not allocate info for %s\n", ++ cplug->h.desc); ++ return RETERR(PTR_ERR(ctfm)); ++ } ++ } ++ info_set_cipher(info, ctfm); ++ if (dplug->alloc) { ++ dtfm = dplug->alloc(); ++ if (IS_ERR(dtfm)) { ++ warning("edward-1365", ++ "Can not allocate info for %s\n", ++ dplug->h.desc); ++ goto unhappy_with_digest; ++ } ++ } ++ info_set_digest(info, dtfm); ++ return 0; ++ unhappy_with_digest: ++ if (cplug->free) { ++ cplug->free(ctfm); ++ info_set_cipher(info, NULL); ++ } ++ return RETERR(PTR_ERR(dtfm)); ++} ++#endif ++ ++static void ++free_crypto_tfms(crypto_stat_t * info) ++{ ++ assert("edward-1366", info != NULL); ++ if (!info_get_cipher(info)) { ++ assert("edward-1601", !info_get_digest(info)); ++ return; ++ } ++ inode_cipher_plugin(info->host)->free(info_get_cipher(info)); ++ info_set_cipher(info, NULL); ++ inode_digest_plugin(info->host)->free(info_get_digest(info)); ++ info_set_digest(info, NULL); ++ return; ++} ++ ++#if 0 ++/* create a key fingerprint for disk stat-data */ ++static int create_keyid (crypto_stat_t * info, crypto_data_t * data) ++{ ++ int ret = -ENOMEM; ++ size_t blk, pad; ++ __u8 * dmem; ++ __u8 * cmem; ++ struct hash_desc ddesc; ++ struct blkcipher_desc cdesc; ++ struct scatterlist sg; ++ ++ assert("edward-1367", info != NULL); ++ assert("edward-1368", info->keyid != NULL); ++ ++ ddesc.tfm = info_get_digest(info); ++ ddesc.flags = 0; ++ cdesc.tfm = 
info_get_cipher(info); ++ cdesc.flags = 0; ++ ++ dmem = kmalloc((size_t)crypto_hash_digestsize(ddesc.tfm), ++ reiser4_ctx_gfp_mask_get()); ++ if (!dmem) ++ goto exit1; ++ ++ blk = crypto_blkcipher_blocksize(cdesc.tfm); ++ ++ pad = data->keyid_size % blk; ++ pad = (pad ? blk - pad : 0); ++ ++ cmem = kmalloc((size_t)data->keyid_size + pad, ++ reiser4_ctx_gfp_mask_get()); ++ if (!cmem) ++ goto exit2; ++ memcpy(cmem, data->keyid, data->keyid_size); ++ memset(cmem + data->keyid_size, 0, pad); ++ ++ sg.page = virt_to_page(cmem); ++ sg.offset = offset_in_page(cmem); ++ sg.length = data->keyid_size + pad; ++ ++ ret = crypto_blkcipher_encrypt(&cdesc, &sg, &sg, ++ data->keyid_size + pad); ++ if (ret) { ++ warning("edward-1369", ++ "encryption failed flags=%x\n", cdesc.flags); ++ goto exit3; ++ } ++ ret = crypto_hash_digest(&ddesc, &sg, sg.length, dmem); ++ if (ret) { ++ warning("edward-1602", ++ "digest failed flags=%x\n", ddesc.flags); ++ goto exit3; ++ } ++ memcpy(info->keyid, dmem, inode_digest_plugin(info->host)->fipsize); ++ exit3: ++ kfree(cmem); ++ exit2: ++ kfree(dmem); ++ exit1: ++ return ret; ++} ++#endif ++ ++static void destroy_keyid(crypto_stat_t * info) ++{ ++ assert("edward-1370", info != NULL); ++ assert("edward-1371", info->keyid != NULL); ++ kfree(info->keyid); ++ return; ++} ++ ++static void __free_crypto_stat (struct inode * inode) ++{ ++ crypto_stat_t * info = inode_crypto_stat(inode); ++ assert("edward-1372", info != NULL); ++ ++ free_crypto_tfms(info); ++ destroy_keyid(info); ++ kfree(info); ++} ++ ++#if 0 ++static void instantiate_crypto_stat(crypto_stat_t * info) ++{ ++ assert("edward-1373", info != NULL); ++ assert("edward-1374", info->inst == 0); ++ info->inst = 1; ++} ++#endif ++ ++static void uninstantiate_crypto_stat(crypto_stat_t * info) ++{ ++ assert("edward-1375", info != NULL); ++ info->inst = 0; ++} ++ ++static int crypto_stat_instantiated(crypto_stat_t * info) ++{ ++ return info->inst; ++} ++ ++static int inode_has_cipher_key(struct inode 
* inode) ++{ ++ assert("edward-1376", inode != NULL); ++ return inode_crypto_stat(inode) && ++ crypto_stat_instantiated(inode_crypto_stat(inode)); ++} ++ ++static void free_crypto_stat (struct inode * inode) ++{ ++ uninstantiate_crypto_stat(inode_crypto_stat(inode)); ++ __free_crypto_stat(inode); ++} ++ ++static int need_cipher(struct inode * inode) ++{ ++ return inode_cipher_plugin(inode) != ++ cipher_plugin_by_id(NONE_CIPHER_ID); ++} ++ ++/* Create a crypto-stat and attach result to the @object. ++ If success is returned, then low-level cipher info contains ++ an instantiated key */ ++#if 0 ++crypto_stat_t * ++create_crypto_stat(struct inode * object, ++ crypto_data_t * data /* this contains a (uninstantiated) ++ cipher key imported from user ++ space */) ++{ ++ int ret; ++ crypto_stat_t * info; ++ ++ assert("edward-1377", data != NULL); ++ assert("edward-1378", need_cipher(object)); ++ ++ if (inode_file_plugin(object) != ++ file_plugin_by_id(DIRECTORY_FILE_PLUGIN_ID)) ++ return ERR_PTR(-EINVAL); ++ ++ info = reiser4_alloc_crypto_stat(object); ++ if (IS_ERR(info)) ++ return info; ++ ret = alloc_crypto_tfms(info); ++ if (ret) ++ goto err; ++ /* instantiating a key */ ++ ret = crypto_blkcipher_setkey(info_get_cipher(info), ++ data->key, ++ data->keysize); ++ if (ret) { ++ warning("edward-1379", ++ "setkey failed flags=%x\n", ++ crypto_blkcipher_get_flags(info_get_cipher(info))); ++ goto err; ++ } ++ info->keysize = data->keysize; ++ ret = create_keyid(info, data); ++ if (ret) ++ goto err; ++ instantiate_crypto_stat(info); ++ return info; ++ err: ++ __free_crypto_stat(object); ++ return ERR_PTR(ret); ++} ++#endif ++ ++/* increment/decrement a load counter when ++ attaching/detaching the crypto-stat to any object */ ++static void load_crypto_stat(crypto_stat_t * info) ++{ ++ assert("edward-1380", info != NULL); ++ inc_keyload_count(info); ++} ++ ++static void unload_crypto_stat(struct inode * inode) ++{ ++ crypto_stat_t * info = inode_crypto_stat(inode); ++ 
assert("edward-1381", info->keyload_count > 0); ++ ++ dec_keyload_count(inode_crypto_stat(inode)); ++ if (info->keyload_count == 0) ++ /* final release */ ++ free_crypto_stat(inode); ++} ++ ++/* attach/detach an existing crypto-stat */ ++void reiser4_attach_crypto_stat(struct inode * inode, crypto_stat_t * info) ++{ ++ assert("edward-1382", inode != NULL); ++ assert("edward-1383", info != NULL); ++ assert("edward-1384", inode_crypto_stat(inode) == NULL); ++ ++ set_inode_crypto_stat(inode, info); ++ load_crypto_stat(info); ++} ++ ++/* returns true, if crypto stat can be attached to the @host */ ++#if REISER4_DEBUG ++static int host_allows_crypto_stat(struct inode * host) ++{ ++ int ret; ++ file_plugin * fplug = inode_file_plugin(host); ++ ++ switch (fplug->h.id) { ++ case CRYPTCOMPRESS_FILE_PLUGIN_ID: ++ ret = 1; ++ break; ++ default: ++ ret = 0; ++ } ++ return ret; ++} ++#endif /* REISER4_DEBUG */ ++ ++static void reiser4_detach_crypto_stat(struct inode * inode) ++{ ++ assert("edward-1385", inode != NULL); ++ assert("edward-1386", host_allows_crypto_stat(inode)); ++ ++ if (inode_crypto_stat(inode)) ++ unload_crypto_stat(inode); ++ set_inode_crypto_stat(inode, NULL); ++} ++ ++#if 0 ++ ++/* compare fingerprints of @child and @parent */ ++static int keyid_eq(crypto_stat_t * child, crypto_stat_t * parent) ++{ ++ return !memcmp(child->keyid, parent->keyid, info_digest_plugin(parent)->fipsize); ++} ++ ++/* check if a crypto-stat (which is bound to @parent) can be inherited */ ++int can_inherit_crypto_cryptcompress(struct inode *child, struct inode *parent) ++{ ++ if (!need_cipher(child)) ++ return 0; ++ /* the child is created */ ++ if (!inode_crypto_stat(child)) ++ return 1; ++ /* the child is looked up */ ++ if (!inode_crypto_stat(parent)) ++ return 0; ++ return (inode_cipher_plugin(child) == inode_cipher_plugin(parent) && ++ inode_digest_plugin(child) == inode_digest_plugin(parent) && ++ inode_crypto_stat(child)->keysize == inode_crypto_stat(parent)->keysize && ++ 
keyid_eq(inode_crypto_stat(child), inode_crypto_stat(parent))); ++} ++#endif ++ ++/* helper functions for ->create() method of the cryptcompress plugin */ ++static int inode_set_crypto(struct inode * object) ++{ ++ reiser4_inode * info; ++ if (!inode_crypto_stat(object)) { ++ if (need_cipher(object)) ++ return RETERR(-EINVAL); ++ /* the file is not to be encrypted */ ++ return 0; ++ } ++ info = reiser4_inode_data(object); ++ info->extmask |= (1 << CRYPTO_STAT); ++ return 0; ++} ++ ++static int inode_init_compression(struct inode * object) ++{ ++ int result = 0; ++ assert("edward-1461", object != NULL); ++ if (inode_compression_plugin(object)->init) ++ result = inode_compression_plugin(object)->init(); ++ return result; ++} ++ ++static int inode_check_cluster(struct inode * object) ++{ ++ assert("edward-696", object != NULL); ++ ++ if (inode_cluster_size(object) < PAGE_CACHE_SIZE) { ++ warning("edward-1320", "Can not support '%s' " ++ "logical clusters (less then page size)", ++ inode_cluster_plugin(object)->h.label); ++ return RETERR(-EINVAL); ++ } ++ return 0; ++} ++ ++/* ->destroy_inode() method of the cryptcompress plugin */ ++void destroy_inode_cryptcompress(struct inode * inode) ++{ ++ assert("edward-23", cryptcompress_inode_data(inode)->pgcount == 0); ++ reiser4_detach_crypto_stat(inode); ++ return; ++} ++ ++/* ->create() method of the cryptcompress plugin ++ ++. install plugins ++. attach crypto info if specified ++. attach compression info if specified ++. 
attach cluster info ++*/ ++int ++create_cryptcompress(struct inode *object, struct inode *parent, ++ reiser4_object_create_data * data) ++{ ++ int result; ++ reiser4_inode *info; ++ ++ assert("edward-23", object != NULL); ++ assert("edward-24", parent != NULL); ++ assert("edward-30", data != NULL); ++ assert("edward-26", reiser4_inode_get_flag(object, REISER4_NO_SD)); ++ assert("edward-27", data->id == CRYPTCOMPRESS_FILE_PLUGIN_ID); ++ ++ info = reiser4_inode_data(object); ++ ++ assert("edward-29", info != NULL); ++ ++ /* set file bit */ ++ info->plugin_mask |= (1 << PSET_FILE); ++ ++ /* set crypto */ ++ result = inode_set_crypto(object); ++ if (result) ++ goto error; ++ /* set compression */ ++ result = inode_init_compression(object); ++ if (result) ++ goto error; ++ /* set cluster */ ++ result = inode_check_cluster(object); ++ if (result) ++ goto error; ++ ++ /* save everything in disk stat-data */ ++ result = write_sd_by_inode_common(object); ++ if (!result) ++ return 0; ++ error: ++ reiser4_detach_crypto_stat(object); ++ return result; ++} ++ ++/* ->open() method of the cryptcompress plugin */ ++int open_object_cryptcompress(struct inode * inode, struct file * file) ++{ ++ int result; ++ struct inode * parent; ++ ++ assert("edward-1394", inode != NULL); ++ assert("edward-1395", file != NULL); ++ assert("edward-1396", file != NULL); ++ assert("edward-1397", file->f_dentry->d_inode == inode); ++ assert("edward-1398", file->f_dentry->d_parent != NULL); ++ assert("edward-1399", file->f_dentry->d_parent->d_inode != NULL); ++ assert("edward-698", ++ inode_file_plugin(inode) == ++ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)); ++ result = inode_check_cluster(inode); ++ if (result) ++ return result; ++ result = inode_init_compression(inode); ++ if (result) ++ return result; ++ if (!need_cipher(inode)) ++ /* the file is not to be ciphered */ ++ return 0; ++ parent = file->f_dentry->d_parent->d_inode; ++ if (!inode_has_cipher_key(inode)) ++ return RETERR(-EINVAL); ++ 
return 0; ++} ++ ++/* returns a blocksize, the attribute of a cipher algorithm */ ++static unsigned int ++cipher_blocksize(struct inode * inode) ++{ ++ assert("edward-758", need_cipher(inode)); ++ assert("edward-1400", inode_crypto_stat(inode) != NULL); ++ return crypto_blkcipher_blocksize ++ (info_get_cipher(inode_crypto_stat(inode))); ++} ++ ++/* returns offset translated by scale factor of the crypto-algorithm */ ++static loff_t inode_scaled_offset (struct inode * inode, ++ const loff_t src_off /* input offset */) ++{ ++ assert("edward-97", inode != NULL); ++ ++ if (!need_cipher(inode) || ++ src_off == get_key_offset(reiser4_min_key()) || ++ src_off == get_key_offset(reiser4_max_key())) ++ return src_off; ++ ++ return inode_cipher_plugin(inode)->scale(inode, ++ cipher_blocksize(inode), ++ src_off); ++} ++ ++/* returns disk cluster size */ ++size_t inode_scaled_cluster_size(struct inode * inode) ++{ ++ assert("edward-110", inode != NULL); ++ ++ return inode_scaled_offset(inode, inode_cluster_size(inode)); ++} ++ ++static int new_cluster(reiser4_cluster_t * clust, struct inode *inode) ++{ ++ return (clust_to_off(clust->index, inode) >= inode->i_size); ++} ++ ++/* set number of cluster pages */ ++static void set_cluster_nrpages(reiser4_cluster_t * clust, struct inode *inode) ++{ ++ reiser4_slide_t *win; ++ ++ assert("edward-180", clust != NULL); ++ assert("edward-1040", inode != NULL); ++ ++ win = clust->win; ++ if (!win) { ++ /* NOTE-EDWARD: i_size should be protected */ ++ clust->nr_pages = ++ count_to_nrpages(fsize_to_count(clust, inode)); ++ return; ++ } ++ assert("edward-1176", clust->op != PCL_UNKNOWN); ++ assert("edward-1064", win->off + win->count + win->delta != 0); ++ ++ if (win->stat == HOLE_WINDOW && ++ win->off == 0 && win->count == inode_cluster_size(inode)) { ++ /* special case: we start write hole from fake cluster */ ++ clust->nr_pages = 0; ++ return; ++ } ++ clust->nr_pages = ++ count_to_nrpages(max_count(win->off + win->count + win->delta, ++ 
fsize_to_count(clust, inode))); ++ return; ++} ++ ++/* ->key_by_inode() method of the cryptcompress plugin */ ++/* see plugin/plugin.h for details */ ++int ++key_by_inode_cryptcompress(struct inode *inode, loff_t off, reiser4_key * key) ++{ ++ loff_t clust_off; ++ ++ assert("edward-64", inode != 0); ++ // assert("edward-112", ergo(off != get_key_offset(reiser4_max_key()), !off_to_cloff(off, inode))); ++ /* don't come here with other offsets */ ++ ++ clust_off = ++ (off == ++ get_key_offset(reiser4_max_key())? get_key_offset(reiser4_max_key()) : ++ off_to_clust_to_off(off, inode)); ++ ++ key_by_inode_and_offset_common(inode, 0, key); ++ set_key_offset(key, ++ (__u64) (!inode_crypto_stat(inode) ? clust_off : ++ inode_scaled_offset(inode, clust_off))); ++ return 0; ++} ++ ++/* plugin->flow_by_inode */ ++int ++flow_by_inode_cryptcompress(struct inode *inode /* file to build flow for */ , ++ const char __user *buf /* user level buffer */ , ++ int user /* 1 if @buf is of user space, 0 - if it is ++ kernel space */ , ++ loff_t size /* buffer size */ , ++ loff_t off /* offset to start io from */ , ++ rw_op op /* READ or WRITE */ , ++ flow_t * f /* resulting flow */ ) ++{ ++ assert("edward-436", f != NULL); ++ assert("edward-149", inode != NULL); ++ assert("edward-150", inode_file_plugin(inode) != NULL); ++ ++ f->length = size; ++ memcpy(&f->data, &buf, sizeof(buf)); ++ f->user = user; ++ f->op = op; ++ ++ if (op == WRITE_OP && user == 1) ++ return 0; ++ return key_by_inode_cryptcompress(inode, off, &f->key); ++} ++ ++static int ++cryptcompress_hint_validate(hint_t * hint, const reiser4_key * key, ++ znode_lock_mode lock_mode) ++{ ++ coord_t *coord; ++ ++ assert("edward-704", hint != NULL); ++ assert("edward-1089", !hint_is_valid(hint)); ++ assert("edward-706", hint->lh.owner == NULL); ++ ++ coord = &hint->ext_coord.coord; ++ ++ if (!hint || !hint_is_set(hint) || hint->mode != lock_mode) ++ /* hint either not set or set by different operation */ ++ return RETERR(-E_REPEAT); 
++ ++ if (get_key_offset(key) != hint->offset) ++ /* hint is set for different key */ ++ return RETERR(-E_REPEAT); ++ ++ assert("edward-707", reiser4_schedulable()); ++ ++ return reiser4_seal_validate(&hint->seal, &hint->ext_coord.coord, ++ key, &hint->lh, lock_mode, ++ ZNODE_LOCK_LOPRI); ++} ++ ++/* reserve disk space when writing a logical cluster */ ++static int reserve4cluster(struct inode *inode, reiser4_cluster_t *clust) ++{ ++ int result = 0; ++ ++ assert("edward-965", reiser4_schedulable()); ++ assert("edward-439", inode != NULL); ++ assert("edward-440", clust != NULL); ++ assert("edward-441", clust->pages != NULL); ++ ++ if (clust->nr_pages == 0) { ++ assert("edward-1152", clust->win != NULL); ++ assert("edward-1153", clust->win->stat == HOLE_WINDOW); ++ /* don't reserve space for fake disk clusteer */ ++ return 0; ++ } ++ assert("edward-442", jprivate(clust->pages[0]) != NULL); ++ ++ result = reiser4_grab_space_force(estimate_insert_cluster(inode) + ++ estimate_update_cluster(inode), ++ BA_CAN_COMMIT); ++ if (result) ++ return result; ++ clust->reserved = 1; ++ grabbed2cluster_reserved(estimate_insert_cluster(inode) + ++ estimate_update_cluster(inode)); ++#if REISER4_DEBUG ++ clust->reserved_prepped = estimate_update_cluster(inode); ++ clust->reserved_unprepped = estimate_insert_cluster(inode); ++#endif ++ /* there can be space grabbed by txnmgr_force_commit_all */ ++ return 0; ++} ++ ++/* free reserved disk space if writing a logical cluster fails */ ++static void ++free_reserved4cluster(struct inode *inode, reiser4_cluster_t * clust, int count) ++{ ++ assert("edward-967", clust->reserved == 1); ++ ++ cluster_reserved2free(count); ++ clust->reserved = 0; ++} ++ ++/* The core search procedure of the cryptcompress plugin. 
++ If returned value is not cbk_errored, then current znode is locked */ ++static int find_cluster_item(hint_t * hint, ++ const reiser4_key * key, /* key of the item we are ++ looking for */ ++ znode_lock_mode lock_mode /* which lock */ , ++ ra_info_t * ra_info, lookup_bias bias, __u32 flags) ++{ ++ int result; ++ reiser4_key ikey; ++ int went_right = 0; ++ coord_t *coord = &hint->ext_coord.coord; ++ coord_t orig = *coord; ++ ++ assert("edward-152", hint != NULL); ++ ++ if (!hint_is_valid(hint)) { ++ result = cryptcompress_hint_validate(hint, key, lock_mode); ++ if (result == -E_REPEAT) ++ goto traverse_tree; ++ else if (result) { ++ assert("edward-1216", 0); ++ return result; ++ } ++ hint_set_valid(hint); ++ } ++ assert("edward-709", znode_is_any_locked(coord->node)); ++ ++ /* In-place lookup is going here, it means we just need to ++ check if next item of the @coord match to the @keyhint) */ ++ ++ if (equal_to_rdk(coord->node, key)) { ++ result = goto_right_neighbor(coord, &hint->lh); ++ if (result == -E_NO_NEIGHBOR) { ++ assert("edward-1217", 0); ++ return RETERR(-EIO); ++ } ++ if (result) ++ return result; ++ assert("edward-1218", equal_to_ldk(coord->node, key)); ++ went_right = 1; ++ } else { ++ coord->item_pos++; ++ coord->unit_pos = 0; ++ coord->between = AT_UNIT; ++ } ++ result = zload(coord->node); ++ if (result) ++ return result; ++ assert("edward-1219", !node_is_empty(coord->node)); ++ ++ if (!coord_is_existing_item(coord)) { ++ zrelse(coord->node); ++ goto not_found; ++ } ++ item_key_by_coord(coord, &ikey); ++ zrelse(coord->node); ++ if (!keyeq(key, &ikey)) ++ goto not_found; ++ /* Ok, item is found, update node counts */ ++ if (went_right) ++ dclust_inc_extension_ncount(hint); ++ return CBK_COORD_FOUND; ++ ++ not_found: ++ assert("edward-1220", coord->item_pos > 0); ++ //coord->item_pos--; ++ /* roll back */ ++ *coord = orig; ++ ON_DEBUG(coord_update_v(coord)); ++ return CBK_COORD_NOTFOUND; ++ ++ traverse_tree: ++ assert("edward-713", hint->lh.owner == 
NULL); ++ assert("edward-714", reiser4_schedulable()); ++ ++ reiser4_unset_hint(hint); ++ dclust_init_extension(hint); ++ coord_init_zero(coord); ++ result = coord_by_key(current_tree, key, coord, &hint->lh, ++ lock_mode, bias, LEAF_LEVEL, LEAF_LEVEL, ++ CBK_UNIQUE | flags, ra_info); ++ if (cbk_errored(result)) ++ return result; ++ if(result == CBK_COORD_FOUND) ++ dclust_inc_extension_ncount(hint); ++ hint_set_valid(hint); ++ return result; ++} ++ ++/* This function is called by deflate[inflate] manager when ++ creating a transformed/plain stream to check if we should ++ create/cut some overhead. If this returns true, then @oh ++ contains the size of this overhead. ++ */ ++static int ++need_cut_or_align(struct inode * inode, reiser4_cluster_t * clust, ++ rw_op rw, int * oh) ++{ ++ tfm_cluster_t * tc = &clust->tc; ++ switch (rw) { ++ case WRITE_OP: /* estimate align */ ++ *oh = tc->len % cipher_blocksize(inode); ++ if (*oh != 0) ++ return 1; ++ break; ++ case READ_OP: /* estimate cut */ ++ *oh = *(tfm_output_data(clust) + tc->len - 1); ++ break; ++ default: ++ impossible("edward-1401", "bad option"); ++ } ++ return (tc->len != tc->lsize); ++} ++ ++/* create/cut an overhead of transformed/plain stream */ ++static void ++align_or_cut_overhead(struct inode * inode, reiser4_cluster_t * clust, rw_op rw) ++{ ++ int oh; ++ cipher_plugin * cplug = inode_cipher_plugin(inode); ++ ++ assert("edward-1402", need_cipher(inode)); ++ ++ if (!need_cut_or_align(inode, clust, rw, &oh)) ++ return; ++ switch (rw) { ++ case WRITE_OP: /* do align */ ++ clust->tc.len += ++ cplug->align_stream(tfm_input_data(clust) + ++ clust->tc.len, clust->tc.len, ++ cipher_blocksize(inode)); ++ *(tfm_input_data(clust) + clust->tc.len - 1) = ++ cipher_blocksize(inode) - oh; ++ break; ++ case READ_OP: /* do cut */ ++ assert("edward-1403", oh <= cipher_blocksize(inode)); ++ clust->tc.len -= oh; ++ break; ++ default: ++ impossible("edward-1404", "bad option"); ++ } ++ return; ++} ++ ++/* the following two 
functions are to evaluate results ++ of compression transform */ ++static unsigned ++max_cipher_overhead(struct inode * inode) ++{ ++ if (!need_cipher(inode) || !inode_cipher_plugin(inode)->align_stream) ++ return 0; ++ return cipher_blocksize(inode); ++} ++ ++static int deflate_overhead(struct inode *inode) ++{ ++ return (inode_compression_plugin(inode)-> ++ checksum ? DC_CHECKSUM_SIZE : 0); ++} ++ ++static unsigned deflate_overrun(struct inode * inode, int ilen) ++{ ++ return coa_overrun(inode_compression_plugin(inode), ilen); ++} ++ ++/* Estimating compressibility of a logical cluster by various ++ policies represented by compression mode plugin. ++ If this returns false, then compressor won't be called for ++ the cluster of index @index. ++*/ ++static int should_compress(tfm_cluster_t * tc, cloff_t index, ++ struct inode *inode) ++{ ++ compression_plugin *cplug = inode_compression_plugin(inode); ++ compression_mode_plugin *mplug = inode_compression_mode_plugin(inode); ++ ++ assert("edward-1321", tc->len != 0); ++ assert("edward-1322", cplug != NULL); ++ assert("edward-1323", mplug != NULL); ++ ++ return /* estimate by size */ ++ (cplug->min_size_deflate ? ++ tc->len >= cplug->min_size_deflate() : ++ 1) && ++ /* estimate by compression mode plugin */ ++ (mplug->should_deflate ? ++ mplug->should_deflate(inode, index) : ++ 1); ++} ++ ++/* Evaluating results of compression transform. ++ Returns true, if we need to accept this results */ ++static int ++save_compressed(int size_before, int size_after, struct inode * inode) ++{ ++ return (size_after + deflate_overhead(inode) + ++ max_cipher_overhead(inode) < size_before); ++} ++ ++/* Guess result of the evaluation above */ ++static int ++need_inflate(reiser4_cluster_t * clust, struct inode *inode, ++ int encrypted /* is cluster encrypted */ ) ++{ ++ tfm_cluster_t *tc = &clust->tc; ++ ++ assert("edward-142", tc != 0); ++ assert("edward-143", inode != NULL); ++ ++ return tc->len < ++ (encrypted ? 
++ inode_scaled_offset(inode, tc->lsize) : ++ tc->lsize); ++} ++ ++/* If results of compression were accepted, then we add ++ a checksum to catch possible disk cluster corruption. ++ The following is a format of the data stored in disk clusters: ++ ++ data This is (transformed) logical cluster. ++ cipher_overhead This is created by ->align() method ++ of cipher plugin. May be absent. ++ checksum (4) This is created by ->checksum method ++ of compression plugin to check ++ integrity. May be absent. ++ ++ Crypto overhead format: ++ ++ data ++ control_byte (1) contains aligned overhead size: ++ 1 <= overhead <= cipher_blksize ++*/ ++/* Append a checksum at the end of a transformed stream */ ++static void dc_set_checksum(compression_plugin * cplug, tfm_cluster_t * tc) ++{ ++ __u32 checksum; ++ ++ assert("edward-1309", tc != NULL); ++ assert("edward-1310", tc->len > 0); ++ assert("edward-1311", cplug->checksum != NULL); ++ ++ checksum = cplug->checksum(tfm_stream_data(tc, OUTPUT_STREAM), tc->len); ++ put_unaligned(cpu_to_le32(checksum), ++ (d32 *)(tfm_stream_data(tc, OUTPUT_STREAM) + tc->len)); ++ tc->len += (int)DC_CHECKSUM_SIZE; ++} ++ ++/* Check a disk cluster checksum. 
++ Returns 0 if checksum is correct, otherwise returns 1 */ ++static int dc_check_checksum(compression_plugin * cplug, tfm_cluster_t * tc) ++{ ++ assert("edward-1312", tc != NULL); ++ assert("edward-1313", tc->len > (int)DC_CHECKSUM_SIZE); ++ assert("edward-1314", cplug->checksum != NULL); ++ ++ if (cplug->checksum(tfm_stream_data(tc, INPUT_STREAM), ++ tc->len - (int)DC_CHECKSUM_SIZE) != ++ le32_to_cpu(get_unaligned((d32 *) ++ (tfm_stream_data(tc, INPUT_STREAM) ++ + tc->len - (int)DC_CHECKSUM_SIZE)))) { ++ warning("edward-156", ++ "Bad disk cluster checksum %d, (should be %d) Fsck?\n", ++ (int)le32_to_cpu ++ (get_unaligned((d32 *) ++ (tfm_stream_data(tc, INPUT_STREAM) + ++ tc->len - (int)DC_CHECKSUM_SIZE))), ++ (int)cplug->checksum ++ (tfm_stream_data(tc, INPUT_STREAM), ++ tc->len - (int)DC_CHECKSUM_SIZE)); ++ return 1; ++ } ++ tc->len -= (int)DC_CHECKSUM_SIZE; ++ return 0; ++} ++ ++/* get input/output stream for some transform action */ ++int grab_tfm_stream(struct inode * inode, tfm_cluster_t * tc, ++ tfm_stream_id id) ++{ ++ size_t size = inode_scaled_cluster_size(inode); ++ ++ assert("edward-901", tc != NULL); ++ assert("edward-1027", inode_compression_plugin(inode) != NULL); ++ ++ if (cluster_get_tfm_act(tc) == TFMA_WRITE) ++ size += deflate_overrun(inode, inode_cluster_size(inode)); ++ ++ if (!tfm_stream(tc, id) && id == INPUT_STREAM) ++ alternate_streams(tc); ++ if (!tfm_stream(tc, id)) ++ return alloc_tfm_stream(tc, size, id); ++ ++ assert("edward-902", tfm_stream_is_set(tc, id)); ++ ++ if (tfm_stream_size(tc, id) < size) ++ return realloc_tfm_stream(tc, size, id); ++ return 0; ++} ++ ++/* Common deflate manager */ ++int reiser4_deflate_cluster(reiser4_cluster_t * clust, struct inode * inode) ++{ ++ int result = 0; ++ int compressed = 0; ++ int encrypted = 0; ++ tfm_cluster_t * tc = &clust->tc; ++ compression_plugin * coplug; ++ ++ assert("edward-401", inode != NULL); ++ assert("edward-903", tfm_stream_is_set(tc, INPUT_STREAM)); ++ assert("edward-1348", 
cluster_get_tfm_act(tc) == TFMA_WRITE); ++ assert("edward-498", !tfm_cluster_is_uptodate(tc)); ++ ++ coplug = inode_compression_plugin(inode); ++ if (should_compress(tc, clust->index, inode)) { ++ /* try to compress, discard bad results */ ++ __u32 dst_len; ++ compression_mode_plugin * mplug = ++ inode_compression_mode_plugin(inode); ++ assert("edward-602", coplug != NULL); ++ assert("edward-1423", coplug->compress != NULL); ++ ++ result = grab_coa(tc, coplug); ++ if (result) { ++ warning("edward-1424", ++ "alloc_coa failed with ret=%d, skipped compression", ++ result); ++ goto cipher; ++ } ++ result = grab_tfm_stream(inode, tc, OUTPUT_STREAM); ++ if (result) { ++ warning("edward-1425", ++ "alloc stream failed with ret=%d, skipped compression", ++ result); ++ goto cipher; ++ } ++ dst_len = tfm_stream_size(tc, OUTPUT_STREAM); ++ coplug->compress(get_coa(tc, coplug->h.id, tc->act), ++ tfm_input_data(clust), tc->len, ++ tfm_output_data(clust), &dst_len); ++ /* make sure we didn't overwrite extra bytes */ ++ assert("edward-603", ++ dst_len <= tfm_stream_size(tc, OUTPUT_STREAM)); ++ ++ /* evaluate results of compression transform */ ++ if (save_compressed(tc->len, dst_len, inode)) { ++ /* good result, accept */ ++ tc->len = dst_len; ++ if (mplug->accept_hook != NULL) { ++ result = mplug->accept_hook(inode, clust->index); ++ if (result) ++ warning("edward-1426", ++ "accept_hook failed with ret=%d", ++ result); ++ } ++ compressed = 1; ++ } ++ else { ++ /* bad result, discard */ ++#if REISER4_DEBUG ++ if (cluster_is_complete(clust, inode)) ++ warning("edward-1338", ++ "incompressible cluster %lu (inode %llu)", ++ clust->index, ++ (unsigned long long)get_inode_oid(inode)); ++#endif ++ if (mplug->discard_hook != NULL && ++ cluster_is_complete(clust, inode)) { ++ result = mplug->discard_hook(inode, ++ clust->index); ++ if (result) ++ warning("edward-1427", ++ "discard_hook failed with ret=%d", ++ result); ++ } ++ } ++ } ++ cipher: ++ if (need_cipher(inode)) { ++ cipher_plugin 
* ciplug; ++ struct blkcipher_desc desc; ++ struct scatterlist src; ++ struct scatterlist dst; ++ ++ ciplug = inode_cipher_plugin(inode); ++ desc.tfm = info_get_cipher(inode_crypto_stat(inode)); ++ desc.flags = 0; ++ if (compressed) ++ alternate_streams(tc); ++ result = grab_tfm_stream(inode, tc, OUTPUT_STREAM); ++ if (result) ++ return result; ++ ++ align_or_cut_overhead(inode, clust, WRITE_OP); ++ src.page = virt_to_page(tfm_input_data(clust)); ++ src.offset = offset_in_page(tfm_input_data(clust)); ++ src.length = tc->len; ++ ++ dst.page = virt_to_page(tfm_output_data(clust)); ++ dst.offset = offset_in_page(tfm_output_data(clust)); ++ dst.length = tc->len; ++ ++ result = crypto_blkcipher_encrypt(&desc, &dst, &src, tc->len); ++ if (result) { ++ warning("edward-1405", ++ "encryption failed flags=%x\n", desc.flags); ++ return result; ++ } ++ encrypted = 1; ++ } ++ if (compressed && coplug->checksum != NULL) ++ dc_set_checksum(coplug, tc); ++ if (!compressed && !encrypted) ++ alternate_streams(tc); ++ return result; ++} ++ ++/* Common inflate manager. 
*/ ++int reiser4_inflate_cluster(reiser4_cluster_t * clust, struct inode * inode) ++{ ++ int result = 0; ++ int transformed = 0; ++ tfm_cluster_t * tc = &clust->tc; ++ compression_plugin * coplug; ++ ++ assert("edward-905", inode != NULL); ++ assert("edward-1178", clust->dstat == PREP_DISK_CLUSTER); ++ assert("edward-906", tfm_stream_is_set(&clust->tc, INPUT_STREAM)); ++ assert("edward-1349", tc->act == TFMA_READ); ++ assert("edward-907", !tfm_cluster_is_uptodate(tc)); ++ ++ /* Handle a checksum (if any) */ ++ coplug = inode_compression_plugin(inode); ++ if (need_inflate(clust, inode, need_cipher(inode)) && ++ coplug->checksum != NULL) { ++ result = dc_check_checksum(coplug, tc); ++ if (unlikely(result)) { ++ warning("edward-1460", ++ "Inode %llu: disk cluster %lu looks corrupted", ++ (unsigned long long)get_inode_oid(inode), ++ clust->index); ++ return RETERR(-EIO); ++ } ++ } ++ if (need_cipher(inode)) { ++ cipher_plugin * ciplug; ++ struct blkcipher_desc desc; ++ struct scatterlist src; ++ struct scatterlist dst; ++ ++ ciplug = inode_cipher_plugin(inode); ++ desc.tfm = info_get_cipher(inode_crypto_stat(inode)); ++ desc.flags = 0; ++ result = grab_tfm_stream(inode, tc, OUTPUT_STREAM); ++ if (result) ++ return result; ++ assert("edward-909", tfm_cluster_is_set(tc)); ++ ++ src.page = virt_to_page(tfm_input_data(clust)); ++ src.offset = offset_in_page(tfm_input_data(clust)); ++ src.length = tc->len; ++ ++ dst.page = virt_to_page(tfm_output_data(clust)); ++ dst.offset = offset_in_page(tfm_output_data(clust)); ++ dst.length = tc->len; ++ ++ result = crypto_blkcipher_decrypt(&desc, &dst, &src, tc->len); ++ if (result) { ++ warning("edward-1600", "decrypt failed flags=%x\n", ++ desc.flags); ++ return result; ++ } ++ align_or_cut_overhead(inode, clust, READ_OP); ++ transformed = 1; ++ } ++ if (need_inflate(clust, inode, 0)) { ++ unsigned dst_len = inode_cluster_size(inode); ++ if(transformed) ++ alternate_streams(tc); ++ ++ result = grab_tfm_stream(inode, tc, 
OUTPUT_STREAM); ++ if (result) ++ return result; ++ assert("edward-1305", coplug->decompress != NULL); ++ assert("edward-910", tfm_cluster_is_set(tc)); ++ ++ coplug->decompress(get_coa(tc, coplug->h.id, tc->act), ++ tfm_input_data(clust), tc->len, ++ tfm_output_data(clust), &dst_len); ++ /* check length */ ++ tc->len = dst_len; ++ assert("edward-157", dst_len == tc->lsize); ++ transformed = 1; ++ } ++ if (!transformed) ++ alternate_streams(tc); ++ return result; ++} ++ ++/* This is implementation of readpage method of struct ++ address_space_operations for cryptcompress plugin. */ ++int readpage_cryptcompress(struct file *file, struct page *page) ++{ ++ reiser4_context *ctx; ++ reiser4_cluster_t clust; ++ item_plugin *iplug; ++ int result; ++ ++ assert("edward-88", PageLocked(page)); ++ assert("vs-976", !PageUptodate(page)); ++ assert("edward-89", page->mapping && page->mapping->host); ++ ++ ctx = reiser4_init_context(page->mapping->host->i_sb); ++ if (IS_ERR(ctx)) { ++ unlock_page(page); ++ return PTR_ERR(ctx); ++ } ++ assert("edward-113", ++ ergo(file != NULL, ++ page->mapping == file->f_dentry->d_inode->i_mapping)); ++ ++ if (PageUptodate(page)) { ++ warning("edward-1338", "page is already uptodate\n"); ++ unlock_page(page); ++ reiser4_exit_context(ctx); ++ return 0; ++ } ++ cluster_init_read(&clust, NULL); ++ clust.file = file; ++ iplug = item_plugin_by_id(CTAIL_ID); ++ if (!iplug->s.file.readpage) { ++ unlock_page(page); ++ put_cluster_handle(&clust); ++ reiser4_exit_context(ctx); ++ return -EINVAL; ++ } ++ result = iplug->s.file.readpage(&clust, page); ++ ++ assert("edward-1459", !PageLocked(page)); ++ assert("edward-64", ergo(result == 0, PageUptodate(page))); ++ put_cluster_handle(&clust); ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++/* how much pages will be captured */ ++static int cluster_nrpages_to_capture(reiser4_cluster_t * clust) ++{ ++ switch (clust->op) { ++ case PCL_APPEND: ++ return clust->nr_pages; ++ case PCL_TRUNCATE: ++ 
assert("edward-1179", clust->win != NULL); ++ return count_to_nrpages(clust->win->off + clust->win->count); ++ default: ++ impossible("edward-1180", "bad page cluster option"); ++ return 0; ++ } ++} ++ ++static void set_cluster_pages_dirty(reiser4_cluster_t * clust) ++{ ++ int i; ++ struct page *pg; ++ int nrpages = cluster_nrpages_to_capture(clust); ++ ++ for (i = 0; i < nrpages; i++) { ++ ++ pg = clust->pages[i]; ++ assert("edward-968", pg != NULL); ++ lock_page(pg); ++ assert("edward-1065", PageUptodate(pg)); ++ reiser4_set_page_dirty_internal(pg); ++ unlock_page(pg); ++ mark_page_accessed(pg); ++ } ++} ++ ++static void clear_cluster_pages_dirty(reiser4_cluster_t * clust) ++{ ++ int i; ++ assert("edward-1275", clust != NULL); ++ ++ for (i = 0; i < clust->nr_pages; i++) { ++ assert("edward-1276", clust->pages[i] != NULL); ++ ++ lock_page(clust->pages[i]); ++ if (PageDirty(clust->pages[i])) { ++ assert("edward-1277", PageUptodate(clust->pages[i])); ++ cancel_dirty_page(clust->pages[i], PAGE_CACHE_SIZE); ++ } ++#if REISER4_DEBUG ++ else ++ /* Race between flush and write: ++ some pages became clean when write() (or another ++ process which modifies data) capture the cluster. 
*/ ++ warning("edward-985", "Page of index %lu (inode %llu)" ++ " is not dirty\n", clust->pages[i]->index, ++ (unsigned long long)get_inode_oid(clust-> ++ pages[i]-> ++ mapping-> ++ host)); ++#endif ++ unlock_page(clust->pages[i]); ++ } ++} ++ ++/* update i_size by window */ ++static void inode_set_new_size(reiser4_cluster_t * clust, struct inode *inode) ++{ ++ loff_t size; ++ reiser4_slide_t *win; ++ ++ assert("edward-1181", clust != NULL); ++ assert("edward-1182", inode != NULL); ++ ++ win = clust->win; ++ assert("edward-1183", win != NULL); ++ assert("edward-1183", win->count != 0); ++ ++ size = clust_to_off(clust->index, inode) + win->off; ++ ++ switch (clust->op) { ++ case PCL_APPEND: ++ if (size + win->count <= inode->i_size) ++ /* overwrite only */ ++ return; ++ size += win->count; ++ break; ++ case PCL_TRUNCATE: ++ break; ++ default: ++ impossible("edward-1184", "bad page cluster option"); ++ break; ++ } ++ inode_check_scale_nolock(inode, inode->i_size, size); ++ inode->i_size = size; ++ return; ++} ++ ++/* Check in page cluster modifications. ++ . Make jnode dirty, if it wasn't; ++ . Reserve space for a disk cluster update by flush algorithm, if needed; ++ . Clean up old references (if any). ++ . 
Put pages (grabbed in this thread) which will be truncated ++*/ ++static void ++make_cluster_jnode_dirty_locked(reiser4_cluster_t * clust, jnode * node, ++ loff_t * old_isize, struct inode *inode) ++{ ++ int i; ++ int old_nrpages; ++ int new_nrpages = cluster_nrpages_to_capture(clust); ++ ++ assert("edward-973", new_nrpages > 0); ++ assert("edward-221", node != NULL); ++ assert("edward-971", clust->reserved == 1); ++ assert_spin_locked(&(node->guard)); ++ assert("edward-972", node->page_count <= cluster_nrpages(inode)); ++ assert("edward-1263", ++ clust->reserved_prepped == estimate_update_cluster(inode)); ++ assert("edward-1264", clust->reserved_unprepped == 0); ++ ++ if (JF_ISSET(node, JNODE_DIRTY)) { ++ /* someone has modified this cluster, but ++ the modifications are not committed yet */ ++ old_nrpages = ++ count_to_nrpages(cnt_to_clcnt(*old_isize, ++ clust->index, inode)); ++ /* free space which is already reserved */ ++ free_reserved4cluster(inode, clust, ++ estimate_update_cluster(inode)); ++ /* put old references */ ++ for (i = 0; i < old_nrpages; i++) { ++ assert("edward-975", clust->pages[i]); ++ assert("edward-1185", PageUptodate(clust->pages[i])); ++ ++ page_cache_release(clust->pages[i]); ++#if REISER4_DEBUG ++ cryptcompress_inode_data(inode)->pgcount --; ++#endif ++ } ++ } else { ++ /* no captured pages */ ++ assert("edward-1043", node->page_count == 0); ++ jnode_make_dirty_locked(node); ++ clust->reserved = 0; ++ } ++ /* put pages that will be truncated (if any) */ ++ for (i = new_nrpages; i < clust->nr_pages; i++) { ++ assert("edward-1433", clust->pages[i]); ++ assert("edward-1434", PageUptodate(clust->pages[i])); ++ page_cache_release(clust->pages[i]); ++#if REISER4_DEBUG ++ cryptcompress_inode_data(inode)->pgcount --; ++#endif ++ } ++#if REISER4_DEBUG ++ clust->reserved_prepped -= estimate_update_cluster(inode); ++ node->page_count = new_nrpages; ++#endif ++ return; ++} ++ ++/* This function spawns a transaction and ++ is called by any thread as 
a final step in page cluster modification. ++*/ ++static int try_capture_cluster(reiser4_cluster_t * clust, struct inode *inode) ++{ ++ int result = 0; ++ loff_t old_size; ++ jnode *node; ++ ++ assert("edward-1029", clust != NULL); ++ assert("edward-1030", clust->reserved == 1); ++ assert("edward-1031", clust->nr_pages != 0); ++ assert("edward-1032", clust->pages != NULL); ++ assert("edward-1033", clust->pages[0] != NULL); ++ ++ node = jprivate(clust->pages[0]); ++ assert("edward-1035", node != NULL); ++ assert("edward-1446", jnode_is_cluster_page(node)); ++ ++ spin_lock_jnode(node); ++ ++ old_size = inode->i_size; ++ if (clust->win) ++ inode_set_new_size(clust, inode); ++ ++ result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0); ++ if (result) ++ goto exit; ++ make_cluster_jnode_dirty_locked(clust, node, &old_size, inode); ++ exit: ++ spin_unlock_jnode(node); ++ jput(node); ++ return result; ++} ++ ++/* Collect unlocked cluster pages for any modifications and attach a jnode. ++ We allocate only one jnode per cluster, this jnode is binded to the first ++ page of this cluster, so we have an extra-reference that will exist with ++ this jnode, other references will be cleaned up in flush time. 
++*/ ++static int ++grab_cluster_pages_jnode(struct inode *inode, reiser4_cluster_t * clust) ++{ ++ int i; ++ int result = 0; ++ jnode *node = NULL; ++ ++ assert("edward-182", clust != NULL); ++ assert("edward-183", clust->pages != NULL); ++ assert("edward-184", clust->nr_pages <= cluster_nrpages(inode)); ++ ++ if (clust->nr_pages == 0) ++ return 0; ++ ++ for (i = 0; i < clust->nr_pages; i++) { ++ ++ assert("edward-1044", clust->pages[i] == NULL); ++ ++ clust->pages[i] = ++ find_or_create_page(inode->i_mapping, ++ clust_to_pg(clust->index, inode) + i, ++ reiser4_ctx_gfp_mask_get()); ++ if (!clust->pages[i]) { ++ result = RETERR(-ENOMEM); ++ break; ++ } ++ if (i == 0) { ++ node = jnode_of_page(clust->pages[i]); ++ if (IS_ERR(node)) { ++ result = PTR_ERR(node); ++ unlock_page(clust->pages[i]); ++ break; ++ } ++ JF_SET(node, JNODE_CLUSTER_PAGE); ++ unlock_page(clust->pages[i]); ++ assert("edward-919", node); ++ continue; ++ } ++ unlock_page(clust->pages[i]); ++ } ++ if (result) { ++ while (i) ++ page_cache_release(clust->pages[--i]); ++ if (node && !IS_ERR(node)) ++ jput(node); ++ return result; ++ } ++ assert("edward-920", jprivate(clust->pages[0])); ++#if REISER4_DEBUG ++ cryptcompress_inode_data(inode)->pgcount += clust->nr_pages; ++#endif ++ return 0; ++} ++ ++/* Collect unlocked cluster pages only for read (not to modify) */ ++int grab_cluster_pages(struct inode *inode, reiser4_cluster_t * clust) ++{ ++ int i; ++ int result = 0; ++ ++ assert("edward-1428", inode != NULL); ++ assert("edward-1429", inode->i_mapping != NULL); ++ assert("edward-787", clust != NULL); ++ assert("edward-788", clust->pages != NULL); ++ assert("edward-789", clust->nr_pages != 0); ++ assert("edward-790", clust->nr_pages <= cluster_nrpages(inode)); ++ ++ for (i = 0; i < clust->nr_pages; i++) { ++ clust->pages[i] = ++ find_or_create_page(inode->i_mapping, ++ clust_to_pg(clust->index, inode) + i, ++ reiser4_ctx_gfp_mask_get()); ++ if (!clust->pages[i]) { ++ result = RETERR(-ENOMEM); ++ break; 
++ } ++ unlock_page(clust->pages[i]); ++ } ++ if (result) ++ while (i) ++ page_cache_release(clust->pages[--i]); ++ return result; ++} ++ ++/* @node might be attached by reiser4_writepage(), not by ++ cryptcompress plugin code, but emergency flush should ++ understand that pages of cryptcompress files are not ++ flushable. ++*/ ++#if 0 ++int jnode_of_cluster(const jnode * node, struct page * page) ++{ ++ assert("edward-1339", node != NULL); ++ assert("edward-1340", page != NULL); ++ assert("edward-1341", page->mapping != NULL); ++ assert("edward-1342", page->mapping->host != NULL); ++ assert("edward-1343", ++ ergo(jnode_is_unformatted(node), ++ get_inode_oid(page->mapping->host) == ++ node->key.j.objectid)); ++ if (inode_file_plugin(page->mapping->host) == ++ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)) { ++#if REISER4_DEBUG ++ if (!jnode_is_cluster_page(node)) ++ warning("edward-1345", ++ "inode %llu: cluster page of index %lu became private", ++ (unsigned long long)get_inode_oid(page->mapping->host), ++ page->index); ++#endif ++ return 1; ++ } ++ return 0; ++} ++#endif /* 0 */ ++ ++/* put cluster pages */ ++void reiser4_release_cluster_pages(reiser4_cluster_t * clust) ++{ ++ int i; ++ ++ assert("edward-447", clust != NULL); ++ for (i = 0; i < clust->nr_pages; i++) { ++ ++ assert("edward-449", clust->pages[i] != NULL); ++ ++ page_cache_release(clust->pages[i]); ++ } ++} ++ ++/* this is called when something is failed */ ++static void reiser4_release_cluster_pages_and_jnode(reiser4_cluster_t * clust) ++{ ++ jnode *node; ++ ++ assert("edward-445", clust != NULL); ++ assert("edward-922", clust->pages != NULL); ++ assert("edward-446", clust->pages[0] != NULL); ++ ++ node = jprivate(clust->pages[0]); ++ ++ assert("edward-447", node != NULL); ++ ++ reiser4_release_cluster_pages(clust); ++ jput(node); ++} ++ ++#if REISER4_DEBUG ++static int window_ok(reiser4_slide_t * win, struct inode *inode) ++{ ++ assert("edward-1115", win != NULL); ++ assert("edward-1116", 
ergo(win->delta, win->stat == HOLE_WINDOW)); ++ ++ return (win->off != inode_cluster_size(inode)) && ++ (win->off + win->count + win->delta <= inode_cluster_size(inode)); ++} ++ ++static int cluster_ok(reiser4_cluster_t * clust, struct inode *inode) ++{ ++ assert("edward-279", clust != NULL); ++ ++ if (!clust->pages) ++ return 0; ++ return (clust->win ? window_ok(clust->win, inode) : 1); ++} ++#endif ++ ++/* guess next window stat */ ++static inline window_stat next_window_stat(reiser4_slide_t * win) ++{ ++ assert("edward-1130", win != NULL); ++ return ((win->stat == HOLE_WINDOW && win->delta == 0) ? ++ HOLE_WINDOW : DATA_WINDOW); ++} ++ ++/* guess next cluster index and window params */ ++static void ++update_cluster(struct inode *inode, reiser4_cluster_t * clust, loff_t file_off, ++ loff_t to_file) ++{ ++ reiser4_slide_t *win; ++ ++ assert("edward-185", clust != NULL); ++ assert("edward-438", clust->pages != NULL); ++ assert("edward-281", cluster_ok(clust, inode)); ++ ++ win = clust->win; ++ if (!win) ++ return; ++ ++ switch (win->stat) { ++ case DATA_WINDOW: ++ /* increment window position */ ++ clust->index++; ++ win->stat = DATA_WINDOW; ++ win->off = 0; ++ win->count = min_count(inode_cluster_size(inode), to_file); ++ break; ++ case HOLE_WINDOW: ++ switch (next_window_stat(win)) { ++ case HOLE_WINDOW: ++ /* set window to fit the offset we start write from */ ++ clust->index = off_to_clust(file_off, inode); ++ win->stat = HOLE_WINDOW; ++ win->off = 0; ++ win->count = off_to_cloff(file_off, inode); ++ win->delta = ++ min_count(inode_cluster_size(inode) - win->count, ++ to_file); ++ break; ++ case DATA_WINDOW: ++ /* do not move the window, just change its state, ++ off+count+delta=inv */ ++ win->stat = DATA_WINDOW; ++ win->off = win->off + win->count; ++ win->count = win->delta; ++ win->delta = 0; ++ break; ++ default: ++ impossible("edward-282", "wrong next window state"); ++ } ++ break; ++ default: ++ impossible("edward-283", "wrong current window state"); ++ } 
++ assert("edward-1068", cluster_ok(clust, inode)); ++} ++ ++static int update_sd_cryptcompress(struct inode *inode) ++{ ++ int result = 0; ++ ++ assert("edward-978", reiser4_schedulable()); ++ ++ result = reiser4_grab_space_force( /* one for stat data update */ ++ estimate_update_common(inode), ++ BA_CAN_COMMIT); ++ if (result) ++ return result; ++ inode->i_ctime = inode->i_mtime = CURRENT_TIME; ++ result = reiser4_update_sd(inode); ++ ++ return result; ++} ++ ++/* NOTE-Edward: this is too similar to reiser4/txnmgr.c:uncapture_jnode() */ ++static void uncapture_cluster_jnode(jnode * node) ++{ ++ txn_atom *atom; ++ ++ assert_spin_locked(&(node->guard)); ++ ++ /*jnode_make_clean(node); */ ++ atom = jnode_get_atom(node); ++ if (atom == NULL) { ++ assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY)); ++ spin_unlock_jnode(node); ++ return; ++ } ++ ++ reiser4_uncapture_block(node); ++ spin_unlock_atom(atom); ++ jput(node); ++} ++ ++static void forget_cluster_pages(struct page **pages, int nr) ++{ ++ int i; ++ for (i = 0; i < nr; i++) { ++ ++ assert("edward-1045", pages[i] != NULL); ++ page_cache_release(pages[i]); ++ } ++} ++ ++/* Check out last modifications we are about to commit, ++ and prepare input stream for transform operations. 
++*/ ++int ++flush_cluster_pages(reiser4_cluster_t * clust, jnode * node, ++ struct inode *inode) ++{ ++ int result = 0; ++ int i; ++ int nr_pages = 0; ++ tfm_cluster_t *tc = &clust->tc; ++#if REISER4_DEBUG ++ int node_pgcount; ++#endif ++ assert("edward-980", node != NULL); ++ assert("edward-236", inode != NULL); ++ assert("edward-237", clust != NULL); ++ assert("edward-240", !clust->win); ++ assert("edward-241", reiser4_schedulable()); ++ assert("edward-718", cryptcompress_inode_ok(inode)); ++ ++ result = grab_tfm_stream(inode, tc, INPUT_STREAM); ++ if (result) { ++ warning("edward-1430", ++ "alloc stream failed with ret=%d", result); ++ return result; ++ } ++ spin_lock_jnode(node); ++#if REISER4_DEBUG ++ node_pgcount = node->page_count; ++#endif ++ if (!JF_ISSET(node, JNODE_DIRTY)) { ++ /* race with another flush */ ++#if REISER4_DEBUG ++ assert("edward-981", node_pgcount == 0); ++ warning("edward-982", "flush_cluster_pages: jnode is not dirty " ++ "clust %lu, inode %llu\n", ++ clust->index, (unsigned long long)get_inode_oid(inode)); ++#endif ++ spin_unlock_jnode(node); ++ return RETERR(-E_REPEAT); ++ } ++ /* Check out a size of logical cluster and ++ set a number of cluster pages to commit. 
*/ ++ tc->len = tc->lsize = fsize_to_count(clust, inode); ++ clust->nr_pages = count_to_nrpages(tc->len); ++ ++#if REISER4_DEBUG ++ node->page_count = 0; ++#endif ++ cluster_reserved2grabbed(estimate_update_cluster(inode)); ++ uncapture_cluster_jnode(node); ++ ++ assert("edward-1224", reiser4_schedulable()); ++ /* Check out page cluster for commit */ ++ nr_pages = ++ find_get_pages(inode->i_mapping, clust_to_pg(clust->index, inode), ++ clust->nr_pages, clust->pages); ++ if (nr_pages != clust->nr_pages) ++ goto checkout_failed; ++ ++ /* Try to construct input stream from the checked out pages */ ++ for (i = 0; i < clust->nr_pages; i++) { ++ char *data; ++ ++ assert("edward-242", clust->pages[i] != NULL); ++ if (clust->pages[i]->index != ++ clust_to_pg(clust->index, inode) + i) ++ goto checkout_failed; ++ BUG_ON(!PageUptodate(clust->pages[i])); ++ ++ /* flush the page into input transform stream */ ++ lock_page(clust->pages[i]); ++ data = kmap(clust->pages[i]); ++ ++ assert("edward-986", cnt_to_pgcnt(tc->len, i) != 0); ++ ++ memcpy(tfm_stream_data(tc, INPUT_STREAM) + pg_to_off(i), ++ data, cnt_to_pgcnt(tc->len, i)); ++ kunmap(clust->pages[i]); ++ unlock_page(clust->pages[i]); ++ } ++ /* page cluster flushed successfully */ ++ ++ clear_cluster_pages_dirty(clust); ++ reiser4_release_cluster_pages(clust); ++#if REISER4_DEBUG ++ cryptcompress_inode_data(inode)->pgcount -= clust->nr_pages; ++#endif ++ goto out; ++ checkout_failed: ++#if REISER4_DEBUG ++ assert("edward-1282", node_pgcount == 0); ++ warning("edward-1435", "Inode %llu : checkout page cluster" ++ "of index %lu failed\n", ++ (unsigned long long)get_inode_oid(inode), clust->index); ++#endif /* REISER4_DEBUG */ ++ result = RETERR(-E_REPEAT); ++ out: ++ /* put pages that were found here */ ++ forget_cluster_pages(clust->pages, nr_pages); ++ return result; ++} ++ ++/* set hint for the cluster of the index @index */ ++static void set_hint_cluster(struct inode *inode, hint_t * hint, ++ cloff_t index, znode_lock_mode 
mode) ++{ ++ reiser4_key key; ++ assert("edward-722", cryptcompress_inode_ok(inode)); ++ assert("edward-723", ++ inode_file_plugin(inode) == ++ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)); ++ ++ inode_file_plugin(inode)->key_by_inode(inode, ++ clust_to_off(index, inode), ++ &key); ++ ++ reiser4_seal_init(&hint->seal, &hint->ext_coord.coord, &key); ++ hint->offset = get_key_offset(&key); ++ hint->mode = mode; ++} ++ ++void invalidate_hint_cluster(reiser4_cluster_t * clust) ++{ ++ assert("edward-1291", clust != NULL); ++ assert("edward-1292", clust->hint != NULL); ++ ++ done_lh(&clust->hint->lh); ++ hint_clr_valid(clust->hint); ++} ++ ++void put_hint_cluster(reiser4_cluster_t * clust, struct inode *inode, ++ znode_lock_mode mode) ++{ ++ assert("edward-1286", clust != NULL); ++ assert("edward-1287", clust->hint != NULL); ++ ++ set_hint_cluster(inode, clust->hint, clust->index + 1, mode); ++ invalidate_hint_cluster(clust); ++} ++ ++static int ++balance_dirty_page_cluster(reiser4_cluster_t * clust, struct inode *inode, ++ loff_t off, loff_t to_file) ++{ ++ int result; ++ ++ assert("edward-724", inode != NULL); ++ assert("edward-725", cryptcompress_inode_ok(inode)); ++ ++ /* set next window params */ ++ update_cluster(inode, clust, off, to_file); ++ ++ result = update_sd_cryptcompress(inode); ++ if (result) ++ return result; ++ assert("edward-726", clust->hint->lh.owner == NULL); ++ ++ reiser4_throttle_write(inode); ++ return 0; ++} ++ ++/* set zeroes to the cluster, update it, and maybe, try to capture its pages */ ++static int ++write_hole(struct inode *inode, reiser4_cluster_t * clust, loff_t file_off, ++ loff_t to_file) ++{ ++ char *data; ++ int result = 0; ++ unsigned cl_off, cl_count = 0; ++ unsigned to_pg, pg_off; ++ reiser4_slide_t *win; ++ ++ assert("edward-190", clust != NULL); ++ assert("edward-1069", clust->win != NULL); ++ assert("edward-191", inode != NULL); ++ assert("edward-727", cryptcompress_inode_ok(inode)); ++ assert("edward-1171", clust->dstat 
!= INVAL_DISK_CLUSTER); ++ assert("edward-1154", ++ ergo(clust->dstat != FAKE_DISK_CLUSTER, clust->reserved == 1)); ++ ++ win = clust->win; ++ ++ assert("edward-1070", win != NULL); ++ assert("edward-201", win->stat == HOLE_WINDOW); ++ assert("edward-192", cluster_ok(clust, inode)); ++ ++ if (win->off == 0 && win->count == inode_cluster_size(inode)) { ++ /* the hole will be represented by fake disk cluster */ ++ update_cluster(inode, clust, file_off, to_file); ++ return 0; ++ } ++ cl_count = win->count; /* number of zeroes to write */ ++ cl_off = win->off; ++ pg_off = off_to_pgoff(win->off); ++ ++ while (cl_count) { ++ struct page *page; ++ page = clust->pages[off_to_pg(cl_off)]; ++ ++ assert("edward-284", page != NULL); ++ ++ to_pg = min_count(PAGE_CACHE_SIZE - pg_off, cl_count); ++ lock_page(page); ++ data = kmap_atomic(page, KM_USER0); ++ memset(data + pg_off, 0, to_pg); ++ flush_dcache_page(page); ++ kunmap_atomic(data, KM_USER0); ++ SetPageUptodate(page); ++ unlock_page(page); ++ ++ cl_off += to_pg; ++ cl_count -= to_pg; ++ pg_off = 0; ++ } ++ if (!win->delta) { ++ /* only zeroes, try to capture */ ++ ++ set_cluster_pages_dirty(clust); ++ result = try_capture_cluster(clust, inode); ++ if (result) ++ return result; ++ put_hint_cluster(clust, inode, ZNODE_WRITE_LOCK); ++ result = ++ balance_dirty_page_cluster(clust, inode, file_off, to_file); ++ } else ++ update_cluster(inode, clust, file_off, to_file); ++ return result; ++} ++ ++/* ++ The main disk search procedure for cryptcompress plugins, which ++ . scans all items of disk cluster with the lock mode @mode ++ . maybe reads each one (if @read) ++ . 
maybe makes its znode dirty (if write lock mode was specified) ++ ++ NOTE-EDWARD: Callers should handle the case when disk cluster ++ is incomplete (-EIO) ++*/ ++int find_disk_cluster(reiser4_cluster_t * clust, ++ struct inode *inode, int read, znode_lock_mode mode) ++{ ++ flow_t f; ++ hint_t *hint; ++ int result = 0; ++ unsigned long cl_idx; ++ ra_info_t ra_info; ++ file_plugin *fplug; ++ item_plugin *iplug; ++ tfm_cluster_t *tc; ++ int was_grabbed; ++ ++ assert("edward-138", clust != NULL); ++ assert("edward-728", clust->hint != NULL); ++ assert("edward-226", reiser4_schedulable()); ++ assert("edward-137", inode != NULL); ++ assert("edward-729", cryptcompress_inode_ok(inode)); ++ ++ hint = clust->hint; ++ cl_idx = clust->index; ++ fplug = inode_file_plugin(inode); ++ was_grabbed = get_current_context()->grabbed_blocks; ++ tc = &clust->tc; ++ ++ assert("edward-462", !tfm_cluster_is_uptodate(tc)); ++ assert("edward-461", ergo(read, tfm_stream_is_set(tc, INPUT_STREAM))); ++ ++ dclust_init_extension(hint); ++ ++ /* set key of the first disk cluster item */ ++ fplug->flow_by_inode(inode, ++ (read ? (char __user *)tfm_stream_data(tc, INPUT_STREAM) : NULL), ++ 0 /* kernel space */ , ++ inode_scaled_cluster_size(inode), ++ clust_to_off(cl_idx, inode), READ_OP, &f); ++ if (mode == ZNODE_WRITE_LOCK) { ++ /* reserve for flush to make dirty all the leaf nodes ++ which contain disk cluster */ ++ result = ++ reiser4_grab_space_force(estimate_dirty_cluster(inode), ++ BA_CAN_COMMIT); ++ if (result) ++ goto out; ++ } ++ ++ ra_info.key_to_stop = f.key; ++ set_key_offset(&ra_info.key_to_stop, get_key_offset(reiser4_max_key())); ++ ++ while (f.length) { ++ result = find_cluster_item(hint, &f.key, mode, ++ NULL, FIND_EXACT, ++ (mode == ZNODE_WRITE_LOCK ? 
++ CBK_FOR_INSERT : 0)); ++ switch (result) { ++ case CBK_COORD_NOTFOUND: ++ result = 0; ++ if (inode_scaled_offset ++ (inode, ++ clust_to_off(cl_idx, ++ inode)) == get_key_offset(&f.key)) { ++ /* first item not found, this is treated ++ as disk cluster is absent */ ++ clust->dstat = FAKE_DISK_CLUSTER; ++ goto out; ++ } ++ /* we are outside the cluster, stop search here */ ++ assert("edward-146", ++ f.length != inode_scaled_cluster_size(inode)); ++ goto ok; ++ case CBK_COORD_FOUND: ++ assert("edward-148", ++ hint->ext_coord.coord.between == AT_UNIT); ++ assert("edward-460", ++ hint->ext_coord.coord.unit_pos == 0); ++ ++ coord_clear_iplug(&hint->ext_coord.coord); ++ result = zload_ra(hint->ext_coord.coord.node, &ra_info); ++ if (unlikely(result)) ++ goto out; ++ iplug = item_plugin_by_coord(&hint->ext_coord.coord); ++ assert("edward-147", ++ item_id_by_coord(&hint->ext_coord.coord) == ++ CTAIL_ID); ++ ++ result = iplug->s.file.read(NULL, &f, hint); ++ if (result) { ++ zrelse(hint->ext_coord.coord.node); ++ goto out; ++ } ++ if (mode == ZNODE_WRITE_LOCK) { ++ /* Don't make dirty more nodes then it was ++ estimated (see comments before ++ estimate_dirty_cluster). Missed nodes will be ++ read up in flush time if they are evicted from ++ memory */ ++ if (dclust_get_extension_ncount(hint) <= ++ estimate_dirty_cluster(inode)) ++ znode_make_dirty(hint->ext_coord.coord.node); ++ ++ znode_set_convertible(hint->ext_coord.coord. 
++ node); ++ } ++ zrelse(hint->ext_coord.coord.node); ++ break; ++ default: ++ goto out; ++ } ++ } ++ ok: ++ /* at least one item was found */ ++ /* NOTE-EDWARD: Callers should handle the case ++ when disk cluster is incomplete (-EIO) */ ++ tc->len = inode_scaled_cluster_size(inode) - f.length; ++ tc->lsize = fsize_to_count(clust, inode); ++ assert("edward-1196", tc->len > 0); ++ assert("edward-1406", tc->lsize > 0); ++ ++ if (hint_is_unprepped_dclust(clust->hint)) ++ clust->dstat = UNPR_DISK_CLUSTER; ++ else { ++ dclust_set_extension_dsize(clust->hint, tc->len); ++ clust->dstat = PREP_DISK_CLUSTER; ++ } ++ out: ++ assert("edward-1339", ++ get_current_context()->grabbed_blocks >= was_grabbed); ++ grabbed2free(get_current_context(), ++ get_current_super_private(), ++ get_current_context()->grabbed_blocks - was_grabbed); ++ return result; ++} ++ ++int ++get_disk_cluster_locked(reiser4_cluster_t * clust, struct inode *inode, ++ znode_lock_mode lock_mode) ++{ ++ reiser4_key key; ++ ra_info_t ra_info; ++ ++ assert("edward-730", reiser4_schedulable()); ++ assert("edward-731", clust != NULL); ++ assert("edward-732", inode != NULL); ++ ++ if (hint_is_valid(clust->hint)) { ++ assert("edward-1293", clust->dstat != INVAL_DISK_CLUSTER); ++ assert("edward-1294", ++ znode_is_write_locked(clust->hint->lh.node)); ++ /* already have a valid locked position */ ++ return (clust->dstat == ++ FAKE_DISK_CLUSTER ? CBK_COORD_NOTFOUND : ++ CBK_COORD_FOUND); ++ } ++ key_by_inode_cryptcompress(inode, clust_to_off(clust->index, inode), ++ &key); ++ ra_info.key_to_stop = key; ++ set_key_offset(&ra_info.key_to_stop, get_key_offset(reiser4_max_key())); ++ ++ return find_cluster_item(clust->hint, &key, lock_mode, NULL, FIND_EXACT, ++ CBK_FOR_INSERT); ++} ++ ++/* Read needed cluster pages before modifying. ++ If success, @clust->hint contains locked position in the tree. ++ Also: ++ . find and set disk cluster state ++ . make disk cluster dirty if its state is not FAKE_DISK_CLUSTER. 
++*/ ++static int ++read_some_cluster_pages(struct inode *inode, reiser4_cluster_t * clust) ++{ ++ int i; ++ int result = 0; ++ item_plugin *iplug; ++ reiser4_slide_t *win = clust->win; ++ znode_lock_mode mode = ZNODE_WRITE_LOCK; ++ ++ iplug = item_plugin_by_id(CTAIL_ID); ++ ++ assert("edward-924", !tfm_cluster_is_uptodate(&clust->tc)); ++ ++#if REISER4_DEBUG ++ if (clust->nr_pages == 0) { ++ /* start write hole from fake disk cluster */ ++ assert("edward-1117", win != NULL); ++ assert("edward-1118", win->stat == HOLE_WINDOW); ++ assert("edward-1119", new_cluster(clust, inode)); ++ } ++#endif ++ if (new_cluster(clust, inode)) { ++ /* ++ new page cluster is about to be written, nothing to read, ++ */ ++ assert("edward-734", reiser4_schedulable()); ++ assert("edward-735", clust->hint->lh.owner == NULL); ++ ++ if (clust->nr_pages) { ++ int off; ++ char *data; ++ struct page * pg; ++ assert("edward-1419", clust->pages != NULL); ++ pg = clust->pages[clust->nr_pages - 1]; ++ assert("edward-1420", pg != NULL); ++ off = off_to_pgoff(win->off+win->count+win->delta); ++ if (off) { ++ lock_page(pg); ++ data = kmap_atomic(pg, KM_USER0); ++ memset(data + off, 0, PAGE_CACHE_SIZE - off); ++ flush_dcache_page(pg); ++ kunmap_atomic(data, KM_USER0); ++ unlock_page(pg); ++ } ++ } ++ clust->dstat = FAKE_DISK_CLUSTER; ++ return 0; ++ } ++ /* ++ Here we should search for disk cluster to figure out its real state. 
++ Also there is one more important reason to do disk search: we need ++ to make disk cluster _dirty_ if it exists ++ */ ++ ++ /* if windows is specified, read the only pages ++ that will be modified partially */ ++ ++ for (i = 0; i < clust->nr_pages; i++) { ++ struct page *pg = clust->pages[i]; ++ ++ lock_page(pg); ++ if (PageUptodate(pg)) { ++ unlock_page(pg); ++ continue; ++ } ++ unlock_page(pg); ++ ++ if (win && ++ i >= count_to_nrpages(win->off) && ++ i < off_to_pg(win->off + win->count + win->delta)) ++ /* page will be completely overwritten */ ++ continue; ++ ++ if (win && (i == clust->nr_pages - 1) && ++ /* the last page is ++ partially modified, ++ not uptodate .. */ ++ (count_to_nrpages(inode->i_size) <= pg->index)) { ++ /* .. and appended, ++ so set zeroes to the rest */ ++ char *data; ++ int offset; ++ lock_page(pg); ++ data = kmap_atomic(pg, KM_USER0); ++ ++ assert("edward-1260", ++ count_to_nrpages(win->off + win->count + ++ win->delta) - 1 == i); ++ ++ offset = ++ off_to_pgoff(win->off + win->count + win->delta); ++ memset(data + offset, 0, PAGE_CACHE_SIZE - offset); ++ flush_dcache_page(pg); ++ kunmap_atomic(data, KM_USER0); ++ unlock_page(pg); ++ /* still not uptodate */ ++ break; ++ } ++ if (!tfm_cluster_is_uptodate(&clust->tc)) { ++ result = ctail_read_disk_cluster(clust, inode, mode); ++ if (result) ++ goto out; ++ assert("edward-925", ++ tfm_cluster_is_uptodate(&clust->tc)); ++ } ++ lock_page(pg); ++ result = do_readpage_ctail(inode, clust, pg, mode); ++ unlock_page(pg); ++ if (result) { ++ impossible("edward-219", ++ "do_readpage_ctail returned crap"); ++ goto out; ++ } ++ } ++ if (!tfm_cluster_is_uptodate(&clust->tc)) { ++ /* disk cluster unclaimed, but we need to make its znodes dirty ++ to make flush update convert its content */ ++ result = find_disk_cluster(clust, inode, 0 /* do not read items */, ++ mode); ++ } ++ out: ++ tfm_cluster_clr_uptodate(&clust->tc); ++ return result; ++} ++ ++static int 
++should_create_unprepped_cluster(reiser4_cluster_t * clust, struct inode *inode) ++{ ++ assert("edward-737", clust != NULL); ++ ++ switch (clust->dstat) { ++ case PREP_DISK_CLUSTER: ++ case UNPR_DISK_CLUSTER: ++ return 0; ++ case FAKE_DISK_CLUSTER: ++ if (clust->win && ++ clust->win->stat == HOLE_WINDOW && clust->nr_pages == 0) { ++ assert("edward-1172", new_cluster(clust, inode)); ++ return 0; ++ } ++ return 1; ++ default: ++ impossible("edward-1173", "bad disk cluster state"); ++ return 0; ++ } ++} ++ ++static int ++cryptcompress_make_unprepped_cluster(reiser4_cluster_t * clust, ++ struct inode *inode) ++{ ++ int result; ++ ++ assert("edward-1123", reiser4_schedulable()); ++ assert("edward-737", clust != NULL); ++ assert("edward-738", inode != NULL); ++ assert("edward-739", cryptcompress_inode_ok(inode)); ++ assert("edward-1053", clust->hint != NULL); ++ ++ if (!should_create_unprepped_cluster(clust, inode)) { ++ if (clust->reserved) { ++ cluster_reserved2free(estimate_insert_cluster(inode)); ++#if REISER4_DEBUG ++ assert("edward-1267", ++ clust->reserved_unprepped == ++ estimate_insert_cluster(inode)); ++ clust->reserved_unprepped -= ++ estimate_insert_cluster(inode); ++#endif ++ } ++ return 0; ++ } ++ assert("edward-1268", clust->reserved); ++ cluster_reserved2grabbed(estimate_insert_cluster(inode)); ++#if REISER4_DEBUG ++ assert("edward-1441", ++ clust->reserved_unprepped == estimate_insert_cluster(inode)); ++ clust->reserved_unprepped -= estimate_insert_cluster(inode); ++#endif ++ result = ctail_insert_unprepped_cluster(clust, inode); ++ if (result) ++ return result; ++ ++ inode_add_bytes(inode, inode_cluster_size(inode)); ++ ++ assert("edward-743", cryptcompress_inode_ok(inode)); ++ assert("edward-744", znode_is_write_locked(clust->hint->lh.node)); ++ ++ clust->dstat = UNPR_DISK_CLUSTER; ++ return 0; ++} ++ ++#if REISER4_DEBUG ++static int jnode_truncate_ok(struct inode *inode, cloff_t index) ++{ ++ jnode *node; ++ node = ++ jlookup(current_tree, 
get_inode_oid(inode), ++ clust_to_pg(index, inode)); ++ if (likely(!node)) ++ return 1; ++ /* someone got this jnode */ ++ warning("edward-1315", "jnode %p is untruncated\n", node); ++ jput(node); ++ return (atomic_read(&node->x_count)); ++} ++#endif ++ ++/* Collect unlocked cluster pages and jnode (the last is in the ++ case when the page cluster will be modified and captured) */ ++int ++prepare_page_cluster(struct inode *inode, reiser4_cluster_t * clust, ++ int capture) ++{ ++ assert("edward-177", inode != NULL); ++ assert("edward-741", cryptcompress_inode_ok(inode)); ++ assert("edward-740", clust->pages != NULL); ++ ++ set_cluster_nrpages(clust, inode); ++ reset_cluster_pgset(clust, cluster_nrpages(inode)); ++ return (capture ? ++ grab_cluster_pages_jnode(inode, clust) : ++ grab_cluster_pages(inode, clust)); ++} ++ ++/* Truncate all pages of the cluster of index @index. ++ This is called by ->kill_hook() method of item plugin */ ++void truncate_page_cluster_cryptcompress(struct inode *inode, cloff_t index, ++ int even_cows) ++{ ++ int i; ++ int found = 0; ++ int nr_pages; ++ jnode *node; ++ struct page *pages[MAX_CLUSTER_NRPAGES]; ++ ++ node = ++ jlookup(current_tree, get_inode_oid(inode), ++ clust_to_pg(index, inode)); ++ /* jnode is absent, just drop pages which can not ++ acquire jnode because of exclusive access */ ++ if (!node) ++ goto truncate; ++ /* jnode is present and may be dirty */ ++ nr_pages = count_to_nrpages(cnt_to_clcnt(inode->i_size, index, inode)); ++ ++ found = find_get_pages(inode->i_mapping, clust_to_pg(index, inode), ++ nr_pages, pages); ++ spin_lock_jnode(node); ++ ++ if (reiser4_inode_get_flag(inode, REISER4_FILE_CONV_IN_PROGRESS) ++ && index == 0) ++ /* converting to unix_file in progress */ ++ JF_CLR(node, JNODE_CLUSTER_PAGE); ++ if (JF_ISSET(node, JNODE_DIRTY)) { ++ /* someone has done modifications which are not ++ yet committed, so we need to release some resources */ ++ ++ /* free disk space grabbed for disk cluster converting */ ++ 
cluster_reserved2grabbed(estimate_update_cluster(inode)); ++ grabbed2free(get_current_context(), ++ get_current_super_private(), ++ estimate_update_cluster(inode)); ++ ++ assert("edward-1198", found == nr_pages); ++ assert("edward-1199", node->page_count == nr_pages); ++#if REISER4_DEBUG ++ node->page_count = 0; ++#endif ++ /* This will clear dirty bit */ ++ uncapture_cluster_jnode(node); ++ ++ /* put pages grabbed for last uncommitted modifications */ ++ for (i = 0; i < nr_pages; i++) { ++ assert("edward-1200", PageUptodate(pages[i])); ++ page_cache_release(pages[i]); ++#if REISER4_DEBUG ++ cryptcompress_inode_data(inode)->pgcount --; ++#endif ++ } ++ } else ++ spin_unlock_jnode(node); ++ /* FIXME-EDWARD: Use truncate_complete_page in the loop above instead */ ++ ++ jput(node); ++ /* put pages found here */ ++ forget_cluster_pages(pages, found); ++ truncate: ++ if (reiser4_inode_get_flag(inode, REISER4_FILE_CONV_IN_PROGRESS) && ++ index == 0) ++ return; ++ reiser4_invalidate_pages(inode->i_mapping, ++ clust_to_pg(index, inode), ++ cluster_nrpages(inode), ++ even_cows); ++ assert("edward-1201", ++ ergo(!reiser4_inode_get_flag(inode, ++ REISER4_FILE_CONV_IN_PROGRESS), ++ jnode_truncate_ok(inode, index))); ++ return; ++} ++ ++/* Prepare cluster handle before(after) modifications ++ which are supposed to be committed. ++ ++ . grab cluster pages; ++ . reserve disk space; ++ . maybe read pages from disk and set the disk cluster dirty; ++ . maybe write hole; ++ . maybe create 'unprepped' disk cluster if the last one is fake ++ (i.e. 
is not represenred by any items) ++*/ ++ ++static int ++prepare_cluster(struct inode *inode, ++ loff_t file_off /* write position in the file */ , ++ loff_t to_file, /* bytes of users data to write to the file */ ++ reiser4_cluster_t * clust, page_cluster_op op) ++{ ++ int result = 0; ++ reiser4_slide_t *win = clust->win; ++ ++ reset_cluster_params(clust); ++ cluster_set_tfm_act(&clust->tc, TFMA_READ); ++#if REISER4_DEBUG ++ clust->ctx = get_current_context(); ++#endif ++ assert("edward-1190", op != PCL_UNKNOWN); ++ ++ clust->op = op; ++ ++ result = prepare_page_cluster(inode, clust, 1); ++ if (result) ++ return result; ++ assert("edward-1447", ++ ergo(clust->nr_pages != 0, jprivate(clust->pages[0]))); ++ assert("edward-1448", ++ ergo(clust->nr_pages != 0, ++ jnode_is_cluster_page(jprivate(clust->pages[0])))); ++ ++ result = reserve4cluster(inode, clust); ++ if (result) ++ goto err1; ++ result = read_some_cluster_pages(inode, clust); ++ if (result) { ++ free_reserved4cluster(inode, ++ clust, ++ estimate_update_cluster(inode) + ++ estimate_insert_cluster(inode)); ++ goto err1; ++ } ++ assert("edward-1124", clust->dstat != INVAL_DISK_CLUSTER); ++ ++ result = cryptcompress_make_unprepped_cluster(clust, inode); ++ if (result) ++ goto err2; ++ if (win && win->stat == HOLE_WINDOW) { ++ result = write_hole(inode, clust, file_off, to_file); ++ if (result) ++ goto err2; ++ } ++ return 0; ++ err2: ++ free_reserved4cluster(inode, clust, ++ estimate_update_cluster(inode)); ++ err1: ++ reiser4_release_cluster_pages_and_jnode(clust); ++ assert("edward-1125", result == -ENOSPC); ++ return result; ++} ++ ++/* set window by two offsets */ ++static void ++set_window(reiser4_cluster_t * clust, reiser4_slide_t * win, ++ struct inode *inode, loff_t o1, loff_t o2) ++{ ++ assert("edward-295", clust != NULL); ++ assert("edward-296", inode != NULL); ++ assert("edward-1071", win != NULL); ++ assert("edward-297", o1 <= o2); ++ ++ clust->index = off_to_clust(o1, inode); ++ ++ win->off = 
off_to_cloff(o1, inode); ++ win->count = min_count(inode_cluster_size(inode) - win->off, o2 - o1); ++ win->delta = 0; ++ ++ clust->win = win; ++} ++ ++static int ++set_cluster_by_window(struct inode *inode, reiser4_cluster_t * clust, ++ reiser4_slide_t * win, flow_t * f, loff_t file_off) ++{ ++ int result; ++ ++ assert("edward-197", clust != NULL); ++ assert("edward-1072", win != NULL); ++ assert("edward-198", inode != NULL); ++ ++ result = alloc_cluster_pgset(clust, cluster_nrpages(inode)); ++ if (result) ++ return result; ++ ++ if (file_off > inode->i_size) { ++ /* Uhmm, hole in cryptcompress file... */ ++ loff_t hole_size; ++ hole_size = file_off - inode->i_size; ++ ++ set_window(clust, win, inode, inode->i_size, file_off); ++ win->stat = HOLE_WINDOW; ++ if (win->off + hole_size < inode_cluster_size(inode)) ++ /* there is also user's data to append to the hole */ ++ win->delta = ++ min_count(inode_cluster_size(inode) - ++ (win->off + win->count), f->length); ++ return 0; ++ } ++ set_window(clust, win, inode, file_off, file_off + f->length); ++ win->stat = DATA_WINDOW; ++ return 0; ++} ++ ++int set_cluster_by_page(reiser4_cluster_t * clust, struct page * page, ++ int count) ++{ ++ int result = 0; ++ int (*setting_actor)(reiser4_cluster_t * clust, int count); ++ ++ assert("edward-1358", clust != NULL); ++ assert("edward-1359", page != NULL); ++ assert("edward-1360", page->mapping != NULL); ++ assert("edward-1361", page->mapping->host != NULL); ++ ++ setting_actor = (clust->pages ? 
reset_cluster_pgset : alloc_cluster_pgset); ++ result = setting_actor(clust, count); ++ clust->index = pg_to_clust(page->index, page->mapping->host); ++ return result; ++} ++ ++/* reset all the params that not get updated */ ++void reset_cluster_params(reiser4_cluster_t * clust) ++{ ++ assert("edward-197", clust != NULL); ++ ++ clust->dstat = INVAL_DISK_CLUSTER; ++ clust->tc.uptodate = 0; ++ clust->tc.len = 0; ++} ++ ++/* Core write procedure of cryptcompress plugin, which slices user's ++ flow into logical clusters, maps the last ones to the appropriate ++ page clusters, and tries to capture them. ++ If @buf != NULL, returns number of successfully written bytes, ++ otherwise returns error ++*/ ++static loff_t ++write_cryptcompress_flow(struct file *file, struct inode *inode, ++ const char __user *buf, size_t count, loff_t pos, ++ int *conv_occured) ++{ ++ int i; ++ flow_t f; ++ hint_t *hint; ++ int result = 0; ++ size_t to_write = 0; ++ loff_t file_off; ++ reiser4_slide_t win; ++ reiser4_cluster_t clust; ++ ++ assert("edward-161", reiser4_schedulable()); ++ assert("edward-748", cryptcompress_inode_ok(inode)); ++ assert("edward-159", current_blocksize == PAGE_CACHE_SIZE); ++ assert("edward-1274", get_current_context()->grabbed_blocks == 0); ++ ++ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get()); ++ if (hint == NULL) ++ return RETERR(-ENOMEM); ++ ++ result = load_file_hint(file, hint); ++ if (result) { ++ kfree(hint); ++ return result; ++ } ++ ++ result = ++ flow_by_inode_cryptcompress(inode, buf, 1 /* user space */ , ++ count, pos, WRITE_OP, &f); ++ if (result) ++ goto out; ++ to_write = f.length; ++ ++ /* current write position in file */ ++ file_off = pos; ++ reiser4_slide_init(&win); ++ cluster_init_read(&clust, &win); ++ clust.hint = hint; ++ ++ result = set_cluster_by_window(inode, &clust, &win, &f, file_off); ++ if (result) ++ goto out; ++ ++ if (next_window_stat(&win) == HOLE_WINDOW) { ++ result = write_conversion_hook(file, inode, pos, &clust, 
NULL); ++ if (result) ++ goto out; ++ result = ++ prepare_cluster(inode, file_off, f.length, &clust, ++ PCL_APPEND); ++ if (result) ++ goto out; ++ } ++ do { ++ char *src; ++ unsigned page_off, page_count; ++ ++ assert("edward-750", reiser4_schedulable()); ++ ++ result = write_conversion_hook(file, inode, pos, &clust, ++ conv_occured); ++ if (result || *conv_occured) ++ goto out; ++ result = ++ prepare_cluster(inode, file_off, f.length, &clust, ++ PCL_APPEND); ++ if (result) ++ goto out; ++ ++ assert("edward-751", cryptcompress_inode_ok(inode)); ++ assert("edward-204", win.stat == DATA_WINDOW); ++ assert("edward-1288", hint_is_valid(clust.hint)); ++ assert("edward-752", ++ znode_is_write_locked(hint->ext_coord.coord.node)); ++ ++ put_hint_cluster(&clust, inode, ZNODE_WRITE_LOCK); ++ ++ /* set write position in page */ ++ page_off = off_to_pgoff(win.off); ++ ++ /* copy user's data to cluster pages */ ++ for (i = off_to_pg(win.off), src = f.data; ++ i < count_to_nrpages(win.off + win.count); ++ i++, src += page_count) { ++ page_count = ++ cnt_to_pgcnt(win.off + win.count, i) - page_off; ++ ++ assert("edward-1039", ++ page_off + page_count <= PAGE_CACHE_SIZE); ++ assert("edward-287", clust.pages[i] != NULL); ++ ++ lock_page(clust.pages[i]); ++ result = ++ __copy_from_user((char *)kmap(clust.pages[i]) + ++ page_off, (char __user *)src, page_count); ++ kunmap(clust.pages[i]); ++ if (unlikely(result)) { ++ unlock_page(clust.pages[i]); ++ result = -EFAULT; ++ goto err2; ++ } ++ SetPageUptodate(clust.pages[i]); ++ unlock_page(clust.pages[i]); ++ page_off = 0; ++ } ++ assert("edward-753", cryptcompress_inode_ok(inode)); ++ ++ set_cluster_pages_dirty(&clust); ++ ++ result = try_capture_cluster(&clust, inode); ++ if (result) ++ goto err2; ++ ++ assert("edward-998", f.user == 1); ++ ++ move_flow_forward(&f, win.count); ++ ++ /* disk cluster may be already clean at this point */ ++ ++ /* . update cluster ++ . set hint for new offset ++ . unlock znode ++ . update inode ++ . 
balance dirty pages ++ */ ++ result = balance_dirty_page_cluster(&clust, inode, 0, f.length); ++ if (result) ++ goto err1; ++ assert("edward-755", hint->lh.owner == NULL); ++ reset_cluster_params(&clust); ++ continue; ++ err2: ++ reiser4_release_cluster_pages_and_jnode(&clust); ++ err1: ++ if (clust.reserved) ++ free_reserved4cluster(inode, ++ &clust, ++ estimate_update_cluster(inode)); ++ break; ++ } while (f.length); ++ out: ++ done_lh(&hint->lh); ++ if (result == -EEXIST) ++ warning("edward-1407", "write returns EEXIST!\n"); ++ ++ put_cluster_handle(&clust); ++ save_file_hint(file, hint); ++ kfree(hint); ++ if (buf) { ++ /* if nothing were written - there must be an error */ ++ assert("edward-195", ergo((to_write == f.length), ++ (result < 0 || *conv_occured))); ++ return (to_write - f.length) ? (to_write - f.length) : result; ++ } ++ return result; ++} ++ ++/** ++ * write_cryptcompress - write of struct file_operations ++ * @file: file to write to ++ * @buf: address of user-space buffer ++ * @read_amount: number of bytes to write ++ * @off: position in file to write to ++ * ++ * This is implementation of vfs's write method of struct file_operations for ++ * cryptcompress plugin. 
++ */ ++ssize_t write_cryptcompress(struct file *file, const char __user *buf, ++ size_t count, loff_t *off, int *conv) ++{ ++ ssize_t result; ++ struct inode *inode; ++ reiser4_context *ctx; ++ loff_t pos = *off; ++ cryptcompress_info_t *info; ++ ++ assert("edward-1449", *conv == 0); ++ ++ inode = file->f_dentry->d_inode; ++ assert("edward-196", cryptcompress_inode_ok(inode)); ++ ++ info = cryptcompress_inode_data(inode); ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ mutex_lock(&inode->i_mutex); ++ ++ result = generic_write_checks(file, &pos, &count, 0); ++ if (unlikely(result != 0)) ++ goto out; ++ if (unlikely(count == 0)) ++ goto out; ++ result = remove_suid(file->f_dentry); ++ if (unlikely(result != 0)) ++ goto out; ++ /* remove_suid might create a transaction */ ++ reiser4_txn_restart(ctx); ++ ++ result = write_cryptcompress_flow(file, inode, buf, count, pos, conv); ++ ++ if (result < 0) ++ goto out; ++ /* update position in a file */ ++ *off = pos + result; ++ out: ++ mutex_unlock(&inode->i_mutex); ++ ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++int readpages_cryptcompress(struct file *file, struct address_space *mapping, ++ struct list_head *pages, unsigned nr_pages) ++{ ++ reiser4_context * ctx; ++ int ret; ++ ++ ctx = reiser4_init_context(mapping->host->i_sb); ++ if (IS_ERR(ctx)) { ++ ret = PTR_ERR(ctx); ++ goto err; ++ } ++ /* crc files can be built of ctail items only */ ++ ret = readpages_ctail(file, mapping, pages); ++ reiser4_exit_context(ctx); ++ if (ret) { ++err: ++ put_pages_list(pages); ++ } ++ return ret; ++} ++ ++static reiser4_block_nr cryptcompress_estimate_read(struct inode *inode) ++{ ++ /* reserve one block to update stat data item */ ++ assert("edward-1193", ++ inode_file_plugin(inode)->estimate.update == ++ estimate_update_common); ++ return estimate_update_common(inode); ++} ++ ++/** ++ * read_cryptcompress - read of struct file_operations 
++ * @file: file to read from ++ * @buf: address of user-space buffer ++ * @read_amount: number of bytes to read ++ * @off: position in file to read from ++ * ++ * This is implementation of vfs's read method of struct file_operations for ++ * cryptcompress plugin. ++ */ ++ssize_t read_cryptcompress(struct file * file, char __user *buf, size_t size, ++ loff_t * off) ++{ ++ ssize_t result; ++ struct inode *inode; ++ reiser4_context *ctx; ++ cryptcompress_info_t *info; ++ reiser4_block_nr needed; ++ ++ inode = file->f_dentry->d_inode; ++ assert("edward-1194", !reiser4_inode_get_flag(inode, REISER4_NO_SD)); ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ info = cryptcompress_inode_data(inode); ++ needed = cryptcompress_estimate_read(inode); ++ ++ result = reiser4_grab_space(needed, BA_CAN_COMMIT); ++ if (result != 0) { ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ ++ LOCK_CNT_INC(inode_sem_r); ++ ++ result = do_sync_read(file, buf, size, off); ++ ++ LOCK_CNT_DEC(inode_sem_r); ++ ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ ++ return result; ++} ++ ++/* If @index > 0, find real disk cluster of the index (@index - 1), ++ If @index == 0 find the real disk cluster of the object of maximal index. ++ Keep incremented index of the result in @found. ++ It succes was returned: ++ (@index == 0 && @found == 0) means that the object doesn't have real disk ++ clusters. ++ (@index != 0 && @found == 0) means that disk cluster of (@index -1) doesn't ++ exist. 
++*/ ++static int ++find_real_disk_cluster(struct inode *inode, cloff_t * found, cloff_t index) ++{ ++ int result; ++ reiser4_key key; ++ loff_t offset; ++ hint_t *hint; ++ lock_handle *lh; ++ lookup_bias bias; ++ coord_t *coord; ++ item_plugin *iplug; ++ ++ assert("edward-1131", inode != NULL); ++ assert("edward-95", cryptcompress_inode_ok(inode)); ++ ++ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get()); ++ if (hint == NULL) ++ return RETERR(-ENOMEM); ++ hint_init_zero(hint); ++ lh = &hint->lh; ++ ++ bias = (index ? FIND_EXACT : FIND_MAX_NOT_MORE_THAN); ++ offset = ++ (index ? clust_to_off(index, inode) - ++ 1 : get_key_offset(reiser4_max_key())); ++ ++ key_by_inode_cryptcompress(inode, offset, &key); ++ ++ /* find the last item of this object */ ++ result = ++ find_cluster_item(hint, &key, ZNODE_READ_LOCK, NULL /* ra_info */, ++ bias, 0); ++ if (cbk_errored(result)) { ++ done_lh(lh); ++ kfree(hint); ++ return result; ++ } ++ if (result == CBK_COORD_NOTFOUND) { ++ /* no real disk clusters */ ++ done_lh(lh); ++ kfree(hint); ++ *found = 0; ++ return 0; ++ } ++ /* disk cluster is found */ ++ coord = &hint->ext_coord.coord; ++ coord_clear_iplug(coord); ++ result = zload(coord->node); ++ if (unlikely(result)) { ++ done_lh(lh); ++ kfree(hint); ++ return result; ++ } ++ iplug = item_plugin_by_coord(coord); ++ assert("edward-277", iplug == item_plugin_by_id(CTAIL_ID)); ++ assert("edward-1202", ctail_ok(coord)); ++ ++ item_key_by_coord(coord, &key); ++ *found = off_to_clust(get_key_offset(&key), inode) + 1; ++ ++ assert("edward-1132", ergo(index, index == *found)); ++ ++ zrelse(coord->node); ++ done_lh(lh); ++ kfree(hint); ++ return 0; ++} ++ ++static int find_fake_appended(struct inode *inode, cloff_t * index) ++{ ++ return find_real_disk_cluster(inode, index, ++ 0 /* find last real one */ ); ++} ++ ++/* Set left coord when unit is not found after node_lookup() ++ This takes into account that there can be holes in a sequence ++ of disk clusters */ ++ ++static void 
adjust_left_coord(coord_t * left_coord) ++{ ++ switch (left_coord->between) { ++ case AFTER_UNIT: ++ left_coord->between = AFTER_ITEM; ++ case AFTER_ITEM: ++ case BEFORE_UNIT: ++ break; ++ default: ++ impossible("edward-1204", "bad left coord to cut"); ++ } ++ return; ++} ++ ++#define CRC_CUT_TREE_MIN_ITERATIONS 64 ++int ++cut_tree_worker_cryptcompress(tap_t * tap, const reiser4_key * from_key, ++ const reiser4_key * to_key, ++ reiser4_key * smallest_removed, ++ struct inode *object, int truncate, int *progress) ++{ ++ lock_handle next_node_lock; ++ coord_t left_coord; ++ int result; ++ ++ assert("edward-1158", tap->coord->node != NULL); ++ assert("edward-1159", znode_is_write_locked(tap->coord->node)); ++ assert("edward-1160", znode_get_level(tap->coord->node) == LEAF_LEVEL); ++ ++ *progress = 0; ++ init_lh(&next_node_lock); ++ ++ while (1) { ++ znode *node; /* node from which items are cut */ ++ node_plugin *nplug; /* node plugin for @node */ ++ ++ node = tap->coord->node; ++ ++ /* Move next_node_lock to the next node on the left. */ ++ result = ++ reiser4_get_left_neighbor(&next_node_lock, node, ++ ZNODE_WRITE_LOCK, ++ GN_CAN_USE_UPPER_LEVELS); ++ if (result != 0 && result != -E_NO_NEIGHBOR) ++ break; ++ /* FIXME-EDWARD: Check can we delete the node as a whole. 
*/ ++ result = reiser4_tap_load(tap); ++ if (result) ++ return result; ++ ++ /* Prepare the second (right) point for cut_node() */ ++ if (*progress) ++ coord_init_last_unit(tap->coord, node); ++ ++ else if (item_plugin_by_coord(tap->coord)->b.lookup == NULL) ++ /* set rightmost unit for the items without lookup method */ ++ tap->coord->unit_pos = coord_last_unit_pos(tap->coord); ++ ++ nplug = node->nplug; ++ ++ assert("edward-1161", nplug); ++ assert("edward-1162", nplug->lookup); ++ ++ /* left_coord is leftmost unit cut from @node */ ++ result = nplug->lookup(node, from_key, FIND_EXACT, &left_coord); ++ ++ if (IS_CBKERR(result)) ++ break; ++ ++ if (result == CBK_COORD_NOTFOUND) ++ adjust_left_coord(&left_coord); ++ ++ /* adjust coordinates so that they are set to existing units */ ++ if (coord_set_to_right(&left_coord) ++ || coord_set_to_left(tap->coord)) { ++ result = 0; ++ break; ++ } ++ ++ if (coord_compare(&left_coord, tap->coord) == ++ COORD_CMP_ON_RIGHT) { ++ /* keys from @from_key to @to_key are not in the tree */ ++ result = 0; ++ break; ++ } ++ ++ /* cut data from one node */ ++ *smallest_removed = *reiser4_min_key(); ++ result = kill_node_content(&left_coord, ++ tap->coord, ++ from_key, ++ to_key, ++ smallest_removed, ++ next_node_lock.node, ++ object, truncate); ++#if REISER4_DEBUG ++ /*node_check(node, ~0U); */ ++#endif ++ reiser4_tap_relse(tap); ++ ++ if (result) ++ break; ++ ++ ++(*progress); ++ ++ /* Check whether all items with keys >= from_key were removed ++ * from the tree. */ ++ if (keyle(smallest_removed, from_key)) ++ /* result = 0; */ ++ break; ++ ++ if (next_node_lock.node == NULL) ++ break; ++ ++ result = reiser4_tap_move(tap, &next_node_lock); ++ done_lh(&next_node_lock); ++ if (result) ++ break; ++ ++ /* Break long cut_tree operation (deletion of a large file) if ++ * atom requires commit. 
*/ ++ if (*progress > CRC_CUT_TREE_MIN_ITERATIONS ++ && current_atom_should_commit()) { ++ result = -E_REPEAT; ++ break; ++ } ++ } ++ done_lh(&next_node_lock); ++ return result; ++} ++ ++/* Append or expand hole in two steps (exclusive access should be aquired!) ++ 1) write zeroes to the current real cluster, ++ 2) expand hole via fake clusters (just increase i_size) */ ++static int ++cryptcompress_append_hole(struct inode *inode /*contains old i_size */ , ++ loff_t new_size) ++{ ++ int result = 0; ++ hint_t *hint; ++ lock_handle *lh; ++ loff_t hole_size; ++ int nr_zeroes; ++ reiser4_slide_t win; ++ reiser4_cluster_t clust; ++ ++ assert("edward-1133", inode->i_size < new_size); ++ assert("edward-1134", reiser4_schedulable()); ++ assert("edward-1135", cryptcompress_inode_ok(inode)); ++ assert("edward-1136", current_blocksize == PAGE_CACHE_SIZE); ++ assert("edward-1333", off_to_cloff(inode->i_size, inode) != 0); ++ ++ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get()); ++ if (hint == NULL) ++ return RETERR(-ENOMEM); ++ hint_init_zero(hint); ++ lh = &hint->lh; ++ ++ reiser4_slide_init(&win); ++ cluster_init_read(&clust, &win); ++ clust.hint = hint; ++ ++ result = alloc_cluster_pgset(&clust, cluster_nrpages(inode)); ++ if (result) ++ goto out; ++ if (off_to_cloff(inode->i_size, inode) == 0) ++ goto fake_append; ++ hole_size = new_size - inode->i_size; ++ nr_zeroes = ++ inode_cluster_size(inode) - off_to_cloff(inode->i_size, inode); ++ if (hole_size < nr_zeroes) ++ nr_zeroes = hole_size; ++ set_window(&clust, &win, inode, inode->i_size, ++ inode->i_size + nr_zeroes); ++ win.stat = HOLE_WINDOW; ++ ++ assert("edward-1137", ++ clust.index == off_to_clust(inode->i_size, inode)); ++ ++ result = prepare_cluster(inode, 0, 0, &clust, PCL_APPEND); ++ ++ assert("edward-1271", !result || result == -ENOSPC); ++ if (result) ++ goto out; ++ assert("edward-1139", ++ clust.dstat == PREP_DISK_CLUSTER || ++ clust.dstat == UNPR_DISK_CLUSTER); ++ ++ assert("edward-1431", hole_size 
>= nr_zeroes); ++ if (hole_size == nr_zeroes) ++ /* nothing to append anymore */ ++ goto out; ++ fake_append: ++ INODE_SET_FIELD(inode, i_size, new_size); ++ out: ++ done_lh(lh); ++ kfree(hint); ++ put_cluster_handle(&clust); ++ return result; ++} ++ ++#if REISER4_DEBUG ++static int ++pages_truncate_ok(struct inode *inode, loff_t old_size, pgoff_t start) ++{ ++ struct pagevec pvec; ++ int i; ++ int count; ++ int rest; ++ ++ rest = count_to_nrpages(old_size) - start; ++ ++ pagevec_init(&pvec, 0); ++ count = min_count(pagevec_space(&pvec), rest); ++ ++ while (rest) { ++ count = min_count(pagevec_space(&pvec), rest); ++ pvec.nr = find_get_pages(inode->i_mapping, start, ++ count, pvec.pages); ++ for (i = 0; i < pagevec_count(&pvec); i++) { ++ if (PageUptodate(pvec.pages[i])) { ++ warning("edward-1205", ++ "truncated page of index %lu is uptodate", ++ pvec.pages[i]->index); ++ return 0; ++ } ++ } ++ start += count; ++ rest -= count; ++ pagevec_release(&pvec); ++ } ++ return 1; ++} ++ ++static int body_truncate_ok(struct inode *inode, cloff_t aidx) ++{ ++ int result; ++ cloff_t raidx; ++ ++ result = find_fake_appended(inode, &raidx); ++ return !result && (aidx == raidx); ++} ++#endif ++ ++static int ++update_cryptcompress_size(struct inode *inode, reiser4_key * key, int update_sd) ++{ ++ return (get_key_offset(key) & ((loff_t) (inode_cluster_size(inode)) - 1) ++ ? 0 : reiser4_update_file_size(inode, key, update_sd)); ++} ++ ++/* prune cryptcompress file in two steps (exclusive access should be acquired!) 
++ 1) cut all disk clusters but the last one partially truncated, ++ 2) set zeroes and capture last partially truncated page cluster if the last ++ one exists, otherwise truncate via prune fake cluster (just decrease i_size) ++*/ ++static int ++prune_cryptcompress(struct inode *inode, loff_t new_size, int update_sd, ++ cloff_t aidx) ++{ ++ int result = 0; ++ unsigned nr_zeroes; ++ loff_t to_prune; ++ loff_t old_size; ++ cloff_t ridx; ++ ++ hint_t *hint; ++ lock_handle *lh; ++ reiser4_slide_t win; ++ reiser4_cluster_t clust; ++ ++ assert("edward-1140", inode->i_size >= new_size); ++ assert("edward-1141", reiser4_schedulable()); ++ assert("edward-1142", cryptcompress_inode_ok(inode)); ++ assert("edward-1143", current_blocksize == PAGE_CACHE_SIZE); ++ ++ old_size = inode->i_size; ++ ++ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get()); ++ if (hint == NULL) ++ return RETERR(-ENOMEM); ++ hint_init_zero(hint); ++ lh = &hint->lh; ++ ++ reiser4_slide_init(&win); ++ cluster_init_read(&clust, &win); ++ clust.hint = hint; ++ ++ /* rightmost completely truncated cluster */ ++ ridx = count_to_nrclust(new_size, inode); ++ ++ assert("edward-1174", ridx <= aidx); ++ old_size = inode->i_size; ++ if (ridx != aidx) { ++ result = cut_file_items(inode, ++ clust_to_off(ridx, inode), ++ update_sd, ++ clust_to_off(aidx, inode), ++ update_cryptcompress_size); ++ if (result) ++ goto out; ++ } ++ if (!off_to_cloff(new_size, inode)) { ++ /* no partially truncated clusters */ ++ assert("edward-1145", inode->i_size == new_size); ++ goto finish; ++ } ++ assert("edward-1146", new_size < inode->i_size); ++ ++ to_prune = inode->i_size - new_size; ++ ++ /* partial truncate of leftmost cluster, ++ first check if it is fake */ ++ result = find_real_disk_cluster(inode, &aidx, ridx); ++ if (result) ++ goto out; ++ if (!aidx) ++ /* yup, this is fake one */ ++ goto finish; ++ ++ assert("edward-1148", aidx == ridx); ++ ++ /* do partial truncate of the leftmost page cluster, ++ then try to capture 
this one */ ++ result = alloc_cluster_pgset(&clust, cluster_nrpages(inode)); ++ if (result) ++ goto out; ++ nr_zeroes = (off_to_pgoff(new_size) ? ++ PAGE_CACHE_SIZE - off_to_pgoff(new_size) : 0); ++ set_window(&clust, &win, inode, new_size, new_size + nr_zeroes); ++ win.stat = HOLE_WINDOW; ++ ++ assert("edward-1149", clust.index == ridx - 1); ++ ++ result = prepare_cluster(inode, 0, 0, &clust, PCL_TRUNCATE); ++ if (result) ++ goto out; ++ assert("edward-1151", ++ clust.dstat == PREP_DISK_CLUSTER || ++ clust.dstat == UNPR_DISK_CLUSTER); ++ ++ assert("edward-1191", inode->i_size == new_size); ++ assert("edward-1206", body_truncate_ok(inode, ridx)); ++ finish: ++ /* drop all the pages that don't have jnodes (i.e. pages ++ which can not be truncated by cut_file_items() because ++ of holes represented by fake disk clusters) including ++ the pages of partially truncated cluster which was ++ released by prepare_cluster() */ ++ truncate_inode_pages(inode->i_mapping, new_size); ++ INODE_SET_FIELD(inode, i_size, new_size); ++ out: ++ assert("edward-1334", !result || result == -ENOSPC); ++ assert("edward-1209", ++ pages_truncate_ok(inode, old_size, count_to_nrpages(new_size))); ++ done_lh(lh); ++ kfree(hint); ++ put_cluster_handle(&clust); ++ return result; ++} ++ ++/* Prepare cryptcompress file for truncate: ++ prune or append rightmost fake logical clusters (if any) ++*/ ++static int ++start_truncate_fake(struct inode *inode, cloff_t aidx, loff_t new_size, ++ int update_sd) ++{ ++ int result = 0; ++ int bytes; ++ ++ if (new_size > inode->i_size) { ++ /* append */ ++ if (inode->i_size < clust_to_off(aidx, inode)) ++ /* no fake bytes */ ++ return 0; ++ bytes = new_size - inode->i_size; ++ INODE_SET_FIELD(inode, i_size, inode->i_size + bytes); ++ } else { ++ /* prune */ ++ if (inode->i_size <= clust_to_off(aidx, inode)) ++ /* no fake bytes */ ++ return 0; ++ bytes = ++ inode->i_size - max_count(new_size, ++ clust_to_off(aidx, inode)); ++ if (!bytes) ++ return 0; ++ 
INODE_SET_FIELD(inode, i_size, inode->i_size - bytes); ++ /* In the case of fake prune we need to drop page cluster. ++ There are only 2 cases for partially truncated page: ++ 1. If is is dirty, therefore it is anonymous ++ (was dirtied via mmap), and will be captured ++ later via ->capture(). ++ 2. If is clean, therefore it is filled by zeroes. ++ In both cases we don't need to make it dirty and ++ capture here. ++ */ ++ truncate_inode_pages(inode->i_mapping, inode->i_size); ++ } ++ if (update_sd) ++ result = update_sd_cryptcompress(inode); ++ return result; ++} ++ ++/* This is called in setattr_cryptcompress when it is used to truncate, ++ and in delete_cryptcompress */ ++static int cryptcompress_truncate(struct inode *inode, /* old size */ ++ loff_t new_size, /* new size */ ++ int update_sd) ++{ ++ int result; ++ cloff_t aidx; ++ ++ result = find_fake_appended(inode, &aidx); ++ if (result) ++ return result; ++ assert("edward-1208", ++ ergo(aidx > 0, inode->i_size > clust_to_off(aidx - 1, inode))); ++ ++ result = start_truncate_fake(inode, aidx, new_size, update_sd); ++ if (result) ++ return result; ++ if (inode->i_size == new_size) ++ /* nothing to truncate anymore */ ++ return 0; ++ result = (inode->i_size < new_size ? ++ cryptcompress_append_hole(inode, new_size) : ++ prune_cryptcompress(inode, new_size, update_sd, aidx)); ++ if (!result && update_sd) ++ result = update_sd_cryptcompress(inode); ++ return result; ++} ++ ++static void clear_moved_tag_cluster(struct address_space * mapping, ++ reiser4_cluster_t * clust) ++{ ++ int i; ++ void * ret; ++ read_lock_irq(&mapping->tree_lock); ++ for (i = 0; i < clust->nr_pages; i++) { ++ assert("edward-1438", clust->pages[i] != NULL); ++ ret = radix_tree_tag_clear(&mapping->page_tree, ++ clust->pages[i]->index, ++ PAGECACHE_TAG_REISER4_MOVED); ++ assert("edward-1439", ret == clust->pages[i]); ++ } ++ read_unlock_irq(&mapping->tree_lock); ++} ++ ++/* Capture an anonymous pager cluster. 
(Page cluser is ++ anonymous if it contains at least one anonymous page */ ++static int ++capture_page_cluster(reiser4_cluster_t * clust, struct inode *inode) ++{ ++ int result; ++ ++ assert("edward-1073", clust != NULL); ++ assert("edward-1074", inode != NULL); ++ assert("edward-1075", clust->dstat == INVAL_DISK_CLUSTER); ++ ++ result = prepare_cluster(inode, 0, 0, clust, PCL_APPEND); ++ if (result) ++ return result; ++ set_cluster_pages_dirty(clust); ++ clear_moved_tag_cluster(inode->i_mapping, clust); ++ ++ result = try_capture_cluster(clust, inode); ++ put_hint_cluster(clust, inode, ZNODE_WRITE_LOCK); ++ if (unlikely(result)) { ++ /* set cleared tag back, so it will be ++ possible to capture it again later */ ++ read_lock_irq(&inode->i_mapping->tree_lock); ++ radix_tree_tag_set(&inode->i_mapping->page_tree, ++ clust_to_pg(clust->index, inode), ++ PAGECACHE_TAG_REISER4_MOVED); ++ read_unlock_irq(&inode->i_mapping->tree_lock); ++ ++ reiser4_release_cluster_pages_and_jnode(clust); ++ } ++ return result; ++} ++ ++#define MAX_CLUSTERS_TO_CAPTURE(inode) (1024 >> cluster_nrpages_shift(inode)) ++ ++/* read lock should be acquired */ ++static int ++capture_anonymous_clusters(struct address_space *mapping, pgoff_t * index, ++ int to_capture) ++{ ++ int result = 0; ++ int found; ++ struct page *page = NULL; ++ hint_t *hint; ++ lock_handle *lh; ++ reiser4_cluster_t clust; ++ ++ assert("edward-1127", mapping != NULL); ++ assert("edward-1128", mapping->host != NULL); ++ assert("edward-1440", mapping->host->i_mapping == mapping); ++ ++ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get()); ++ if (hint == NULL) ++ return RETERR(-ENOMEM); ++ hint_init_zero(hint); ++ lh = &hint->lh; ++ ++ cluster_init_read(&clust, NULL); ++ clust.hint = hint; ++ ++ result = alloc_cluster_pgset(&clust, cluster_nrpages(mapping->host)); ++ if (result) ++ goto out; ++ ++ while (to_capture > 0) { ++ found = ++ find_get_pages_tag(mapping, index, ++ PAGECACHE_TAG_REISER4_MOVED, 1, &page); ++ if 
(!found) { ++ *index = (pgoff_t) - 1; ++ break; ++ } ++ assert("edward-1109", page != NULL); ++ ++ move_cluster_forward(&clust, mapping->host, page->index); ++ result = capture_page_cluster(&clust, mapping->host); ++ page_cache_release(page); ++ if (result) ++ break; ++ to_capture -= clust.nr_pages; ++ } ++ if (result) { ++ warning("edward-1077", ++ "Cannot capture anon pages: result=%i (captured=%d)\n", ++ result, ++ ((__u32) MAX_CLUSTERS_TO_CAPTURE(mapping->host)) - ++ to_capture); ++ } else { ++ /* something had to be found */ ++ assert("edward-1078", ++ to_capture <= MAX_CLUSTERS_TO_CAPTURE(mapping->host)); ++ if (to_capture <= 0) ++ /* there may be left more pages */ ++ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); ++ } ++ out: ++ done_lh(lh); ++ kfree(hint); ++ put_cluster_handle(&clust); ++ return result; ++} ++ ++/* Check mapping for existence of not captured dirty pages. ++ This returns !0 if either page tree contains pages tagged ++ PAGECACHE_TAG_REISER4_MOVED */ ++static int cryptcompress_inode_has_anon_pages(struct inode *inode) ++{ ++ return mapping_tagged(inode->i_mapping, PAGECACHE_TAG_REISER4_MOVED); ++} ++ ++/* this is implementation of vfs's writepages method of struct ++ address_space_operations */ ++int ++writepages_cryptcompress(struct address_space *mapping, ++ struct writeback_control *wbc) ++{ ++ int result; ++ int to_capture; ++ pgoff_t nrpages; ++ pgoff_t index = 0; ++ cryptcompress_info_t *info; ++ struct inode *inode; ++ ++ inode = mapping->host; ++ if (!cryptcompress_inode_has_anon_pages(inode)) { ++ result = 0; ++ goto end; ++ } ++ ++ info = cryptcompress_inode_data(inode); ++ nrpages = count_to_nrpages(i_size_read(inode)); ++ ++ if (wbc->sync_mode != WB_SYNC_ALL) ++ to_capture = ++ min_count(wbc->nr_to_write, MAX_CLUSTERS_TO_CAPTURE(inode)); ++ else ++ to_capture = MAX_CLUSTERS_TO_CAPTURE(inode); ++ do { ++ reiser4_context *ctx; ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) { ++ result = PTR_ERR(ctx); ++ 
break; ++ } ++ ctx->nobalance = 1; ++ ++ assert("edward-1079", ++ lock_stack_isclean(get_current_lock_stack())); ++ ++ LOCK_CNT_INC(inode_sem_r); ++ ++ result = ++ capture_anonymous_clusters(inode->i_mapping, &index, ++ to_capture); ++ ++ if (result != 0 || wbc->sync_mode != WB_SYNC_ALL) { ++ reiser4_exit_context(ctx); ++ break; ++ } ++ result = txnmgr_force_commit_all(inode->i_sb, 0); ++ reiser4_exit_context(ctx); ++ } while (result == 0 && index < nrpages); ++ ++ end: ++ if (is_in_reiser4_context()) { ++ if (get_current_context()->nr_captured >= CAPTURE_APAGE_BURST) { ++ /* there are already pages to flush, flush them out, do ++ not delay until end of reiser4_sync_inodes */ ++ reiser4_writeout(inode->i_sb, wbc); ++ get_current_context()->nr_captured = 0; ++ } ++ } ++ return result; ++} ++ ++/* plugin->u.file.mmap */ ++int mmap_cryptcompress(struct file *file, struct vm_area_struct *vma) ++{ ++ int result; ++ struct inode *inode; ++ reiser4_context *ctx; ++ ++ inode = file->f_dentry->d_inode; ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ /* ++ * generic_file_mmap will do update_atime. Grab space for stat data ++ * update. 
++ */ ++ result = reiser4_grab_space_force ++ (inode_file_plugin(inode)->estimate.update(inode), ++ BA_CAN_COMMIT); ++ if (result) { ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ result = generic_file_mmap(file, vma); ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++/* plugin->u.file.release */ ++/* plugin->u.file.get_block */ ++ ++/* this is implementation of delete method of file plugin for ++ cryptcompress objects */ ++int delete_object_cryptcompress(struct inode *inode) ++{ ++ int result; ++ ++ assert("edward-429", inode->i_nlink == 0); ++ ++ reiser4_txn_restart_current(); ++ ++ result = cryptcompress_truncate(inode, 0, 0); ++ if (result) { ++ warning("edward-430", ++ "cannot truncate cryptcompress file %lli: %i", ++ (unsigned long long)get_inode_oid(inode), ++ result); ++ } ++ truncate_inode_pages(inode->i_mapping, 0); ++ /* and remove stat data */ ++ return reiser4_delete_object_common(inode); ++} ++ ++/* plugin->u.file.setattr method ++ This implements actual truncate (see comments in reiser4/page_cache.c) */ ++int setattr_cryptcompress(struct dentry *dentry, struct iattr *attr) ++{ ++ int result; ++ struct inode *inode; ++ ++ inode = dentry->d_inode; ++ if (attr->ia_valid & ATTR_SIZE) { ++ if (inode->i_size != attr->ia_size) { ++ reiser4_context *ctx; ++ loff_t old_size; ++ ++ ctx = reiser4_init_context(dentry->d_inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ inode_check_scale(inode, inode->i_size, attr->ia_size); ++ ++ old_size = inode->i_size; ++ ++ result = ++ cryptcompress_truncate(inode, attr->ia_size, ++ 1 /* update stat data */ ); ++ if (result) { ++ warning("edward-1192", ++ "truncate_cryptcompress failed: oid %lli, " ++ "old size %lld, new size %lld, retval %d", ++ (unsigned long long) ++ get_inode_oid(inode), old_size, ++ attr->ia_size, result); ++ } ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ } else ++ result = 0; ++ } else ++ result = reiser4_setattr_common(dentry, attr); ++ return 
result; ++} ++ ++/* sendfile_cryptcompress - sendfile of struct file_operations */ ++ssize_t ++sendfile_cryptcompress(struct file *file, loff_t *ppos, size_t count, ++ read_actor_t actor, void *target) ++{ ++ reiser4_context *ctx; ++ ssize_t result; ++ struct inode *inode; ++ cryptcompress_info_t *info; ++ ++ inode = file->f_dentry->d_inode; ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ /* ++ * generic_file_sndfile may want to call update_atime. Grab space for ++ * stat data update ++ */ ++ result = reiser4_grab_space(estimate_update_common(inode), ++ BA_CAN_COMMIT); ++ if (result) ++ goto exit; ++ info = cryptcompress_inode_data(inode); ++ ++ result = generic_file_sendfile(file, ppos, count, actor, target); ++ exit: ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++/* ++ * release_cryptcompress - release of struct file_operations ++ * @inode: inode of released file ++ * @file: file to release ++ */ ++int release_cryptcompress(struct inode *inode, struct file *file) ++{ ++ reiser4_context *ctx = reiser4_init_context(inode->i_sb); ++ ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ reiser4_free_file_fsdata(file); ++ reiser4_exit_context(ctx); ++ return 0; ++} ++ ++#if 0 ++int prepare_write_cryptcompress(struct file *file, struct page *page, ++ unsigned from, unsigned to) ++{ ++ return prepare_write_common(file, page, from, to); ++} ++#endif /* 0 */ ++ ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 80 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/file/cryptcompress.h b/fs/reiser4/plugin/file/cryptcompress.h +new file mode 100644 +index 0000000..5f2d7fb +--- /dev/null ++++ b/fs/reiser4/plugin/file/cryptcompress.h +@@ -0,0 +1,554 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++/* See http://www.namesys.com/cryptcompress_design.html */ ++ ++#if !defined( __FS_REISER4_CRYPTCOMPRESS_H__ 
) ++#define __FS_REISER4_CRYPTCOMPRESS_H__ ++ ++#include "../../page_cache.h" ++#include "../compress/compress.h" ++#include "../crypto/cipher.h" ++ ++#include ++ ++#define MIN_CLUSTER_SHIFT PAGE_CACHE_SHIFT ++#define MAX_CLUSTER_SHIFT 16 ++#define MAX_CLUSTER_NRPAGES (1U << MAX_CLUSTER_SHIFT >> PAGE_CACHE_SHIFT) ++#define DC_CHECKSUM_SIZE 4 ++ ++#define MIN_LATTICE_FACTOR 1 ++#define MAX_LATTICE_FACTOR 32 ++ ++/* this mask contains all non-standard plugins that might ++ be present in reiser4-specific part of inode managed by ++ cryptcompress file plugin */ ++#define cryptcompress_mask \ ++ ((1 << PSET_FILE) | \ ++ (1 << PSET_CLUSTER) | \ ++ (1 << PSET_CIPHER) | \ ++ (1 << PSET_DIGEST) | \ ++ (1 << PSET_COMPRESSION) | \ ++ (1 << PSET_COMPRESSION_MODE)) ++ ++static inline loff_t min_count(loff_t a, loff_t b) ++{ ++ return (a < b ? a : b); ++} ++ ++static inline loff_t max_count(loff_t a, loff_t b) ++{ ++ return (a > b ? a : b); ++} ++ ++#if REISER4_DEBUG ++static inline int cluster_shift_ok(int shift) ++{ ++ return (shift >= MIN_CLUSTER_SHIFT) && (shift <= MAX_CLUSTER_SHIFT); ++} ++#endif ++ ++typedef struct tfm_stream { ++ __u8 *data; ++ size_t size; ++} tfm_stream_t; ++ ++typedef enum { ++ INPUT_STREAM, ++ OUTPUT_STREAM, ++ LAST_STREAM ++} tfm_stream_id; ++ ++typedef tfm_stream_t *tfm_unit[LAST_STREAM]; ++ ++static inline __u8 *ts_data(tfm_stream_t * stm) ++{ ++ assert("edward-928", stm != NULL); ++ return stm->data; ++} ++ ++static inline size_t ts_size(tfm_stream_t * stm) ++{ ++ assert("edward-929", stm != NULL); ++ return stm->size; ++} ++ ++static inline void set_ts_size(tfm_stream_t * stm, size_t size) ++{ ++ assert("edward-930", stm != NULL); ++ ++ stm->size = size; ++} ++ ++static inline int alloc_ts(tfm_stream_t ** stm) ++{ ++ assert("edward-931", stm); ++ assert("edward-932", *stm == NULL); ++ ++ *stm = kmalloc(sizeof **stm, reiser4_ctx_gfp_mask_get()); ++ if (*stm == NULL) ++ return -ENOMEM; ++ memset(*stm, 0, sizeof **stm); ++ return 0; ++} ++ ++static 
inline void free_ts(tfm_stream_t * stm) ++{ ++ assert("edward-933", !ts_data(stm)); ++ assert("edward-934", !ts_size(stm)); ++ ++ kfree(stm); ++} ++ ++static inline int alloc_ts_data(tfm_stream_t * stm, size_t size) ++{ ++ assert("edward-935", !ts_data(stm)); ++ assert("edward-936", !ts_size(stm)); ++ assert("edward-937", size != 0); ++ ++ stm->data = reiser4_vmalloc(size); ++ if (!stm->data) ++ return -ENOMEM; ++ set_ts_size(stm, size); ++ return 0; ++} ++ ++static inline void free_ts_data(tfm_stream_t * stm) ++{ ++ assert("edward-938", equi(ts_data(stm), ts_size(stm))); ++ ++ if (ts_data(stm)) ++ vfree(ts_data(stm)); ++ memset(stm, 0, sizeof *stm); ++} ++ ++/* Write modes for item conversion in flush convert phase */ ++typedef enum { ++ CRC_APPEND_ITEM = 1, ++ CRC_OVERWRITE_ITEM = 2, ++ CRC_CUT_ITEM = 3 ++} cryptcompress_write_mode_t; ++ ++typedef enum { ++ PCL_UNKNOWN = 0, /* invalid option */ ++ PCL_APPEND = 1, /* append and/or overwrite */ ++ PCL_TRUNCATE = 2 /* truncate */ ++} page_cluster_op; ++ ++/* Reiser4 file write/read transforms page cluster into disk cluster (and back) ++ using crypto/compression transforms implemented by reiser4 transform plugins. ++ Before each transform we allocate a pair of streams (tfm_unit) and assemble ++ page cluster into the input one. After transform we split output stream into ++ a set of items (disk cluster). 
++*/ ++typedef struct tfm_cluster { ++ coa_set coa; ++ tfm_unit tun; ++ tfm_action act; ++ int uptodate; ++ int lsize; /* size of the logical cluster */ ++ int len; /* length of the transform stream */ ++} tfm_cluster_t; ++ ++static inline coa_t get_coa(tfm_cluster_t * tc, reiser4_compression_id id, tfm_action act) ++{ ++ return tc->coa[id][act]; ++} ++ ++static inline void ++set_coa(tfm_cluster_t * tc, reiser4_compression_id id, tfm_action act, coa_t coa) ++{ ++ tc->coa[id][act] = coa; ++} ++ ++static inline int ++alloc_coa(tfm_cluster_t * tc, compression_plugin * cplug) ++{ ++ coa_t coa; ++ ++ coa = cplug->alloc(tc->act); ++ if (IS_ERR(coa)) ++ return PTR_ERR(coa); ++ set_coa(tc, cplug->h.id, tc->act, coa); ++ return 0; ++} ++ ++static inline int ++grab_coa(tfm_cluster_t * tc, compression_plugin * cplug) ++{ ++ return (cplug->alloc && !get_coa(tc, cplug->h.id, tc->act) ? ++ alloc_coa(tc, cplug) : 0); ++} ++ ++static inline void free_coa_set(tfm_cluster_t * tc) ++{ ++ tfm_action j; ++ reiser4_compression_id i; ++ compression_plugin *cplug; ++ ++ assert("edward-810", tc != NULL); ++ ++ for (j = 0; j < TFMA_LAST; j++) ++ for (i = 0; i < LAST_COMPRESSION_ID; i++) { ++ if (!get_coa(tc, i, j)) ++ continue; ++ cplug = compression_plugin_by_id(i); ++ assert("edward-812", cplug->free != NULL); ++ cplug->free(get_coa(tc, i, j), j); ++ set_coa(tc, i, j, 0); ++ } ++ return; ++} ++ ++static inline tfm_stream_t *tfm_stream(tfm_cluster_t * tc, tfm_stream_id id) ++{ ++ return tc->tun[id]; ++} ++ ++static inline void ++set_tfm_stream(tfm_cluster_t * tc, tfm_stream_id id, tfm_stream_t * ts) ++{ ++ tc->tun[id] = ts; ++} ++ ++static inline __u8 *tfm_stream_data(tfm_cluster_t * tc, tfm_stream_id id) ++{ ++ return ts_data(tfm_stream(tc, id)); ++} ++ ++static inline void ++set_tfm_stream_data(tfm_cluster_t * tc, tfm_stream_id id, __u8 * data) ++{ ++ tfm_stream(tc, id)->data = data; ++} ++ ++static inline size_t tfm_stream_size(tfm_cluster_t * tc, tfm_stream_id id) ++{ ++ return 
ts_size(tfm_stream(tc, id)); ++} ++ ++static inline void ++set_tfm_stream_size(tfm_cluster_t * tc, tfm_stream_id id, size_t size) ++{ ++ tfm_stream(tc, id)->size = size; ++} ++ ++static inline int ++alloc_tfm_stream(tfm_cluster_t * tc, size_t size, tfm_stream_id id) ++{ ++ assert("edward-939", tc != NULL); ++ assert("edward-940", !tfm_stream(tc, id)); ++ ++ tc->tun[id] = kmalloc(sizeof(tfm_stream_t), reiser4_ctx_gfp_mask_get()); ++ if (!tc->tun[id]) ++ return -ENOMEM; ++ memset(tfm_stream(tc, id), 0, sizeof(tfm_stream_t)); ++ return alloc_ts_data(tfm_stream(tc, id), size); ++} ++ ++static inline int ++realloc_tfm_stream(tfm_cluster_t * tc, size_t size, tfm_stream_id id) ++{ ++ assert("edward-941", tfm_stream_size(tc, id) < size); ++ free_ts_data(tfm_stream(tc, id)); ++ return alloc_ts_data(tfm_stream(tc, id), size); ++} ++ ++static inline void free_tfm_stream(tfm_cluster_t * tc, tfm_stream_id id) ++{ ++ free_ts_data(tfm_stream(tc, id)); ++ free_ts(tfm_stream(tc, id)); ++ set_tfm_stream(tc, id, 0); ++} ++ ++static inline unsigned coa_overrun(compression_plugin * cplug, int ilen) ++{ ++ return (cplug->overrun != NULL ? 
cplug->overrun(ilen) : 0); ++} ++ ++static inline void free_tfm_unit(tfm_cluster_t * tc) ++{ ++ tfm_stream_id id; ++ for (id = 0; id < LAST_STREAM; id++) { ++ if (!tfm_stream(tc, id)) ++ continue; ++ free_tfm_stream(tc, id); ++ } ++} ++ ++static inline void put_tfm_cluster(tfm_cluster_t * tc) ++{ ++ assert("edward-942", tc != NULL); ++ free_coa_set(tc); ++ free_tfm_unit(tc); ++} ++ ++static inline int tfm_cluster_is_uptodate(tfm_cluster_t * tc) ++{ ++ assert("edward-943", tc != NULL); ++ assert("edward-944", tc->uptodate == 0 || tc->uptodate == 1); ++ return (tc->uptodate == 1); ++} ++ ++static inline void tfm_cluster_set_uptodate(tfm_cluster_t * tc) ++{ ++ assert("edward-945", tc != NULL); ++ assert("edward-946", tc->uptodate == 0 || tc->uptodate == 1); ++ tc->uptodate = 1; ++ return; ++} ++ ++static inline void tfm_cluster_clr_uptodate(tfm_cluster_t * tc) ++{ ++ assert("edward-947", tc != NULL); ++ assert("edward-948", tc->uptodate == 0 || tc->uptodate == 1); ++ tc->uptodate = 0; ++ return; ++} ++ ++static inline int tfm_stream_is_set(tfm_cluster_t * tc, tfm_stream_id id) ++{ ++ return (tfm_stream(tc, id) && ++ tfm_stream_data(tc, id) && tfm_stream_size(tc, id)); ++} ++ ++static inline int tfm_cluster_is_set(tfm_cluster_t * tc) ++{ ++ int i; ++ for (i = 0; i < LAST_STREAM; i++) ++ if (!tfm_stream_is_set(tc, i)) ++ return 0; ++ return 1; ++} ++ ++static inline void alternate_streams(tfm_cluster_t * tc) ++{ ++ tfm_stream_t *tmp = tfm_stream(tc, INPUT_STREAM); ++ ++ set_tfm_stream(tc, INPUT_STREAM, tfm_stream(tc, OUTPUT_STREAM)); ++ set_tfm_stream(tc, OUTPUT_STREAM, tmp); ++} ++ ++/* a kind of data that we can write to the window */ ++typedef enum { ++ DATA_WINDOW, /* the data we copy form user space */ ++ HOLE_WINDOW /* zeroes if we write hole */ ++} window_stat; ++ ++/* Sliding window of cluster size which should be set to the approprite position ++ (defined by cluster index) in a file before page cluster modification by ++ file_write. 
Then we translate file size, offset to write from, number of ++ bytes to write, etc.. to the following configuration needed to estimate ++ number of pages to read before write, etc... ++*/ ++typedef struct reiser4_slide { ++ unsigned off; /* offset we start to write/truncate from */ ++ unsigned count; /* number of bytes (zeroes) to write/truncate */ ++ unsigned delta; /* number of bytes to append to the hole */ ++ window_stat stat; /* a kind of data to write to the window */ ++} reiser4_slide_t; ++ ++/* The following is a set of possible disk cluster states */ ++typedef enum { ++ INVAL_DISK_CLUSTER, /* unknown state */ ++ PREP_DISK_CLUSTER, /* disk cluster got converted by flush ++ at least 1 time */ ++ UNPR_DISK_CLUSTER, /* disk cluster just created and should be ++ converted by flush */ ++ FAKE_DISK_CLUSTER /* disk cluster doesn't exist neither in memory ++ nor on disk */ ++} disk_cluster_stat; ++ ++/* ++ While implementing all transforms (from page to disk cluster, and back) ++ reiser4 cluster manager fills the following structure incapsulating pointers ++ to all the clusters for the same index including the sliding window above ++*/ ++typedef struct reiser4_cluster { ++ tfm_cluster_t tc; /* transform cluster */ ++ int nr_pages; /* number of pages */ ++ struct page **pages; /* page cluster */ ++ page_cluster_op op; /* page cluster operation */ ++ struct file *file; ++ hint_t *hint; /* disk cluster item for traversal */ ++ disk_cluster_stat dstat; /* state of the current disk cluster */ ++ cloff_t index; /* offset in the units of cluster size */ ++ int index_valid; /* to validate the index above, if needed */ ++ reiser4_slide_t *win; /* sliding window of cluster size */ ++ int reserved; /* this indicates that space for disk ++ cluster modification is reserved */ ++#if REISER4_DEBUG ++ reiser4_context *ctx; ++ int reserved_prepped; ++ int reserved_unprepped; ++#endif ++ ++} reiser4_cluster_t; ++ ++static inline __u8 * tfm_input_data (reiser4_cluster_t * clust) ++{ 
++ return tfm_stream_data(&clust->tc, INPUT_STREAM); ++} ++ ++static inline __u8 * tfm_output_data (reiser4_cluster_t * clust) ++{ ++ return tfm_stream_data(&clust->tc, OUTPUT_STREAM); ++} ++ ++static inline int reset_cluster_pgset(reiser4_cluster_t * clust, int nrpages) ++{ ++ assert("edward-1057", clust->pages != NULL); ++ memset(clust->pages, 0, sizeof(*clust->pages) * nrpages); ++ return 0; ++} ++ ++static inline int alloc_cluster_pgset(reiser4_cluster_t * clust, int nrpages) ++{ ++ assert("edward-949", clust != NULL); ++ assert("edward-1362", clust->pages == NULL); ++ assert("edward-950", nrpages != 0 && nrpages <= MAX_CLUSTER_NRPAGES); ++ ++ clust->pages = ++ kmalloc(sizeof(*clust->pages) * nrpages, ++ reiser4_ctx_gfp_mask_get()); ++ if (!clust->pages) ++ return RETERR(-ENOMEM); ++ reset_cluster_pgset(clust, nrpages); ++ return 0; ++} ++ ++static inline void free_cluster_pgset(reiser4_cluster_t * clust) ++{ ++ assert("edward-951", clust->pages != NULL); ++ kfree(clust->pages); ++ clust->pages = NULL; ++} ++ ++static inline void put_cluster_handle(reiser4_cluster_t * clust) ++{ ++ assert("edward-435", clust != NULL); ++ ++ put_tfm_cluster(&clust->tc); ++ if (clust->pages) ++ free_cluster_pgset(clust); ++ memset(clust, 0, sizeof *clust); ++} ++ ++static inline void inc_keyload_count(crypto_stat_t * data) ++{ ++ assert("edward-1410", data != NULL); ++ data->keyload_count++; ++} ++ ++static inline void dec_keyload_count(crypto_stat_t * data) ++{ ++ assert("edward-1411", data != NULL); ++ assert("edward-1412", data->keyload_count > 0); ++ data->keyload_count--; ++} ++ ++/* cryptcompress specific part of reiser4_inode */ ++typedef struct cryptcompress_info { ++ crypto_stat_t *crypt; ++ /* the following 2 fields are controlled by compression mode plugin */ ++ int compress_toggle; /* current status of compressibility */ ++ int lattice_factor; /* factor of dynamic lattice. 
FIXME: Have a ++ compression_toggle to keep the factor */ ++#if REISER4_DEBUG ++ int pgcount; /* number of captured pages */ ++#endif ++} cryptcompress_info_t; ++ ++static inline void set_compression_toggle (cryptcompress_info_t * info, int val) ++{ ++ info->compress_toggle = val; ++} ++ ++static inline int get_compression_toggle (cryptcompress_info_t * info) ++{ ++ return info->compress_toggle; ++} ++ ++static inline int compression_is_on(cryptcompress_info_t * info) ++{ ++ return get_compression_toggle(info) == 1; ++} ++ ++static inline void turn_on_compression(cryptcompress_info_t * info) ++{ ++ set_compression_toggle(info, 1); ++} ++ ++static inline void turn_off_compression(cryptcompress_info_t * info) ++{ ++ set_compression_toggle(info, 0); ++} ++ ++static inline void set_lattice_factor(cryptcompress_info_t * info, int val) ++{ ++ info->lattice_factor = val; ++} ++ ++static inline int get_lattice_factor(cryptcompress_info_t * info) ++{ ++ return info->lattice_factor; ++} ++ ++cryptcompress_info_t *cryptcompress_inode_data(const struct inode *); ++int equal_to_rdk(znode *, const reiser4_key *); ++int goto_right_neighbor(coord_t *, lock_handle *); ++int cryptcompress_inode_ok(struct inode *inode); ++int coord_is_unprepped_ctail(const coord_t * coord); ++extern int ctail_read_disk_cluster (reiser4_cluster_t *, struct inode *, ++ znode_lock_mode mode); ++extern int do_readpage_ctail(struct inode *, reiser4_cluster_t *, ++ struct page * page, znode_lock_mode mode); ++extern int ctail_insert_unprepped_cluster(reiser4_cluster_t * clust, ++ struct inode * inode); ++extern int readpages_cryptcompress(struct file*, struct address_space*, ++ struct list_head*, unsigned); ++int bind_cryptcompress(struct inode *child, struct inode *parent); ++void destroy_inode_cryptcompress(struct inode * inode); ++int grab_cluster_pages(struct inode *inode, reiser4_cluster_t * clust); ++int write_conversion_hook(struct file *file, struct inode * inode, loff_t pos, ++ reiser4_cluster_t * 
clust, int * progress); ++crypto_stat_t * inode_crypto_stat (struct inode * inode); ++void inherit_crypto_stat_common(struct inode * parent, struct inode * object, ++ int (*can_inherit)(struct inode * child, ++ struct inode * parent)); ++void reiser4_attach_crypto_stat(struct inode * inode, crypto_stat_t * info); ++void change_crypto_stat(struct inode * inode, crypto_stat_t * new); ++crypto_stat_t * reiser4_alloc_crypto_stat (struct inode * inode); ++ ++static inline struct crypto_blkcipher * info_get_cipher(crypto_stat_t * info) ++{ ++ return info->cipher; ++} ++ ++static inline void info_set_cipher(crypto_stat_t * info, ++ struct crypto_blkcipher * tfm) ++{ ++ info->cipher = tfm; ++} ++ ++static inline struct crypto_hash * info_get_digest(crypto_stat_t * info) ++{ ++ return info->digest; ++} ++ ++static inline void info_set_digest(crypto_stat_t * info, ++ struct crypto_hash * tfm) ++{ ++ info->digest = tfm; ++} ++ ++#endif /* __FS_REISER4_CRYPTCOMPRESS_H__ */ ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/file/file.c b/fs/reiser4/plugin/file/file.c +new file mode 100644 +index 0000000..67501aa +--- /dev/null ++++ b/fs/reiser4/plugin/file/file.c +@@ -0,0 +1,2820 @@ ++/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* ++ * this file contains implementations of inode/file/address_space/file plugin ++ * operations specific for "unix file plugin" (plugin id is ++ * UNIX_FILE_PLUGIN_ID). 
"Unix file" is either built of tail items only ++ * (FORMATTING_ID) or of extent items only (EXTENT_POINTER_ID) or empty (have ++ * no items but stat data) ++ */ ++ ++#include "../../inode.h" ++#include "../../super.h" ++#include "../../tree_walk.h" ++#include "../../carry.h" ++#include "../../page_cache.h" ++#include "../../ioctl.h" ++#include "../object.h" ++#include "../../safe_link.h" ++ ++#include ++#include ++#include ++ ++ ++static int unpack(struct file *file, struct inode *inode, int forever); ++static void drop_access(unix_file_info_t *); ++static int hint_validate(hint_t * hint, const reiser4_key * key, int check_key, ++ znode_lock_mode lock_mode); ++ ++/* get unix file plugin specific portion of inode */ ++unix_file_info_t *unix_file_inode_data(const struct inode *inode) ++{ ++ return &reiser4_inode_data(inode)->file_plugin_data.unix_file_info; ++} ++ ++/** ++ * equal_to_rdk - compare key and znode's right delimiting key ++ * @node: node whose right delimiting key to compare with @key ++ * @key: key to compare with @node's right delimiting key ++ * ++ * Returns true if @key is equal to right delimiting key of @node. ++ */ ++int equal_to_rdk(znode *node, const reiser4_key *key) ++{ ++ int result; ++ ++ read_lock_dk(znode_get_tree(node)); ++ result = keyeq(key, znode_get_rd_key(node)); ++ read_unlock_dk(znode_get_tree(node)); ++ return result; ++} ++ ++#if REISER4_DEBUG ++ ++/** ++ * equal_to_ldk - compare key and znode's left delimiting key ++ * @node: node whose left delimiting key to compare with @key ++ * @key: key to compare with @node's left delimiting key ++ * ++ * Returns true if @key is equal to left delimiting key of @node. 
++ */ ++int equal_to_ldk(znode *node, const reiser4_key *key) ++{ ++ int result; ++ ++ read_lock_dk(znode_get_tree(node)); ++ result = keyeq(key, znode_get_ld_key(node)); ++ read_unlock_dk(znode_get_tree(node)); ++ return result; ++} ++ ++/** ++ * check_coord - check whether coord corresponds to key ++ * @coord: coord to check ++ * @key: key @coord has to correspond to ++ * ++ * Returns true if @coord is set as if it was set as result of lookup with @key ++ * in coord->node. ++ */ ++static int check_coord(const coord_t *coord, const reiser4_key *key) ++{ ++ coord_t twin; ++ ++ node_plugin_by_node(coord->node)->lookup(coord->node, key, ++ FIND_MAX_NOT_MORE_THAN, &twin); ++ return coords_equal(coord, &twin); ++} ++ ++#endif /* REISER4_DEBUG */ ++ ++/** ++ * init_uf_coord - initialize extended coord ++ * @uf_coord: ++ * @lh: ++ * ++ * ++ */ ++void init_uf_coord(uf_coord_t *uf_coord, lock_handle *lh) ++{ ++ coord_init_zero(&uf_coord->coord); ++ coord_clear_iplug(&uf_coord->coord); ++ uf_coord->lh = lh; ++ init_lh(lh); ++ memset(&uf_coord->extension, 0, sizeof(uf_coord->extension)); ++ uf_coord->valid = 0; ++} ++ ++static void validate_extended_coord(uf_coord_t *uf_coord, loff_t offset) ++{ ++ assert("vs-1333", uf_coord->valid == 0); ++ ++ if (coord_is_between_items(&uf_coord->coord)) ++ return; ++ ++ assert("vs-1348", ++ item_plugin_by_coord(&uf_coord->coord)->s.file. ++ init_coord_extension); ++ ++ item_body_by_coord(&uf_coord->coord); ++ item_plugin_by_coord(&uf_coord->coord)->s.file. ++ init_coord_extension(uf_coord, offset); ++} ++ ++/** ++ * goto_right_neighbor - lock right neighbor, drop current node lock ++ * @coord: ++ * @lh: ++ * ++ * Obtain lock on right neighbor and drop lock on current node. 
++ */ ++int goto_right_neighbor(coord_t *coord, lock_handle *lh) ++{ ++ int result; ++ lock_handle lh_right; ++ ++ assert("vs-1100", znode_is_locked(coord->node)); ++ ++ init_lh(&lh_right); ++ result = reiser4_get_right_neighbor(&lh_right, coord->node, ++ znode_is_wlocked(coord->node) ? ++ ZNODE_WRITE_LOCK : ZNODE_READ_LOCK, ++ GN_CAN_USE_UPPER_LEVELS); ++ if (result) { ++ done_lh(&lh_right); ++ return result; ++ } ++ ++ /* ++ * we hold two longterm locks on neighboring nodes. Unlock left of ++ * them ++ */ ++ done_lh(lh); ++ ++ coord_init_first_unit_nocheck(coord, lh_right.node); ++ move_lh(lh, &lh_right); ++ ++ return 0; ++ ++} ++ ++/** ++ * set_file_state ++ * @uf_info: ++ * @cbk_result: ++ * @level: ++ * ++ * This is to be used by find_file_item and in find_file_state to ++ * determine real state of file ++ */ ++static void set_file_state(unix_file_info_t *uf_info, int cbk_result, ++ tree_level level) ++{ ++ if (cbk_errored(cbk_result)) ++ /* error happened in find_file_item */ ++ return; ++ ++ assert("vs-1164", level == LEAF_LEVEL || level == TWIG_LEVEL); ++ ++ if (uf_info->container == UF_CONTAINER_UNKNOWN) { ++ /* ++ * container is unknown, therefore conversion can not be in ++ * progress ++ */ ++ assert("", ++ !reiser4_inode_get_flag(unix_file_info_to_inode(uf_info), ++ REISER4_PART_IN_CONV)); ++ if (cbk_result == CBK_COORD_NOTFOUND) ++ uf_info->container = UF_CONTAINER_EMPTY; ++ else if (level == LEAF_LEVEL) ++ uf_info->container = UF_CONTAINER_TAILS; ++ else ++ uf_info->container = UF_CONTAINER_EXTENTS; ++ } else { ++ /* ++ * file state is known, check whether it is set correctly if ++ * file is not being tail converted ++ */ ++ if (!reiser4_inode_get_flag(unix_file_info_to_inode(uf_info), ++ REISER4_PART_IN_CONV)) { ++ assert("vs-1162", ++ ergo(level == LEAF_LEVEL && ++ cbk_result == CBK_COORD_FOUND, ++ uf_info->container == UF_CONTAINER_TAILS)); ++ assert("vs-1165", ++ ergo(level == TWIG_LEVEL && ++ cbk_result == CBK_COORD_FOUND, ++ uf_info->container 
== UF_CONTAINER_EXTENTS)); ++ } ++ } ++} ++ ++int find_file_item_nohint(coord_t *coord, lock_handle *lh, ++ const reiser4_key *key, znode_lock_mode lock_mode, ++ struct inode *inode) ++{ ++ return reiser4_object_lookup(inode, key, coord, lh, lock_mode, ++ FIND_MAX_NOT_MORE_THAN, ++ TWIG_LEVEL, LEAF_LEVEL, ++ (lock_mode == ZNODE_READ_LOCK) ? CBK_UNIQUE : ++ (CBK_UNIQUE | CBK_FOR_INSERT), ++ NULL /* ra_info */ ); ++} ++ ++/** ++ * find_file_item - look for file item in the tree ++ * @hint: provides coordinate, lock handle, seal ++ * @key: key for search ++ * @mode: mode of lock to put on returned node ++ * @ra_info: ++ * @inode: ++ * ++ * This finds position in the tree corresponding to @key. It first tries to use ++ * @hint's seal if it is set. ++ */ ++int find_file_item(hint_t *hint, const reiser4_key *key, ++ znode_lock_mode lock_mode, ++ struct inode *inode) ++{ ++ int result; ++ coord_t *coord; ++ lock_handle *lh; ++ ++ assert("nikita-3030", reiser4_schedulable()); ++ assert("vs-1707", hint != NULL); ++ assert("vs-47", inode != NULL); ++ ++ coord = &hint->ext_coord.coord; ++ lh = hint->ext_coord.lh; ++ init_lh(lh); ++ ++ result = hint_validate(hint, key, 1 /* check key */, lock_mode); ++ if (!result) { ++ if (coord->between == AFTER_UNIT && ++ equal_to_rdk(coord->node, key)) { ++ result = goto_right_neighbor(coord, lh); ++ if (result == -E_NO_NEIGHBOR) ++ return RETERR(-EIO); ++ if (result) ++ return result; ++ assert("vs-1152", equal_to_ldk(coord->node, key)); ++ /* ++ * we moved to different node. 
Invalidate coord ++ * extension, zload is necessary to init it again ++ */ ++ hint->ext_coord.valid = 0; ++ } ++ ++ set_file_state(unix_file_inode_data(inode), CBK_COORD_FOUND, ++ znode_get_level(coord->node)); ++ ++ return CBK_COORD_FOUND; ++ } ++ ++ coord_init_zero(coord); ++ result = find_file_item_nohint(coord, lh, key, lock_mode, inode); ++ set_file_state(unix_file_inode_data(inode), result, ++ znode_get_level(coord->node)); ++ ++ /* FIXME: we might already have coord extension initialized */ ++ hint->ext_coord.valid = 0; ++ return result; ++} ++ ++/* plugin->u.file.write_flowom = NULL ++ plugin->u.file.read_flow = NULL */ ++ ++void hint_init_zero(hint_t * hint) ++{ ++ memset(hint, 0, sizeof(*hint)); ++ init_lh(&hint->lh); ++ hint->ext_coord.lh = &hint->lh; ++} ++ ++static int find_file_state(struct inode *inode, unix_file_info_t *uf_info) ++{ ++ int result; ++ reiser4_key key; ++ coord_t coord; ++ lock_handle lh; ++ ++ assert("vs-1628", ea_obtained(uf_info)); ++ ++ if (uf_info->container == UF_CONTAINER_UNKNOWN) { ++ key_by_inode_and_offset_common(inode, 0, &key); ++ init_lh(&lh); ++ result = find_file_item_nohint(&coord, &lh, &key, ++ ZNODE_READ_LOCK, inode); ++ set_file_state(uf_info, result, znode_get_level(coord.node)); ++ done_lh(&lh); ++ if (!cbk_errored(result)) ++ result = 0; ++ } else ++ result = 0; ++ assert("vs-1074", ++ ergo(result == 0, uf_info->container != UF_CONTAINER_UNKNOWN)); ++ reiser4_txn_restart_current(); ++ return result; ++} ++ ++/* estimate and reserve space needed to truncate page which gets partially truncated: one block for page itself, stat ++ data update (estimate_one_insert_into_item) and one item insertion (estimate_one_insert_into_item) which may happen ++ if page corresponds to hole extent and unallocated one will have to be created */ ++static int reserve_partial_page(reiser4_tree * tree) ++{ ++ grab_space_enable(); ++ return reiser4_grab_reserved(reiser4_get_current_sb(), ++ 1 + ++ 2 * estimate_one_insert_into_item(tree), 
++ BA_CAN_COMMIT); ++} ++ ++/* estimate and reserve space needed to cut one item and update one stat data */ ++static int reserve_cut_iteration(reiser4_tree * tree) ++{ ++ __u64 estimate = estimate_one_item_removal(tree) ++ + estimate_one_insert_into_item(tree); ++ ++ assert("nikita-3172", lock_stack_isclean(get_current_lock_stack())); ++ ++ grab_space_enable(); ++ /* We need to double our estimate now that we can delete more than one ++ node. */ ++ return reiser4_grab_reserved(reiser4_get_current_sb(), estimate * 2, ++ BA_CAN_COMMIT); ++} ++ ++int reiser4_update_file_size(struct inode *inode, reiser4_key * key, ++ int update_sd) ++{ ++ int result = 0; ++ ++ INODE_SET_FIELD(inode, i_size, get_key_offset(key)); ++ if (update_sd) { ++ inode->i_ctime = inode->i_mtime = CURRENT_TIME; ++ result = reiser4_update_sd(inode); ++ } ++ return result; ++} ++ ++/* cut file items one by one starting from the last one until new file size (inode->i_size) is reached. Reserve space ++ and update file stat data on every single cut from the tree */ ++int ++cut_file_items(struct inode *inode, loff_t new_size, int update_sd, ++ loff_t cur_size, int (*update_actor) (struct inode *, ++ reiser4_key *, int)) ++{ ++ reiser4_key from_key, to_key; ++ reiser4_key smallest_removed; ++ file_plugin *fplug = inode_file_plugin(inode); ++ int result; ++ int progress = 0; ++ ++ assert("vs-1248", ++ fplug == file_plugin_by_id(UNIX_FILE_PLUGIN_ID) || ++ fplug == file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)); ++ ++ fplug->key_by_inode(inode, new_size, &from_key); ++ to_key = from_key; ++ set_key_offset(&to_key, cur_size - 1 /*get_key_offset(reiser4_max_key()) */ ); ++ /* this loop normally runs just once */ ++ while (1) { ++ result = reserve_cut_iteration(reiser4_tree_by_inode(inode)); ++ if (result) ++ break; ++ ++ result = reiser4_cut_tree_object(current_tree, &from_key, &to_key, ++ &smallest_removed, inode, 1, ++ &progress); ++ if (result == -E_REPEAT) { ++ /* -E_REPEAT is a signal to interrupt a 
long file truncation process */ ++ if (progress) { ++ result = ++ update_actor(inode, &smallest_removed, ++ update_sd); ++ if (result) ++ break; ++ } ++ ++ /* the below does up(sbinfo->delete_mutex). Do not get folled */ ++ reiser4_release_reserved(inode->i_sb); ++ ++ /* reiser4_cut_tree_object() was interrupted probably because ++ * current atom requires commit, we have to release ++ * transaction handle to allow atom commit. */ ++ reiser4_txn_restart_current(); ++ continue; ++ } ++ if (result ++ && !(result == CBK_COORD_NOTFOUND && new_size == 0 ++ && inode->i_size == 0)) ++ break; ++ ++ set_key_offset(&smallest_removed, new_size); ++ /* Final sd update after the file gets its correct size */ ++ result = update_actor(inode, &smallest_removed, update_sd); ++ break; ++ } ++ ++ /* the below does up(sbinfo->delete_mutex). Do not get folled */ ++ reiser4_release_reserved(inode->i_sb); ++ ++ return result; ++} ++ ++int find_or_create_extent(struct page *page); ++ ++/* part of truncate_file_body: it is called when truncate is used to make file ++ shorter */ ++static int shorten_file(struct inode *inode, loff_t new_size) ++{ ++ int result; ++ struct page *page; ++ int padd_from; ++ unsigned long index; ++ char *kaddr; ++ unix_file_info_t *uf_info; ++ ++ /* ++ * all items of ordinary reiser4 file are grouped together. That is why ++ * we can use reiser4_cut_tree. 
Plan B files (for instance) can not be ++ * truncated that simply ++ */ ++ result = cut_file_items(inode, new_size, 1 /*update_sd */ , ++ get_key_offset(reiser4_max_key()), ++ reiser4_update_file_size); ++ if (result) ++ return result; ++ ++ uf_info = unix_file_inode_data(inode); ++ assert("vs-1105", new_size == inode->i_size); ++ if (new_size == 0) { ++ uf_info->container = UF_CONTAINER_EMPTY; ++ return 0; ++ } ++ ++ result = find_file_state(inode, uf_info); ++ if (result) ++ return result; ++ if (uf_info->container == UF_CONTAINER_TAILS) ++ /* ++ * No need to worry about zeroing last page after new file ++ * end ++ */ ++ return 0; ++ ++ padd_from = inode->i_size & (PAGE_CACHE_SIZE - 1); ++ if (!padd_from) ++ /* file is truncated to page boundary */ ++ return 0; ++ ++ result = reserve_partial_page(reiser4_tree_by_inode(inode)); ++ if (result) { ++ reiser4_release_reserved(inode->i_sb); ++ return result; ++ } ++ ++ /* last page is partially truncated - zero its content */ ++ index = (inode->i_size >> PAGE_CACHE_SHIFT); ++ page = read_mapping_page(inode->i_mapping, index, NULL); ++ if (IS_ERR(page)) { ++ /* ++ * the below does up(sbinfo->delete_mutex). Do not get ++ * confused ++ */ ++ reiser4_release_reserved(inode->i_sb); ++ if (likely(PTR_ERR(page) == -EINVAL)) { ++ /* looks like file is built of tail items */ ++ return 0; ++ } ++ return PTR_ERR(page); ++ } ++ wait_on_page_locked(page); ++ if (!PageUptodate(page)) { ++ page_cache_release(page); ++ /* ++ * the below does up(sbinfo->delete_mutex). Do not get ++ * confused ++ */ ++ reiser4_release_reserved(inode->i_sb); ++ return RETERR(-EIO); ++ } ++ ++ /* ++ * if page correspons to hole extent unit - unallocated one will be ++ * created here. This is not necessary ++ */ ++ result = find_or_create_extent(page); ++ ++ /* ++ * FIXME: cut_file_items has already updated inode. 
Probably it would ++ * be better to update it here when file is really truncated ++ */ ++ if (result) { ++ page_cache_release(page); ++ /* ++ * the below does up(sbinfo->delete_mutex). Do not get ++ * confused ++ */ ++ reiser4_release_reserved(inode->i_sb); ++ return result; ++ } ++ ++ lock_page(page); ++ assert("vs-1066", PageLocked(page)); ++ kaddr = kmap_atomic(page, KM_USER0); ++ memset(kaddr + padd_from, 0, PAGE_CACHE_SIZE - padd_from); ++ flush_dcache_page(page); ++ kunmap_atomic(kaddr, KM_USER0); ++ unlock_page(page); ++ page_cache_release(page); ++ /* the below does up(sbinfo->delete_mutex). Do not get confused */ ++ reiser4_release_reserved(inode->i_sb); ++ return 0; ++} ++ ++/** ++ * should_have_notail ++ * @uf_info: ++ * @new_size: ++ * ++ * Calls formatting plugin to see whether file of size @new_size has to be ++ * stored in unformatted nodes or in tail items. 0 is returned for later case. ++ */ ++static int should_have_notail(const unix_file_info_t *uf_info, loff_t new_size) ++{ ++ if (!uf_info->tplug) ++ return 1; ++ return !uf_info->tplug->have_tail(unix_file_info_to_inode(uf_info), ++ new_size); ++ ++} ++ ++/** ++ * truncate_file_body - change length of file ++ * @inode: inode of file ++ * @new_size: new file length ++ * ++ * Adjusts items file @inode is built of to match @new_size. It may either cut ++ * items or add them to represent a hole at the end of file. The caller has to ++ * obtain exclusive access to the file. 
++ */ ++static int truncate_file_body(struct inode *inode, loff_t new_size) ++{ ++ int result; ++ ++ if (inode->i_size < new_size) { ++ /* expanding truncate */ ++ struct dentry dentry; ++ struct file file; ++ unix_file_info_t *uf_info; ++ ++ dentry.d_inode = inode; ++ file.f_dentry = &dentry; ++ file.private_data = NULL; ++ file.f_pos = new_size; ++ file.private_data = NULL; ++ uf_info = unix_file_inode_data(inode); ++ result = find_file_state(inode, uf_info); ++ if (result) ++ return result; ++ ++ if (should_have_notail(uf_info, new_size)) { ++ /* ++ * file of size @new_size has to be built of ++ * extents. If it is built of tails - convert to ++ * extents ++ */ ++ if (uf_info->container == UF_CONTAINER_TAILS) { ++ /* ++ * if file is being convered by another process ++ * - wait until it completes ++ */ ++ while (1) { ++ if (reiser4_inode_get_flag(inode, ++ REISER4_PART_IN_CONV)) { ++ drop_exclusive_access(uf_info); ++ schedule(); ++ get_exclusive_access(uf_info); ++ continue; ++ } ++ break; ++ } ++ ++ if (uf_info->container == UF_CONTAINER_TAILS) { ++ result = tail2extent(uf_info); ++ if (result) ++ return result; ++ } ++ } ++ result = reiser4_write_extent(&file, NULL, 0, ++ &new_size); ++ if (result) ++ return result; ++ uf_info->container = UF_CONTAINER_EXTENTS; ++ } else { ++ if (uf_info->container == UF_CONTAINER_EXTENTS) { ++ result = reiser4_write_extent(&file, NULL, 0, ++ &new_size); ++ if (result) ++ return result; ++ } else { ++ result = reiser4_write_tail(&file, NULL, 0, ++ &new_size); ++ if (result) ++ return result; ++ uf_info->container = UF_CONTAINER_TAILS; ++ } ++ } ++ BUG_ON(result > 0); ++ INODE_SET_FIELD(inode, i_size, new_size); ++ file_update_time(&file); ++ result = reiser4_update_sd(inode); ++ BUG_ON(result != 0); ++ reiser4_free_file_fsdata(&file); ++ } else ++ result = shorten_file(inode, new_size); ++ return result; ++} ++ ++/* plugin->u.write_sd_by_inode = write_sd_by_inode_common */ ++ ++/** ++ * load_file_hint - copy hint from struct 
file to local variable ++ * @file: file to get hint from ++ * @hint: structure to fill ++ * ++ * Reiser4 specific portion of struct file may contain information (hint) ++ * stored on exiting from previous read or write. That information includes ++ * seal of znode and coord within that znode where previous read or write ++ * stopped. This function copies that information to @hint if it was stored or ++ * initializes @hint by 0s otherwise. ++ */ ++int load_file_hint(struct file *file, hint_t *hint) ++{ ++ reiser4_file_fsdata *fsdata; ++ ++ if (file) { ++ fsdata = reiser4_get_file_fsdata(file); ++ if (IS_ERR(fsdata)) ++ return PTR_ERR(fsdata); ++ ++ spin_lock_inode(file->f_dentry->d_inode); ++ if (reiser4_seal_is_set(&fsdata->reg.hint.seal)) { ++ *hint = fsdata->reg.hint; ++ init_lh(&hint->lh); ++ hint->ext_coord.lh = &hint->lh; ++ spin_unlock_inode(file->f_dentry->d_inode); ++ /* ++ * force re-validation of the coord on the first ++ * iteration of the read/write loop. ++ */ ++ hint->ext_coord.valid = 0; ++ assert("nikita-19892", coords_equal(&hint->seal.coord1, ++ &hint->ext_coord. ++ coord)); ++ return 0; ++ } ++ memset(&fsdata->reg.hint, 0, sizeof(hint_t)); ++ spin_unlock_inode(file->f_dentry->d_inode); ++ } ++ hint_init_zero(hint); ++ return 0; ++} ++ ++/** ++ * save_file_hint - copy hint to reiser4 private struct file's part ++ * @file: file to save hint in ++ * @hint: hint to save ++ * ++ * This copies @hint to reiser4 private part of struct file. It can help ++ * speedup future accesses to the file. 
++ */ ++void save_file_hint(struct file *file, const hint_t *hint) ++{ ++ reiser4_file_fsdata *fsdata; ++ ++ assert("edward-1337", hint != NULL); ++ ++ if (!file || !reiser4_seal_is_set(&hint->seal)) ++ return; ++ fsdata = reiser4_get_file_fsdata(file); ++ assert("vs-965", !IS_ERR(fsdata)); ++ assert("nikita-19891", ++ coords_equal(&hint->seal.coord1, &hint->ext_coord.coord)); ++ assert("vs-30", hint->lh.owner == NULL); ++ spin_lock_inode(file->f_dentry->d_inode); ++ fsdata->reg.hint = *hint; ++ spin_unlock_inode(file->f_dentry->d_inode); ++ return; ++} ++ ++void reiser4_unset_hint(hint_t * hint) ++{ ++ assert("vs-1315", hint); ++ hint->ext_coord.valid = 0; ++ reiser4_seal_done(&hint->seal); ++ done_lh(&hint->lh); ++} ++ ++/* coord must be set properly. So, that reiser4_set_hint ++ has nothing to do */ ++void reiser4_set_hint(hint_t * hint, const reiser4_key * key, ++ znode_lock_mode mode) ++{ ++ ON_DEBUG(coord_t * coord = &hint->ext_coord.coord); ++ assert("vs-1207", WITH_DATA(coord->node, check_coord(coord, key))); ++ ++ reiser4_seal_init(&hint->seal, &hint->ext_coord.coord, key); ++ hint->offset = get_key_offset(key); ++ hint->mode = mode; ++ done_lh(&hint->lh); ++} ++ ++int hint_is_set(const hint_t * hint) ++{ ++ return reiser4_seal_is_set(&hint->seal); ++} ++ ++#if REISER4_DEBUG ++static int all_but_offset_key_eq(const reiser4_key * k1, const reiser4_key * k2) ++{ ++ return (get_key_locality(k1) == get_key_locality(k2) && ++ get_key_type(k1) == get_key_type(k2) && ++ get_key_band(k1) == get_key_band(k2) && ++ get_key_ordering(k1) == get_key_ordering(k2) && ++ get_key_objectid(k1) == get_key_objectid(k2)); ++} ++#endif ++ ++static int ++hint_validate(hint_t * hint, const reiser4_key * key, int check_key, ++ znode_lock_mode lock_mode) ++{ ++ if (!hint || !hint_is_set(hint) || hint->mode != lock_mode) ++ /* hint either not set or set by different operation */ ++ return RETERR(-E_REPEAT); ++ ++ assert("vs-1277", all_but_offset_key_eq(key, &hint->seal.key)); ++ ++ 
if (check_key && get_key_offset(key) != hint->offset) ++ /* hint is set for different key */ ++ return RETERR(-E_REPEAT); ++ ++ assert("vs-31", hint->ext_coord.lh == &hint->lh); ++ return reiser4_seal_validate(&hint->seal, &hint->ext_coord.coord, key, ++ hint->ext_coord.lh, lock_mode, ++ ZNODE_LOCK_LOPRI); ++} ++ ++/** ++ * find_or_create_extent - ++ * @page: ++ * ++ * ++ */ ++/* look for place at twig level for extent corresponding to page, call extent's writepage method to create ++ unallocated extent if it does not exist yet, initialize jnode, capture page */ ++int find_or_create_extent(struct page *page) ++{ ++ int result; ++ struct inode *inode; ++ int plugged_hole; ++ ++ jnode *node; ++ ++ assert("vs-1065", page->mapping && page->mapping->host); ++ inode = page->mapping->host; ++ ++ lock_page(page); ++ node = jnode_of_page(page); ++ if (IS_ERR(node)) { ++ unlock_page(page); ++ return PTR_ERR(node); ++ } ++ JF_SET(node, JNODE_WRITE_PREPARED); ++ unlock_page(page); ++ ++ if (node->blocknr == 0) { ++ plugged_hole = 0; ++ result = reiser4_update_extent(inode, node, page_offset(page), ++ &plugged_hole); ++ if (result) { ++ JF_CLR(node, JNODE_WRITE_PREPARED); ++ jput(node); ++ warning("", "reiser4_update_extent failed: %d", result); ++ return result; ++ } ++ if (plugged_hole) ++ reiser4_update_sd(inode); ++ } else { ++ spin_lock_jnode(node); ++ result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0); ++ BUG_ON(result != 0); ++ jnode_make_dirty_locked(node); ++ spin_unlock_jnode(node); ++ } ++ ++ BUG_ON(node->atom == NULL); ++ JF_CLR(node, JNODE_WRITE_PREPARED); ++ jput(node); ++ ++ if (get_current_context()->entd) { ++ entd_context *ent = get_entd_context(node->tree->super); ++ ++ if (ent->cur_request->page == page) ++ ent->cur_request->node = node; ++ } ++ return 0; ++} ++ ++/** ++ * has_anonymous_pages - check whether inode has pages dirtied via mmap ++ * @inode: inode to check ++ * ++ * Returns true if inode's mapping has dirty pages which do not belong to any 
++ * atom. Those are either tagged PAGECACHE_TAG_REISER4_MOVED in mapping's page ++ * tree or were eflushed and can be found via jnodes tagged ++ * EFLUSH_TAG_ANONYMOUS in radix tree of jnodes. ++ */ ++static int has_anonymous_pages(struct inode *inode) ++{ ++ int result; ++ ++ read_lock_irq(&inode->i_mapping->tree_lock); ++ result = radix_tree_tagged(&inode->i_mapping->page_tree, PAGECACHE_TAG_REISER4_MOVED); ++ read_unlock_irq(&inode->i_mapping->tree_lock); ++ return result; ++} ++ ++/** ++ * capture_page_and_create_extent - ++ * @page: page to be captured ++ * ++ * Grabs space for extent creation and stat data update and calls function to ++ * do actual work. ++ */ ++static int capture_page_and_create_extent(struct page *page) ++{ ++ int result; ++ struct inode *inode; ++ ++ assert("vs-1084", page->mapping && page->mapping->host); ++ inode = page->mapping->host; ++ assert("vs-1139", ++ unix_file_inode_data(inode)->container == UF_CONTAINER_EXTENTS); ++ /* page belongs to file */ ++ assert("vs-1393", ++ inode->i_size > page_offset(page)); ++ ++ /* page capture may require extent creation (if it does not exist yet) ++ and stat data's update (number of blocks changes on extent ++ creation) */ ++ grab_space_enable(); ++ result = reiser4_grab_space(2 * estimate_one_insert_into_item ++ (reiser4_tree_by_inode(inode)), ++ BA_CAN_COMMIT); ++ if (likely(!result)) ++ result = find_or_create_extent(page); ++ ++ if (result != 0) ++ SetPageError(page); ++ return result; ++} ++ ++/* this is implementation of method commit_write of struct ++ address_space_operations for unix file plugin */ ++int ++commit_write_unix_file(struct file *file, struct page *page, ++ unsigned from, unsigned to) ++{ ++ reiser4_context *ctx; ++ struct inode *inode; ++ int result; ++ ++ assert("umka-3101", file != NULL); ++ assert("umka-3102", page != NULL); ++ assert("umka-3093", PageLocked(page)); ++ ++ SetPageUptodate(page); ++ ++ inode = page->mapping->host; ++ ctx = 
reiser4_init_context(page->mapping->host->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ page_cache_get(page); ++ unlock_page(page); ++ result = capture_page_and_create_extent(page); ++ lock_page(page); ++ page_cache_release(page); ++ ++ /* don't commit transaction under inode semaphore */ ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++/* ++ * Support for "anonymous" pages and jnodes. ++ * ++ * When file is write-accessed through mmap pages can be dirtied from the user ++ * level. In this case kernel is not notified until one of following happens: ++ * ++ * (1) msync() ++ * ++ * (2) truncate() (either explicit or through unlink) ++ * ++ * (3) VM scanner starts reclaiming mapped pages, dirtying them before ++ * starting write-back. ++ * ++ * As a result of (3) ->writepage may be called on a dirty page without ++ * jnode. Such page is called "anonymous" in reiser4. Certain work-loads ++ * (iozone) generate huge number of anonymous pages. Emergency flush handles ++ * this situation by creating jnode for anonymous page, starting IO on the ++ * page, and marking jnode with JNODE_KEEPME bit so that it's not thrown out of ++ * memory. Such jnode is also called anonymous. ++ * ++ * reiser4_sync_sb() method tries to insert anonymous pages and jnodes into ++ * tree. This is done by capture_anonymous_*() functions below. ++ */ ++ ++/** ++ * capture_anonymous_page - involve page into transaction ++ * @pg: page to deal with ++ * ++ * Takes care that @page has corresponding metadata in the tree, creates jnode ++ * for @page and captures it. On success 1 is returned. ++ */ ++static int capture_anonymous_page(struct page *page) ++{ ++ int result; ++ ++ if (PageWriteback(page)) ++ /* FIXME: do nothing? 
*/ ++ return 0; ++ ++ result = capture_page_and_create_extent(page); ++ if (result == 0) { ++ result = 1; ++ } else ++ warning("nikita-3329", ++ "Cannot capture anon page: %i", result); ++ ++ return result; ++} ++ ++/** ++ * capture_anonymous_pages - find and capture pages dirtied via mmap ++ * @mapping: address space where to look for pages ++ * @index: start index ++ * @to_capture: maximum number of pages to capture ++ * ++ * Looks for pages tagged REISER4_MOVED starting from the *@index-th page, ++ * captures (involves into atom) them, returns number of captured pages, ++ * updates @index to next page after the last captured one. ++ */ ++static int ++capture_anonymous_pages(struct address_space *mapping, pgoff_t *index, ++ unsigned int to_capture) ++{ ++ int result; ++ struct pagevec pvec; ++ unsigned int i, count; ++ int nr; ++ ++ pagevec_init(&pvec, 0); ++ count = min(pagevec_space(&pvec), to_capture); ++ nr = 0; ++ ++ /* find pages tagged MOVED */ ++ write_lock_irq(&mapping->tree_lock); ++ pvec.nr = radix_tree_gang_lookup_tag(&mapping->page_tree, ++ (void **)pvec.pages, *index, count, ++ PAGECACHE_TAG_REISER4_MOVED); ++ if (pagevec_count(&pvec) == 0) { ++ /* ++ * there are no pages tagged MOVED in mapping->page_tree ++ * starting from *index ++ */ ++ write_unlock_irq(&mapping->tree_lock); ++ *index = (pgoff_t)-1; ++ return 0; ++ } ++ ++ /* clear MOVED tag for all found pages */ ++ for (i = 0; i < pagevec_count(&pvec); i++) { ++ void *p; ++ ++ page_cache_get(pvec.pages[i]); ++ p = radix_tree_tag_clear(&mapping->page_tree, pvec.pages[i]->index, ++ PAGECACHE_TAG_REISER4_MOVED); ++ assert("vs-49", p == pvec.pages[i]); ++ } ++ write_unlock_irq(&mapping->tree_lock); ++ ++ ++ *index = pvec.pages[i - 1]->index + 1; ++ ++ for (i = 0; i < pagevec_count(&pvec); i++) { ++ /* ++ * tag PAGECACHE_TAG_REISER4_MOVED will be cleared by ++ * reiser4_set_page_dirty_internal which is called when jnode is ++ * captured ++ */ ++ result = capture_anonymous_page(pvec.pages[i]); ++ if 
(result == 1) ++ nr++; ++ else { ++ if (result < 0) { ++ warning("vs-1454", ++ "failed to capture page: " ++ "result=%d, captured=%d)\n", ++ result, i); ++ ++ /* ++ * set MOVED tag to all pages which left not ++ * captured ++ */ ++ write_lock_irq(&mapping->tree_lock); ++ for (; i < pagevec_count(&pvec); i ++) { ++ radix_tree_tag_set(&mapping->page_tree, ++ pvec.pages[i]->index, ++ PAGECACHE_TAG_REISER4_MOVED); ++ } ++ write_unlock_irq(&mapping->tree_lock); ++ ++ pagevec_release(&pvec); ++ return result; ++ } else { ++ /* ++ * result == 0. capture_anonymous_page returns ++ * 0 for Writeback-ed page. Set MOVED tag on ++ * that page ++ */ ++ write_lock_irq(&mapping->tree_lock); ++ radix_tree_tag_set(&mapping->page_tree, ++ pvec.pages[i]->index, ++ PAGECACHE_TAG_REISER4_MOVED); ++ write_unlock_irq(&mapping->tree_lock); ++ if (i == 0) ++ *index = pvec.pages[0]->index; ++ else ++ *index = pvec.pages[i - 1]->index + 1; ++ } ++ } ++ } ++ pagevec_release(&pvec); ++ return nr; ++} ++ ++/** ++ * capture_anonymous_jnodes - find and capture anonymous jnodes ++ * @mapping: address space where to look for jnodes ++ * @from: start index ++ * @to: end index ++ * @to_capture: maximum number of jnodes to capture ++ * ++ * Looks for jnodes tagged EFLUSH_TAG_ANONYMOUS in inode's tree of jnodes in ++ * the range of indexes @from-@to and captures them, returns number of captured ++ * jnodes, updates @from to next jnode after the last captured one. ++ */ ++static int ++capture_anonymous_jnodes(struct address_space *mapping, ++ pgoff_t *from, pgoff_t to, int to_capture) ++{ ++ *from = to; ++ return 0; ++} ++ ++/* ++ * Commit atom of the jnode of a page. 
++ */ ++static int sync_page(struct page *page) ++{ ++ int result; ++ do { ++ jnode *node; ++ txn_atom *atom; ++ ++ lock_page(page); ++ node = jprivate(page); ++ if (node != NULL) { ++ spin_lock_jnode(node); ++ atom = jnode_get_atom(node); ++ spin_unlock_jnode(node); ++ } else ++ atom = NULL; ++ unlock_page(page); ++ result = reiser4_sync_atom(atom); ++ } while (result == -E_REPEAT); ++ /* ++ * ZAM-FIXME-HANS: document the logic of this loop, is it just to ++ * handle the case where more pages get added to the atom while we are ++ * syncing it? ++ */ ++ assert("nikita-3485", ergo(result == 0, ++ get_current_context()->trans->atom == NULL)); ++ return result; ++} ++ ++/* ++ * Commit atoms of pages on @pages list. ++ * call sync_page for each page from mapping's page tree ++ */ ++static int sync_page_list(struct inode *inode) ++{ ++ int result; ++ struct address_space *mapping; ++ unsigned long from; /* start index for radix_tree_gang_lookup */ ++ unsigned int found; /* return value for radix_tree_gang_lookup */ ++ ++ mapping = inode->i_mapping; ++ from = 0; ++ result = 0; ++ read_lock_irq(&mapping->tree_lock); ++ while (result == 0) { ++ struct page *page; ++ ++ found = ++ radix_tree_gang_lookup(&mapping->page_tree, (void **)&page, ++ from, 1); ++ assert("", found < 2); ++ if (found == 0) ++ break; ++ ++ /* page may not leave radix tree because it is protected from truncating by inode->i_mutex locked by ++ sys_fsync */ ++ page_cache_get(page); ++ read_unlock_irq(&mapping->tree_lock); ++ ++ from = page->index + 1; ++ ++ result = sync_page(page); ++ ++ page_cache_release(page); ++ read_lock_irq(&mapping->tree_lock); ++ } ++ ++ read_unlock_irq(&mapping->tree_lock); ++ return result; ++} ++ ++static int commit_file_atoms(struct inode *inode) ++{ ++ int result; ++ unix_file_info_t *uf_info; ++ ++ uf_info = unix_file_inode_data(inode); ++ ++ get_exclusive_access(uf_info); ++ /* ++ * find what items file is made from ++ */ ++ result = find_file_state(inode, uf_info); ++ 
drop_exclusive_access(uf_info); ++ if (result != 0) ++ return result; ++ ++ /* ++ * file state cannot change because we are under ->i_mutex ++ */ ++ switch (uf_info->container) { ++ case UF_CONTAINER_EXTENTS: ++ /* find_file_state might open join an atom */ ++ reiser4_txn_restart_current(); ++ result = ++ /* ++ * when we are called by ++ * filemap_fdatawrite-> ++ * do_writepages()-> ++ * reiser4_writepages() ++ * ++ * inode->i_mapping->dirty_pages are spices into ++ * ->io_pages, leaving ->dirty_pages dirty. ++ * ++ * When we are called from ++ * reiser4_fsync()->sync_unix_file(), we have to ++ * commit atoms of all pages on the ->dirty_list. ++ * ++ * So for simplicity we just commit ->io_pages and ++ * ->dirty_pages. ++ */ ++ sync_page_list(inode); ++ break; ++ case UF_CONTAINER_TAILS: ++ /* ++ * NOTE-NIKITA probably we can be smarter for tails. For now ++ * just commit all existing atoms. ++ */ ++ result = txnmgr_force_commit_all(inode->i_sb, 0); ++ break; ++ case UF_CONTAINER_EMPTY: ++ result = 0; ++ break; ++ case UF_CONTAINER_UNKNOWN: ++ default: ++ result = -EIO; ++ break; ++ } ++ ++ /* ++ * commit current transaction: there can be captured nodes from ++ * find_file_state() and finish_conversion(). ++ */ ++ reiser4_txn_restart_current(); ++ return result; ++} ++ ++/** ++ * writepages_unix_file - writepages of struct address_space_operations ++ * @mapping: ++ * @wbc: ++ * ++ * This captures anonymous pages and anonymous jnodes. Anonymous pages are ++ * pages which are dirtied via mmapping. Anonymous jnodes are ones which were ++ * created by reiser4_writepage. 
++ */ ++int writepages_unix_file(struct address_space *mapping, ++ struct writeback_control *wbc) ++{ ++ int result; ++ unix_file_info_t *uf_info; ++ pgoff_t pindex, jindex, nr_pages; ++ long to_capture; ++ struct inode *inode; ++ ++ inode = mapping->host; ++ if (!has_anonymous_pages(inode)) { ++ result = 0; ++ goto end; ++ } ++ jindex = pindex = wbc->range_start >> PAGE_CACHE_SHIFT; ++ result = 0; ++ nr_pages = ++ (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; ++ uf_info = unix_file_inode_data(inode); ++ ++ do { ++ reiser4_context *ctx; ++ ++ if (wbc->sync_mode != WB_SYNC_ALL) ++ to_capture = min(wbc->nr_to_write, CAPTURE_APAGE_BURST); ++ else ++ to_capture = CAPTURE_APAGE_BURST; ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) { ++ result = PTR_ERR(ctx); ++ break; ++ } ++ /* avoid recursive calls to ->sync_inodes */ ++ ctx->nobalance = 1; ++ assert("zam-760", lock_stack_isclean(get_current_lock_stack())); ++ assert("", LOCK_CNT_NIL(inode_sem_w)); ++ assert("", LOCK_CNT_NIL(inode_sem_r)); ++ ++ reiser4_txn_restart_current(); ++ ++ /* we have to get nonexclusive access to the file */ ++ if (get_current_context()->entd) { ++ /* ++ * use nonblocking version of nonexclusive_access to ++ * avoid deadlock which might look like the following: ++ * process P1 holds NEA on file F1 and called entd to ++ * reclaim some memory. Entd works for P1 and is going ++ * to capture pages of file F2. To do that entd has to ++ * get NEA to F2. F2 is held by process P2 which also ++ * called entd. But entd is serving P1 at the moment ++ * and P2 has to wait. Process P3 trying to get EA to ++ * file F2. Existence of pending EA request to file F2 ++ * makes impossible for entd to get NEA to file ++ * F2. Neither of these process can continue. Using ++ * nonblocking version of gettign NEA is supposed to ++ * avoid this deadlock. 
++ */ ++ if (try_to_get_nonexclusive_access(uf_info) == 0) { ++ result = RETERR(-EBUSY); ++ reiser4_exit_context(ctx); ++ break; ++ } ++ } else ++ get_nonexclusive_access(uf_info); ++ ++ while (to_capture > 0) { ++ pgoff_t start; ++ ++ assert("vs-1727", jindex <= pindex); ++ if (pindex == jindex) { ++ start = pindex; ++ result = ++ capture_anonymous_pages(inode->i_mapping, ++ &pindex, ++ to_capture); ++ if (result <= 0) ++ break; ++ to_capture -= result; ++ wbc->nr_to_write -= result; ++ if (start + result == pindex) { ++ jindex = pindex; ++ continue; ++ } ++ if (to_capture <= 0) ++ break; ++ } ++ /* deal with anonymous jnodes between jindex and pindex */ ++ result = ++ capture_anonymous_jnodes(inode->i_mapping, &jindex, ++ pindex, to_capture); ++ if (result < 0) ++ break; ++ to_capture -= result; ++ get_current_context()->nr_captured += result; ++ ++ if (jindex == (pgoff_t) - 1) { ++ assert("vs-1728", pindex == (pgoff_t) - 1); ++ break; ++ } ++ } ++ if (to_capture <= 0) ++ /* there may be left more pages */ ++ __mark_inode_dirty(inode, I_DIRTY_PAGES); ++ ++ drop_nonexclusive_access(uf_info); ++ if (result < 0) { ++ /* error happened */ ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ if (wbc->sync_mode != WB_SYNC_ALL) { ++ reiser4_exit_context(ctx); ++ return 0; ++ } ++ result = commit_file_atoms(inode); ++ reiser4_exit_context(ctx); ++ if (pindex >= nr_pages && jindex == pindex) ++ break; ++ } while (1); ++ ++ end: ++ if (is_in_reiser4_context()) { ++ if (get_current_context()->nr_captured >= CAPTURE_APAGE_BURST) { ++ /* ++ * there are already pages to flush, flush them out, do ++ * not delay until end of reiser4_sync_inodes ++ */ ++ reiser4_writeout(inode->i_sb, wbc); ++ get_current_context()->nr_captured = 0; ++ } ++ } ++ return result; ++} ++ ++/* ++ * ->sync() method for unix file. ++ * ++ * We are trying to be smart here. 
Instead of committing all atoms (original ++ * solution), we scan dirty pages of this file and commit all atoms they are ++ * part of. ++ * ++ * Situation is complicated by anonymous pages: i.e., extent-less pages ++ * dirtied through mmap. Fortunately sys_fsync() first calls ++ * filemap_fdatawrite() that will ultimately call reiser4_writepages(), insert ++ * all missing extents and capture anonymous pages. ++ */ ++int sync_unix_file(struct file *file, struct dentry *dentry, int datasync) ++{ ++ reiser4_context *ctx; ++ txn_atom *atom; ++ reiser4_block_nr reserve; ++ ++ ctx = reiser4_init_context(dentry->d_inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ reserve = estimate_update_common(dentry->d_inode); ++ if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) { ++ reiser4_exit_context(ctx); ++ return RETERR(-ENOSPC); ++ } ++ write_sd_by_inode_common(dentry->d_inode); ++ ++ atom = get_current_atom_locked(); ++ spin_lock_txnh(ctx->trans); ++ force_commit_atom(ctx->trans); ++ reiser4_exit_context(ctx); ++ return 0; ++} ++ ++/** ++ * readpage_unix_file_nolock - readpage of struct address_space_operations ++ * @file: ++ * @page: ++ * ++ * Compose a key and search for item containing information about @page ++ * data. If item is found - its readpage method is called. 
++ */ ++int readpage_unix_file(struct file *file, struct page *page) ++{ ++ reiser4_context *ctx; ++ int result; ++ struct inode *inode; ++ reiser4_key key; ++ item_plugin *iplug; ++ hint_t *hint; ++ lock_handle *lh; ++ coord_t *coord; ++ ++ assert("vs-1062", PageLocked(page)); ++ assert("vs-976", !PageUptodate(page)); ++ assert("vs-1061", page->mapping && page->mapping->host); ++ ++ if (page->mapping->host->i_size <= page_offset(page)) { ++ /* page is out of file already */ ++ unlock_page(page); ++ return -EINVAL; ++ } ++ ++ inode = page->mapping->host; ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) { ++ unlock_page(page); ++ return PTR_ERR(ctx); ++ } ++ ++ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get()); ++ if (hint == NULL) { ++ unlock_page(page); ++ reiser4_exit_context(ctx); ++ return RETERR(-ENOMEM); ++ } ++ ++ result = load_file_hint(file, hint); ++ if (result) { ++ kfree(hint); ++ unlock_page(page); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ lh = &hint->lh; ++ ++ /* get key of first byte of the page */ ++ key_by_inode_and_offset_common(inode, page_offset(page), &key); ++ ++ /* look for file metadata corresponding to first byte of page */ ++ page_cache_get(page); ++ unlock_page(page); ++ result = find_file_item(hint, &key, ZNODE_READ_LOCK, inode); ++ lock_page(page); ++ page_cache_release(page); ++ ++ if (page->mapping == NULL) { ++ /* ++ * readpage allows truncate to run concurrently. 
Page was ++ * truncated while it was not locked ++ */ ++ done_lh(lh); ++ kfree(hint); ++ unlock_page(page); ++ reiser4_txn_restart(ctx); ++ reiser4_exit_context(ctx); ++ return -EINVAL; ++ } ++ ++ if (result != CBK_COORD_FOUND || hint->ext_coord.coord.between != AT_UNIT) { ++ if (result == CBK_COORD_FOUND && ++ hint->ext_coord.coord.between != AT_UNIT) ++ /* file is truncated */ ++ result = -EINVAL; ++ done_lh(lh); ++ kfree(hint); ++ unlock_page(page); ++ reiser4_txn_restart(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ ++ /* ++ * item corresponding to page is found. It can not be removed because ++ * znode lock is held ++ */ ++ if (PageUptodate(page)) { ++ done_lh(lh); ++ kfree(hint); ++ unlock_page(page); ++ reiser4_txn_restart(ctx); ++ reiser4_exit_context(ctx); ++ return 0; ++ } ++ ++ coord = &hint->ext_coord.coord; ++ result = zload(coord->node); ++ if (result) { ++ done_lh(lh); ++ kfree(hint); ++ unlock_page(page); ++ reiser4_txn_restart(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ ++ validate_extended_coord(&hint->ext_coord, page_offset(page)); ++ ++ if (!coord_is_existing_unit(coord)) { ++ /* this indicates corruption */ ++ warning("vs-280", ++ "Looking for page %lu of file %llu (size %lli). " ++ "No file items found (%d). 
File is corrupted?\n", ++ page->index, (unsigned long long)get_inode_oid(inode), ++ inode->i_size, result); ++ zrelse(coord->node); ++ done_lh(lh); ++ kfree(hint); ++ unlock_page(page); ++ reiser4_txn_restart(ctx); ++ reiser4_exit_context(ctx); ++ return RETERR(-EIO); ++ } ++ ++ /* ++ * get plugin of found item or use plugin if extent if there are no ++ * one ++ */ ++ iplug = item_plugin_by_coord(coord); ++ if (iplug->s.file.readpage) ++ result = iplug->s.file.readpage(coord, page); ++ else ++ result = RETERR(-EINVAL); ++ ++ if (!result) { ++ set_key_offset(&key, ++ (loff_t) (page->index + 1) << PAGE_CACHE_SHIFT); ++ /* FIXME should call reiser4_set_hint() */ ++ reiser4_unset_hint(hint); ++ } else { ++ unlock_page(page); ++ reiser4_unset_hint(hint); ++ } ++ assert("vs-979", ++ ergo(result == 0, (PageLocked(page) || PageUptodate(page)))); ++ assert("vs-9791", ergo(result != 0, !PageLocked(page))); ++ ++ zrelse(coord->node); ++ done_lh(lh); ++ ++ save_file_hint(file, hint); ++ kfree(hint); ++ ++ /* ++ * FIXME: explain why it is needed. HINT: page allocation in write can ++ * not be done when atom is not NULL because reiser4_writepage can not ++ * kick entd and have to eflush ++ */ ++ reiser4_txn_restart(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++struct uf_readpages_context { ++ lock_handle lh; ++ coord_t coord; ++}; ++ ++/* A callback function for readpages_unix_file/read_cache_pages. ++ * If the file is build of tails, then return error (-ENOENT). ++ * ++ * @data -- a pointer to reiser4_readpages_context object, ++ * to save the twig lock and the coord between ++ * read_cache_page iterations. ++ * @page -- page to start read. 
++ */ ++static int uf_readpages_filler(void * data, struct page * page) ++{ ++ struct uf_readpages_context *rc = data; ++ jnode * node; ++ int ret = 0; ++ reiser4_extent *ext; ++ __u64 ext_index; ++ int cbk_done = 0; ++ struct address_space * mapping = page->mapping; ++ ++ if (PageUptodate(page)) { ++ unlock_page(page); ++ return 0; ++ } ++ if (rc->lh.node == 0) { ++ /* no twig lock - have to do tree search. */ ++ reiser4_key key; ++ repeat: ++ unlock_page(page); ++ key_by_inode_and_offset_common( ++ mapping->host, page_offset(page), &key); ++ ret = coord_by_key( ++ &get_super_private(mapping->host->i_sb)->tree, ++ &key, &rc->coord, &rc->lh, ++ ZNODE_READ_LOCK, FIND_EXACT, ++ TWIG_LEVEL, TWIG_LEVEL, CBK_UNIQUE, NULL); ++ if (ret) ++ return ret; ++ lock_page(page); ++ cbk_done = 1; ++ } ++ ret = zload(rc->coord.node); ++ if (ret) { ++ unlock_page(page); ++ return ret; ++ } ++ if (!coord_is_existing_item(&rc->coord) || ++ !item_is_extent(&rc->coord)) { ++ zrelse(rc->coord.node); ++ unlock_page(page); ++ return RETERR(-EIO); ++ } ++ ext = extent_by_coord(&rc->coord); ++ ext_index = extent_unit_index(&rc->coord); ++ if (page->index < ext_index || ++ page->index >= ext_index + extent_get_width(ext)) { ++ /* the page index doesn't belong to the extent unit ++ which the coord points to - release the lock and ++ repeat with tree search. */ ++ zrelse(rc->coord.node); ++ done_lh(&rc->lh); ++ /* we can be here after a CBK call only in case of ++ corruption of the tree or the tree lookup algorithm bug. 
*/ ++ if (unlikely(cbk_done)) { ++ unlock_page(page); ++ return RETERR(-EIO); ++ } ++ goto repeat; ++ } ++ node = jnode_of_page(page); ++ if (unlikely(IS_ERR(node))) { ++ zrelse(rc->coord.node); ++ unlock_page(page); ++ return PTR_ERR(node); ++ } ++ ret = reiser4_do_readpage_extent(ext, page->index - ext_index, page); ++ jput(node); ++ zrelse(rc->coord.node); ++ if (ret) ++ unlock_page(page); ++ return ret; ++} ++ ++/** ++ * readpages_unix_file - called by the readahead code, starts reading for each ++ * page of given list of pages ++ */ ++int readpages_unix_file( ++ struct file *file, struct address_space *mapping, ++ struct list_head *pages, unsigned nr_pages) ++{ ++ reiser4_context *ctx; ++ struct uf_readpages_context rc; ++ int ret; ++ ++ ctx = reiser4_init_context(mapping->host->i_sb); ++ if (IS_ERR(ctx)) { ++ put_pages_list(pages); ++ return PTR_ERR(ctx); ++ } ++ init_lh(&rc.lh); ++ ret = read_cache_pages(mapping, pages, uf_readpages_filler, &rc); ++ done_lh(&rc.lh); ++ context_set_commit_async(ctx); ++ /* close the transaction to protect further page allocation from deadlocks */ ++ reiser4_txn_restart(ctx); ++ reiser4_exit_context(ctx); ++ return ret; ++} ++ ++static reiser4_block_nr unix_file_estimate_read(struct inode *inode, ++ loff_t count UNUSED_ARG) ++{ ++ /* We should reserve one block, because of updating of the stat data ++ item */ ++ assert("vs-1249", ++ inode_file_plugin(inode)->estimate.update == ++ estimate_update_common); ++ return estimate_update_common(inode); ++} ++ ++/* this is called with nonexclusive access obtained, file's container can not change */ ++static ssize_t read_file(hint_t *hint, struct file *file, /* file to read from to */ ++ char __user *buf, /* address of user-space buffer */ ++ size_t count, /* number of bytes to read */ ++ loff_t *off) ++{ ++ int result; ++ struct inode *inode; ++ flow_t flow; ++ int (*read_f) (struct file *, flow_t *, hint_t *); ++ coord_t *coord; ++ znode *loaded; ++ ++ inode = file->f_dentry->d_inode; 
++ ++ /* build flow */ ++ assert("vs-1250", ++ inode_file_plugin(inode)->flow_by_inode == ++ flow_by_inode_unix_file); ++ result = ++ flow_by_inode_unix_file(inode, buf, 1 /* user space */ , count, ++ *off, READ_OP, &flow); ++ if (unlikely(result)) ++ return result; ++ ++ /* get seal and coord sealed with it from reiser4 private data ++ of struct file. The coord will tell us where our last read ++ of this file finished, and the seal will help to determine ++ if that location is still valid. ++ */ ++ coord = &hint->ext_coord.coord; ++ while (flow.length && result == 0) { ++ result = ++ find_file_item(hint, &flow.key, ZNODE_READ_LOCK, inode); ++ if (cbk_errored(result)) ++ /* error happened */ ++ break; ++ ++ if (coord->between != AT_UNIT) { ++ /* there were no items corresponding to given offset */ ++ done_lh(hint->ext_coord.lh); ++ break; ++ } ++ ++ loaded = coord->node; ++ result = zload(loaded); ++ if (unlikely(result)) { ++ done_lh(hint->ext_coord.lh); ++ break; ++ } ++ ++ if (hint->ext_coord.valid == 0) ++ validate_extended_coord(&hint->ext_coord, ++ get_key_offset(&flow.key)); ++ ++ assert("vs-4", hint->ext_coord.valid == 1); ++ assert("vs-33", hint->ext_coord.lh == &hint->lh); ++ /* call item's read method */ ++ read_f = item_plugin_by_coord(coord)->s.file.read; ++ result = read_f(file, &flow, hint); ++ zrelse(loaded); ++ done_lh(hint->ext_coord.lh); ++ } ++ ++ return (count - flow.length) ? (count - flow.length) : result; ++} ++ ++static ssize_t read_unix_file_container_tails(struct file*, char __user*, size_t, loff_t*); ++ ++/** ++ * read_unix_file - read of struct file_operations ++ * @file: file to read from ++ * @buf: address of user-space buffer ++ * @read_amount: number of bytes to read ++ * @off: position in file to read from ++ * ++ * This is implementation of vfs's read method of struct file_operations for ++ * unix file plugin. 
++ */ ++ssize_t read_unix_file(struct file *file, char __user *buf, size_t read_amount, ++ loff_t *off) ++{ ++ reiser4_context *ctx; ++ ssize_t result; ++ struct inode *inode; ++ unix_file_info_t *uf_info; ++ ++ if (unlikely(read_amount == 0)) ++ return 0; ++ ++ assert("umka-072", file != NULL); ++ assert("umka-074", off != NULL); ++ inode = file->f_dentry->d_inode; ++ assert("vs-972", !reiser4_inode_get_flag(inode, REISER4_NO_SD)); ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ uf_info = unix_file_inode_data(inode); ++ if (uf_info->container == UF_CONTAINER_UNKNOWN) { ++ get_exclusive_access(uf_info); ++ result = find_file_state(inode, uf_info); ++ if (unlikely(result != 0)) ++ goto out; ++ } else ++ get_nonexclusive_access(uf_info); ++ result = reiser4_grab_space_force(unix_file_estimate_read(inode, read_amount), ++ BA_CAN_COMMIT); ++ if (unlikely(result != 0)) ++ goto out; ++ if (uf_info->container == UF_CONTAINER_EXTENTS){ ++ result = do_sync_read(file, buf, read_amount, off); ++ } else if (uf_info->container == UF_CONTAINER_TAILS || ++ reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV) || ++ reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) { ++ result = read_unix_file_container_tails(file, buf, read_amount, off); ++ } else { ++ assert("zam-1085", uf_info->container == UF_CONTAINER_EMPTY); ++ result = 0; ++ } ++out: ++ drop_access(uf_info); ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++static ssize_t read_unix_file_container_tails( ++ struct file *file, char __user *buf, size_t read_amount, loff_t *off) ++{ ++ int result; ++ struct inode *inode; ++ hint_t *hint; ++ unix_file_info_t *uf_info; ++ size_t count, read, left; ++ loff_t size; ++ ++ assert("umka-072", file != NULL); ++ assert("umka-074", off != NULL); ++ inode = file->f_dentry->d_inode; ++ assert("vs-972", !reiser4_inode_get_flag(inode, REISER4_NO_SD)); ++ ++ hint = kmalloc(sizeof(*hint), 
reiser4_ctx_gfp_mask_get()); ++ if (hint == NULL) ++ return RETERR(-ENOMEM); ++ ++ result = load_file_hint(file, hint); ++ if (result) { ++ kfree(hint); ++ return result; ++ } ++ ++ left = read_amount; ++ count = 0; ++ uf_info = unix_file_inode_data(inode); ++ while (left > 0) { ++ reiser4_txn_restart_current(); ++ size = i_size_read(inode); ++ if (*off >= size) ++ /* position to read from is past the end of file */ ++ break; ++ if (*off + left > size) ++ left = size - *off; ++ /* faultin user page */ ++ result = fault_in_pages_writeable(buf, left > PAGE_CACHE_SIZE ? PAGE_CACHE_SIZE : left); ++ if (result) ++ return RETERR(-EFAULT); ++ ++ read = read_file(hint, file, buf, ++ left > PAGE_CACHE_SIZE ? PAGE_CACHE_SIZE : left, ++ off); ++ if (read < 0) { ++ result = read; ++ break; ++ } ++ left -= read; ++ buf += read; ++ ++ /* update position in a file */ ++ *off += read; ++ /* total number of read bytes */ ++ count += read; ++ } ++ done_lh(&hint->lh); ++ save_file_hint(file, hint); ++ kfree(hint); ++ if (count) ++ file_accessed(file); ++ /* return number of read bytes or error code if nothing is read */ ++ return count ? count : result; ++} ++ ++/* This function takes care about @file's pages. First of all it checks if ++ filesystems readonly and if so gets out. Otherwise, it throws out all ++ pages of file if it was mapped for read and going to be mapped for write ++ and consists of tails. This is done in order to not manage few copies ++ of the data (first in page cache and second one in tails them selves) ++ for the case of mapping files consisting tails. ++ ++ Here also tail2extent conversion is performed if it is allowed and file ++ is going to be written or mapped for write. This functions may be called ++ from write_unix_file() or mmap_unix_file(). 
*/ ++static int check_pages_unix_file(struct file *file, struct inode *inode) ++{ ++ reiser4_invalidate_pages(inode->i_mapping, 0, ++ (inode->i_size + PAGE_CACHE_SIZE - ++ 1) >> PAGE_CACHE_SHIFT, 0); ++ return unpack(file, inode, 0 /* not forever */ ); ++} ++ ++/** ++ * mmap_unix_file - mmap of struct file_operations ++ * @file: file to mmap ++ * @vma: ++ * ++ * This is implementation of vfs's mmap method of struct file_operations for ++ * unix file plugin. It converts file to extent if necessary. Sets ++ * reiser4_inode's flag - REISER4_HAS_MMAP. ++ */ ++int mmap_unix_file(struct file *file, struct vm_area_struct *vma) ++{ ++ reiser4_context *ctx; ++ int result; ++ struct inode *inode; ++ unix_file_info_t *uf_info; ++ reiser4_block_nr needed; ++ ++ inode = file->f_dentry->d_inode; ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ uf_info = unix_file_inode_data(inode); ++ ++ get_exclusive_access(uf_info); ++ ++ if (!IS_RDONLY(inode) && (vma->vm_flags & (VM_MAYWRITE | VM_SHARED))) { ++ /* ++ * we need file built of extent items. If it is still built of ++ * tail items we have to convert it. Find what items the file ++ * is built of ++ */ ++ result = find_file_state(inode, uf_info); ++ if (result != 0) { ++ drop_exclusive_access(uf_info); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ ++ assert("vs-1648", (uf_info->container == UF_CONTAINER_TAILS || ++ uf_info->container == UF_CONTAINER_EXTENTS || ++ uf_info->container == UF_CONTAINER_EMPTY)); ++ if (uf_info->container == UF_CONTAINER_TAILS) { ++ /* ++ * invalidate all pages and convert file from tails to ++ * extents ++ */ ++ result = check_pages_unix_file(file, inode); ++ if (result) { ++ drop_exclusive_access(uf_info); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ } ++ } ++ ++ /* ++ * generic_file_mmap will do update_atime. Grab space for stat data ++ * update. 
++ */ ++ needed = inode_file_plugin(inode)->estimate.update(inode); ++ result = reiser4_grab_space_force(needed, BA_CAN_COMMIT); ++ if (result) { ++ drop_exclusive_access(uf_info); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ ++ result = generic_file_mmap(file, vma); ++ if (result == 0) { ++ /* mark file as having mapping. */ ++ reiser4_inode_set_flag(inode, REISER4_HAS_MMAP); ++ } ++ ++ drop_exclusive_access(uf_info); ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++/** ++ * find_first_item ++ * @inode: ++ * ++ * Finds file item which is responsible for first byte in the file. ++ */ ++static int find_first_item(struct inode *inode) ++{ ++ coord_t coord; ++ lock_handle lh; ++ reiser4_key key; ++ int result; ++ ++ coord_init_zero(&coord); ++ init_lh(&lh); ++ inode_file_plugin(inode)->key_by_inode(inode, 0, &key); ++ result = find_file_item_nohint(&coord, &lh, &key, ZNODE_READ_LOCK, ++ inode); ++ if (result == CBK_COORD_FOUND) { ++ if (coord.between == AT_UNIT) { ++ result = zload(coord.node); ++ if (result == 0) { ++ result = item_id_by_coord(&coord); ++ zrelse(coord.node); ++ if (result != EXTENT_POINTER_ID && ++ result != FORMATTING_ID) ++ result = RETERR(-EIO); ++ } ++ } else ++ result = RETERR(-EIO); ++ } ++ done_lh(&lh); ++ return result; ++} ++ ++/** ++ * open_unix_file ++ * @inode: ++ * @file: ++ * ++ * If filesystem is not readonly - complete uncompleted tail conversion if ++ * there was one ++ */ ++int open_unix_file(struct inode *inode, struct file *file) ++{ ++ int result; ++ reiser4_context *ctx; ++ unix_file_info_t *uf_info; ++ ++ if (IS_RDONLY(inode)) ++ return 0; ++ ++ if (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) ++ return 0; ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ uf_info = unix_file_inode_data(inode); ++ get_exclusive_access(uf_info); ++ ++ /* ++ * it may happen that another process is doing tail conversion. 
Wait ++ * until it completes ++ */ ++ while (1) { ++ if (reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV)) { ++ drop_exclusive_access(uf_info); ++ schedule(); ++ get_exclusive_access(uf_info); ++ continue; ++ } ++ break; ++ } ++ ++ if (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) { ++ /* ++ * other process completed the conversion ++ */ ++ drop_exclusive_access(uf_info); ++ reiser4_exit_context(ctx); ++ return 0; ++ } ++ ++ /* ++ * file left in semi converted state after unclean shutdown or another ++ * thread is doing conversion and dropped exclusive access which doing ++ * balance dirty pages. Complete the conversion ++ */ ++ result = find_first_item(inode); ++ if (result == EXTENT_POINTER_ID) ++ /* ++ * first item is extent, therefore there was incomplete ++ * tail2extent conversion. Complete it ++ */ ++ result = tail2extent(unix_file_inode_data(inode)); ++ else if (result == FORMATTING_ID) ++ /* ++ * first item is formatting item, therefore there was ++ * incomplete extent2tail conversion. Complete it ++ */ ++ result = extent2tail(unix_file_inode_data(inode)); ++ else ++ result = -EIO; ++ ++ assert("vs-1712", ++ ergo(result == 0, ++ (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED) && ++ !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV)))); ++ drop_exclusive_access(uf_info); ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++#define NEITHER_OBTAINED 0 ++#define EA_OBTAINED 1 ++#define NEA_OBTAINED 2 ++ ++static void drop_access(unix_file_info_t *uf_info) ++{ ++ if (uf_info->exclusive_use) ++ drop_exclusive_access(uf_info); ++ else ++ drop_nonexclusive_access(uf_info); ++} ++ ++#define debug_wuf(format, ...) 
printk("%s: %d: %s: " format "\n", \ ++ __FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__) ++ ++/** ++ * write_unix_file - write of struct file_operations ++ * @file: file to write to ++ * @buf: address of user-space buffer ++ * @write_amount: number of bytes to write ++ * @off: position in file to write to ++ * ++ * This is implementation of vfs's write method of struct file_operations for ++ * unix file plugin. ++ */ ++ssize_t write_unix_file(struct file *file, const char __user *buf, ++ size_t count, loff_t *pos) ++{ ++ int result; ++ reiser4_context *ctx; ++ struct inode *inode; ++ unix_file_info_t *uf_info; ++ ssize_t written; ++ int try_free_space; ++ int to_write = PAGE_CACHE_SIZE * WRITE_GRANULARITY; ++ size_t left; ++ ssize_t (*write_op)(struct file *, const char __user *, size_t, ++ loff_t *pos); ++ int ea; ++ loff_t new_size; ++ ++ inode = file->f_dentry->d_inode; ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ mutex_lock(&inode->i_mutex); ++ ++ assert("vs-947", !reiser4_inode_get_flag(inode, REISER4_NO_SD)); ++ assert("vs-9471", (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED))); ++ ++ /* check amount of bytes to write and writing position */ ++ result = generic_write_checks(file, pos, &count, 0); ++ if (result) { ++ mutex_unlock(&inode->i_mutex); ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ ++ result = remove_suid(file->f_dentry); ++ if (result) { ++ mutex_unlock(&inode->i_mutex); ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ /* remove_suid might create a transaction */ ++ reiser4_txn_restart(ctx); ++ ++ uf_info = unix_file_inode_data(inode); ++ ++ current->backing_dev_info = inode->i_mapping->backing_dev_info; ++ written = 0; ++ try_free_space = 0; ++ left = count; ++ ea = NEITHER_OBTAINED; ++ ++ new_size = i_size_read(inode); ++ if (*pos + count > new_size) ++ new_size = *pos + count; ++ ++ while (left) { ++ if 
(left < to_write) ++ to_write = left; ++ ++ if (uf_info->container == UF_CONTAINER_EMPTY) { ++ get_exclusive_access(uf_info); ++ ea = EA_OBTAINED; ++ if (uf_info->container != UF_CONTAINER_EMPTY) { ++ /* file is made not empty by another process */ ++ drop_exclusive_access(uf_info); ++ ea = NEITHER_OBTAINED; ++ continue; ++ } ++ } else if (uf_info->container == UF_CONTAINER_UNKNOWN) { ++ /* ++ * get exclusive access directly just to not have to ++ * re-obtain it if file will appear empty ++ */ ++ get_exclusive_access(uf_info); ++ ea = EA_OBTAINED; ++ result = find_file_state(inode, uf_info); ++ if (result) { ++ drop_exclusive_access(uf_info); ++ ea = NEITHER_OBTAINED; ++ break; ++ } ++ } else { ++ get_nonexclusive_access(uf_info); ++ ea = NEA_OBTAINED; ++ } ++ ++ /* either EA or NEA is obtained. Choose item write method */ ++ if (uf_info->container == UF_CONTAINER_EXTENTS) { ++ /* file is built of extent items */ ++ write_op = reiser4_write_extent; ++ } else if (uf_info->container == UF_CONTAINER_EMPTY) { ++ /* file is empty */ ++ if (should_have_notail(uf_info, new_size)) ++ write_op = reiser4_write_extent; ++ else ++ write_op = reiser4_write_tail; ++ } else { ++ /* file is built of tail items */ ++ if (should_have_notail(uf_info, new_size)) { ++ if (ea == NEA_OBTAINED) { ++ drop_nonexclusive_access(uf_info); ++ get_exclusive_access(uf_info); ++ ea = EA_OBTAINED; ++ } ++ if (uf_info->container == UF_CONTAINER_TAILS) { ++ /* ++ * if file is being convered by another ++ * process - wait until it completes ++ */ ++ while (1) { ++ if (reiser4_inode_get_flag(inode, ++ REISER4_PART_IN_CONV)) { ++ drop_exclusive_access(uf_info); ++ schedule(); ++ get_exclusive_access(uf_info); ++ continue; ++ } ++ break; ++ } ++ if (uf_info->container == UF_CONTAINER_TAILS) { ++ result = tail2extent(uf_info); ++ if (result) ++ break; ++ } ++ } ++ drop_exclusive_access(uf_info); ++ ea = NEITHER_OBTAINED; ++ continue; ++ } ++ write_op = reiser4_write_tail; ++ } ++ ++ written = 
write_op(file, buf, to_write, pos); ++ if (written == -ENOSPC && try_free_space) { ++ drop_access(uf_info); ++ txnmgr_force_commit_all(inode->i_sb, 0); ++ try_free_space = 0; ++ continue; ++ } ++ if (written < 0) { ++ drop_access(uf_info); ++ result = written; ++ break; ++ } ++ /* something is written. */ ++ if (uf_info->container == UF_CONTAINER_EMPTY) { ++ assert("", ea == EA_OBTAINED); ++ uf_info->container = ++ (write_op == reiser4_write_extent) ? ++ UF_CONTAINER_EXTENTS : UF_CONTAINER_TAILS; ++ } else { ++ assert("", ergo(uf_info->container == UF_CONTAINER_EXTENTS, ++ write_op == reiser4_write_extent)); ++ assert("", ergo(uf_info->container == UF_CONTAINER_TAILS, ++ write_op == reiser4_write_tail)); ++ } ++ if (*pos + written > inode->i_size) ++ INODE_SET_FIELD(inode, i_size, *pos + written); ++ file_update_time(file); ++ result = reiser4_update_sd(inode); ++ if (result) { ++ mutex_unlock(&inode->i_mutex); ++ current->backing_dev_info = NULL; ++ drop_access(uf_info); ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ drop_access(uf_info); ++ ea = NEITHER_OBTAINED; ++ reiser4_txn_restart(ctx); ++ current->journal_info = NULL; ++ /* ++ * tell VM how many pages were dirtied. 
Maybe number of pages ++ * which were dirty already should not be counted ++ */ ++ balance_dirty_pages_ratelimited_nr(inode->i_mapping, ++ (written + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE); ++ current->journal_info = ctx; ++ ++ left -= written; ++ buf += written; ++ *pos += written; ++ } ++ ++ mutex_unlock(&inode->i_mutex); ++ ++ if (result == 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { ++ reiser4_txn_restart_current(); ++ grab_space_enable(); ++ result = sync_unix_file(file, file->f_dentry, ++ 0 /* data and stat data */ ); ++ if (result) ++ warning("reiser4-7", "failed to sync file %llu", ++ (unsigned long long)get_inode_oid(inode)); ++ } ++ ++ current->backing_dev_info = NULL; ++ ++ reiser4_exit_context(ctx); ++ ++ /* ++ * return number of written bytes or error code if nothing is ++ * written. Note, that it does not work correctly in case when ++ * sync_unix_file returns error ++ */ ++ return (count - left) ? (count - left) : result; ++} ++ ++/** ++ * release_unix_file - release of struct file_operations ++ * @inode: inode of released file ++ * @file: file to release ++ * ++ * Implementation of release method of struct file_operations for unix file ++ * plugin. If last reference to indode is released - convert all extent items ++ * into tail items if necessary. Frees reiser4 specific file data. 
++ */ ++int release_unix_file(struct inode *inode, struct file *file) ++{ ++ reiser4_context *ctx; ++ unix_file_info_t *uf_info; ++ int result; ++ int in_reiser4; ++ ++ in_reiser4 = is_in_reiser4_context(); ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ result = 0; ++ if (in_reiser4 == 0) { ++ uf_info = unix_file_inode_data(inode); ++ ++ get_exclusive_access(uf_info); ++ if (atomic_read(&file->f_dentry->d_count) == 1 && ++ uf_info->container == UF_CONTAINER_EXTENTS && ++ !should_have_notail(uf_info, inode->i_size) && ++ !rofs_inode(inode)) { ++ result = extent2tail(uf_info); ++ if (result != 0) { ++ warning("nikita-3233", ++ "Failed (%d) to convert in %s (%llu)", ++ result, __FUNCTION__, ++ (unsigned long long) ++ get_inode_oid(inode)); ++ } ++ } ++ drop_exclusive_access(uf_info); ++ } else { ++ /* ++ we are within reiser4 context already. How latter is ++ possible? Simple: ++ ++ (gdb) bt ++ #0 get_exclusive_access () ++ #2 0xc01e56d3 in release_unix_file () ++ #3 0xc01c3643 in reiser4_release () ++ #4 0xc014cae0 in __fput () ++ #5 0xc013ffc3 in remove_vm_struct () ++ #6 0xc0141786 in exit_mmap () ++ #7 0xc0118480 in mmput () ++ #8 0xc0133205 in oom_kill () ++ #9 0xc01332d1 in out_of_memory () ++ #10 0xc013bc1d in try_to_free_pages () ++ #11 0xc013427b in __alloc_pages () ++ #12 0xc013f058 in do_anonymous_page () ++ #13 0xc013f19d in do_no_page () ++ #14 0xc013f60e in handle_mm_fault () ++ #15 0xc01131e5 in do_page_fault () ++ #16 0xc0104935 in error_code () ++ #17 0xc025c0c6 in __copy_to_user_ll () ++ #18 0xc01d496f in reiser4_read_tail () ++ #19 0xc01e4def in read_unix_file () ++ #20 0xc01c3504 in reiser4_read () ++ #21 0xc014bd4f in vfs_read () ++ #22 0xc014bf66 in sys_read () ++ */ ++ warning("vs-44", "out of memory?"); ++ } ++ ++ reiser4_free_file_fsdata(file); ++ ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++static void set_file_notail(struct inode *inode) ++{ ++ reiser4_inode *state; ++ 
formatting_plugin *tplug; ++ ++ state = reiser4_inode_data(inode); ++ tplug = formatting_plugin_by_id(NEVER_TAILS_FORMATTING_ID); ++ force_plugin_pset(inode, PSET_FORMATTING, (reiser4_plugin *)tplug); ++} ++ ++/* if file is built of tails - convert it to extents */ ++static int unpack(struct file *filp, struct inode *inode, int forever) ++{ ++ int result = 0; ++ unix_file_info_t *uf_info; ++ ++ uf_info = unix_file_inode_data(inode); ++ assert("vs-1628", ea_obtained(uf_info)); ++ ++ result = find_file_state(inode, uf_info); ++ if (result) ++ return result; ++ assert("vs-1074", uf_info->container != UF_CONTAINER_UNKNOWN); ++ ++ if (uf_info->container == UF_CONTAINER_TAILS) { ++ /* ++ * if file is being convered by another process - wait until it ++ * completes ++ */ ++ while (1) { ++ if (reiser4_inode_get_flag(inode, ++ REISER4_PART_IN_CONV)) { ++ drop_exclusive_access(uf_info); ++ schedule(); ++ get_exclusive_access(uf_info); ++ continue; ++ } ++ break; ++ } ++ if (uf_info->container == UF_CONTAINER_TAILS) { ++ result = tail2extent(uf_info); ++ if (result) ++ return result; ++ } ++ } ++ if (forever) { ++ /* safe new formatting plugin in stat data */ ++ __u64 tograb; ++ ++ set_file_notail(inode); ++ ++ grab_space_enable(); ++ tograb = inode_file_plugin(inode)->estimate.update(inode); ++ result = reiser4_grab_space(tograb, BA_CAN_COMMIT); ++ result = reiser4_update_sd(inode); ++ } ++ ++ return result; ++} ++ ++/* implentation of vfs' ioctl method of struct file_operations for unix file ++ plugin ++*/ ++int ++ioctl_unix_file(struct inode *inode, struct file *filp, ++ unsigned int cmd, unsigned long arg UNUSED_ARG) ++{ ++ reiser4_context *ctx; ++ int result; ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ switch (cmd) { ++ case REISER4_IOC_UNPACK: ++ get_exclusive_access(unix_file_inode_data(inode)); ++ result = unpack(filp, inode, 1 /* forever */ ); ++ drop_exclusive_access(unix_file_inode_data(inode)); ++ break; ++ ++ 
default: ++ result = RETERR(-ENOSYS); ++ break; ++ } ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++/* implentation of vfs' bmap method of struct address_space_operations for unix ++ file plugin ++*/ ++sector_t bmap_unix_file(struct address_space * mapping, sector_t lblock) ++{ ++ reiser4_context *ctx; ++ sector_t result; ++ reiser4_key key; ++ coord_t coord; ++ lock_handle lh; ++ struct inode *inode; ++ item_plugin *iplug; ++ sector_t block; ++ ++ inode = mapping->host; ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ key_by_inode_and_offset_common(inode, ++ (loff_t) lblock * current_blocksize, ++ &key); ++ ++ init_lh(&lh); ++ result = ++ find_file_item_nohint(&coord, &lh, &key, ZNODE_READ_LOCK, inode); ++ if (cbk_errored(result)) { ++ done_lh(&lh); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ ++ result = zload(coord.node); ++ if (result) { ++ done_lh(&lh); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ ++ iplug = item_plugin_by_coord(&coord); ++ if (iplug->s.file.get_block) { ++ result = iplug->s.file.get_block(&coord, lblock, &block); ++ if (result == 0) ++ result = block; ++ } else ++ result = RETERR(-EINVAL); ++ ++ zrelse(coord.node); ++ done_lh(&lh); ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++/** ++ * flow_by_inode_unix_file - initizlize structure flow ++ * @inode: inode of file for which read or write is abou ++ * @buf: buffer to perform read to or write from ++ * @user: flag showing whether @buf is user space or kernel space ++ * @size: size of buffer @buf ++ * @off: start offset fro read or write ++ * @op: READ or WRITE ++ * @flow: ++ * ++ * Initializes fields of @flow: key, size of data, i/o mode (read or write). 
++ */ ++int flow_by_inode_unix_file(struct inode *inode, ++ const char __user *buf, int user, ++ loff_t size, loff_t off, ++ rw_op op, flow_t *flow) ++{ ++ assert("nikita-1100", inode != NULL); ++ ++ flow->length = size; ++ memcpy(&flow->data, &buf, sizeof(buf)); ++ flow->user = user; ++ flow->op = op; ++ assert("nikita-1931", inode_file_plugin(inode) != NULL); ++ assert("nikita-1932", ++ inode_file_plugin(inode)->key_by_inode == ++ key_by_inode_and_offset_common); ++ /* calculate key of write position and insert it into flow->key */ ++ return key_by_inode_and_offset_common(inode, off, &flow->key); ++} ++ ++/* plugin->u.file.set_plug_in_sd = NULL ++ plugin->u.file.set_plug_in_inode = NULL ++ plugin->u.file.create_blank_sd = NULL */ ++/* plugin->u.file.delete */ ++/* ++ plugin->u.file.add_link = reiser4_add_link_common ++ plugin->u.file.rem_link = NULL */ ++ ++/* plugin->u.file.owns_item ++ this is common_file_owns_item with assertion */ ++/* Audited by: green(2002.06.15) */ ++int ++owns_item_unix_file(const struct inode *inode /* object to check against */ , ++ const coord_t * coord /* coord to check */ ) ++{ ++ int result; ++ ++ result = owns_item_common(inode, coord); ++ if (!result) ++ return 0; ++ if (!plugin_of_group(item_plugin_by_coord(coord), ++ UNIX_FILE_METADATA_ITEM_TYPE)) ++ return 0; ++ assert("vs-547", ++ item_id_by_coord(coord) == EXTENT_POINTER_ID || ++ item_id_by_coord(coord) == FORMATTING_ID); ++ return 1; ++} ++ ++static int setattr_truncate(struct inode *inode, struct iattr *attr) ++{ ++ int result; ++ int s_result; ++ loff_t old_size; ++ reiser4_tree *tree; ++ ++ inode_check_scale(inode, inode->i_size, attr->ia_size); ++ ++ old_size = inode->i_size; ++ tree = reiser4_tree_by_inode(inode); ++ ++ result = safe_link_grab(tree, BA_CAN_COMMIT); ++ if (result == 0) ++ result = safe_link_add(inode, SAFE_TRUNCATE); ++ if (result == 0) ++ result = truncate_file_body(inode, attr->ia_size); ++ if (result) ++ warning("vs-1588", "truncate_file failed: oid 
%lli, " ++ "old size %lld, new size %lld, retval %d", ++ (unsigned long long)get_inode_oid(inode), ++ old_size, attr->ia_size, result); ++ ++ s_result = safe_link_grab(tree, BA_CAN_COMMIT); ++ if (s_result == 0) ++ s_result = ++ safe_link_del(tree, get_inode_oid(inode), SAFE_TRUNCATE); ++ if (s_result != 0) { ++ warning("nikita-3417", "Cannot kill safelink %lli: %i", ++ (unsigned long long)get_inode_oid(inode), s_result); ++ } ++ safe_link_release(tree); ++ return result; ++} ++ ++/* plugin->u.file.setattr method */ ++/* This calls inode_setattr and if truncate is in effect it also takes ++ exclusive inode access to avoid races */ ++int setattr_unix_file(struct dentry *dentry, /* Object to change attributes */ ++ struct iattr *attr /* change description */ ) ++{ ++ int result; ++ ++ if (attr->ia_valid & ATTR_SIZE) { ++ reiser4_context *ctx; ++ unix_file_info_t *uf_info; ++ ++ /* truncate does reservation itself and requires exclusive ++ access obtained */ ++ ctx = reiser4_init_context(dentry->d_inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ uf_info = unix_file_inode_data(dentry->d_inode); ++ get_exclusive_access(uf_info); ++ result = setattr_truncate(dentry->d_inode, attr); ++ drop_exclusive_access(uf_info); ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ } else ++ result = reiser4_setattr_common(dentry, attr); ++ ++ return result; ++} ++ ++/* plugin->u.file.init_inode_data */ ++void ++init_inode_data_unix_file(struct inode *inode, ++ reiser4_object_create_data * crd, int create) ++{ ++ unix_file_info_t *data; ++ ++ data = unix_file_inode_data(inode); ++ data->container = create ? 
UF_CONTAINER_EMPTY : UF_CONTAINER_UNKNOWN; ++ init_rwsem(&data->latch); ++ data->tplug = inode_formatting_plugin(inode); ++ data->exclusive_use = 0; ++ ++#if REISER4_DEBUG ++ data->ea_owner = NULL; ++ atomic_set(&data->nr_neas, 0); ++#endif ++ init_inode_ordering(inode, crd, create); ++} ++ ++/** ++ * delete_object_unix_file - delete_object of file_plugin ++ * @inode: inode to be deleted ++ * ++ * Truncates file to length 0, removes stat data and safe link. ++ */ ++int delete_object_unix_file(struct inode *inode) ++{ ++ unix_file_info_t *uf_info; ++ int result; ++ ++ if (reiser4_inode_get_flag(inode, REISER4_NO_SD)) ++ return 0; ++ ++ /* truncate file bogy first */ ++ uf_info = unix_file_inode_data(inode); ++ get_exclusive_access(uf_info); ++ result = truncate_file_body(inode, 0 /* size */ ); ++ drop_exclusive_access(uf_info); ++ ++ if (result) ++ warning("", "failed to truncate file (%llu) on removal: %d", ++ get_inode_oid(inode), result); ++ ++ /* remove stat data and safe link */ ++ return reiser4_delete_object_common(inode); ++} ++ ++/** ++ * sendfile_unix_file - sendfile of struct file_operations ++ * @file: file to be sent ++ * @ppos: position to start from ++ * @count: number of bytes to send ++ * @actor: function to copy data ++ * @target: where to copy read data ++ * ++ * Reads @count bytes from @file and calls @actor for every page read. This is ++ * needed for loop back devices support. ++ */ ++ssize_t ++sendfile_unix_file(struct file *file, loff_t *ppos, size_t count, ++ read_actor_t actor, void *target) ++{ ++ reiser4_context *ctx; ++ ssize_t result; ++ struct inode *inode; ++ unix_file_info_t *uf_info; ++ ++ inode = file->f_dentry->d_inode; ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ /* ++ * generic_file_sndfile may want to call update_atime. 
Grab space for ++ * stat data update ++ */ ++ result = reiser4_grab_space(estimate_update_common(inode), ++ BA_CAN_COMMIT); ++ if (result) ++ goto error; ++ mutex_lock(&inode->i_mutex); ++ reiser4_inode_set_flag(inode, REISER4_HAS_MMAP); ++ mutex_unlock(&inode->i_mutex); ++ ++ uf_info = unix_file_inode_data(inode); ++ get_nonexclusive_access(uf_info); ++ result = generic_file_sendfile(file, ppos, count, actor, target); ++ drop_nonexclusive_access(uf_info); ++ error: ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++int ++prepare_write_unix_file(struct file *file, struct page *page, ++ unsigned from, unsigned to) ++{ ++ reiser4_context *ctx; ++ unix_file_info_t *uf_info; ++ int ret; ++ ++ ctx = reiser4_init_context(file->f_dentry->d_inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ uf_info = unix_file_inode_data(file->f_dentry->d_inode); ++ get_exclusive_access(uf_info); ++ ret = find_file_state(file->f_dentry->d_inode, uf_info); ++ if (ret == 0) { ++ if (uf_info->container == UF_CONTAINER_TAILS) ++ ret = -EINVAL; ++ else ++ ret = do_prepare_write(file, page, from, to); ++ } ++ drop_exclusive_access(uf_info); ++ ++ /* don't commit transaction under inode semaphore */ ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return ret; ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * scroll-step: 1 ++ * End: ++ */ +diff --git a/fs/reiser4/plugin/file/file.h b/fs/reiser4/plugin/file/file.h +new file mode 100644 +index 0000000..e486a88 +--- /dev/null ++++ b/fs/reiser4/plugin/file/file.h +@@ -0,0 +1,272 @@ ++/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* this file contains declarations of methods implementing ++ file plugins (UNIX_FILE_PLUGIN_ID, CRYPTCOMPRESS_FILE_PLUGIN_ID ++ and SYMLINK_FILE_PLUGIN_ID) */ ++ ++#if !defined( __REISER4_FILE_H__ ) ++#define __REISER4_FILE_H__ ++ 
++/* declarations of functions implementing UNIX_FILE_PLUGIN_ID file plugin */ ++ ++/* inode operations */ ++int setattr_unix_file(struct dentry *, struct iattr *); ++ ++/* file operations */ ++ssize_t read_unix_file(struct file *, char __user *buf, size_t read_amount, ++ loff_t *off); ++ssize_t write_unix_file(struct file *, const char __user *buf, size_t write_amount, ++ loff_t * off); ++int ioctl_unix_file(struct inode *, struct file *, unsigned int cmd, ++ unsigned long arg); ++int mmap_unix_file(struct file *, struct vm_area_struct *); ++int open_unix_file(struct inode *, struct file *); ++int release_unix_file(struct inode *, struct file *); ++int sync_unix_file(struct file *, struct dentry *, int datasync); ++ssize_t sendfile_unix_file(struct file *, loff_t *ppos, size_t count, ++ read_actor_t, void *target); ++ ++/* address space operations */ ++int readpage_unix_file(struct file *, struct page *); ++int readpages_unix_file(struct file*, struct address_space*, struct list_head*, unsigned); ++int writepages_unix_file(struct address_space *, struct writeback_control *); ++int prepare_write_unix_file(struct file *, struct page *, unsigned from, ++ unsigned to); ++int commit_write_unix_file(struct file *, struct page *, unsigned from, ++ unsigned to); ++sector_t bmap_unix_file(struct address_space *, sector_t lblock); ++ ++/* file plugin operations */ ++int flow_by_inode_unix_file(struct inode *, const char __user *buf, ++ int user, loff_t, loff_t, rw_op, flow_t *); ++int owns_item_unix_file(const struct inode *, const coord_t *); ++void init_inode_data_unix_file(struct inode *, reiser4_object_create_data *, ++ int create); ++int delete_object_unix_file(struct inode *); ++ ++/* ++ * all the write into unix file is performed by item write method. 
Write method ++ * of unix file plugin only decides which item plugin (extent or tail) and in ++ * which mode (one from the enum below) to call ++ */ ++typedef enum { ++ FIRST_ITEM = 1, ++ APPEND_ITEM = 2, ++ OVERWRITE_ITEM = 3 ++} write_mode_t; ++ ++/* unix file may be in one the following states */ ++typedef enum { ++ UF_CONTAINER_UNKNOWN = 0, ++ UF_CONTAINER_TAILS = 1, ++ UF_CONTAINER_EXTENTS = 2, ++ UF_CONTAINER_EMPTY = 3 ++} file_container_t; ++ ++struct formatting_plugin; ++struct inode; ++ ++/* unix file plugin specific part of reiser4 inode */ ++typedef struct unix_file_info { ++ /* ++ * this read-write lock protects file containerization change. Accesses ++ * which do not change file containerization (see file_container_t) ++ * (read, readpage, writepage, write (until tail conversion is ++ * involved)) take read-lock. Accesses which modify file ++ * containerization (truncate, conversion from tail to extent and back) ++ * take write-lock. ++ */ ++ struct rw_semaphore latch; ++ /* this enum specifies which items are used to build the file */ ++ file_container_t container; ++ /* ++ * plugin which controls when file is to be converted to extents and ++ * back to tail ++ */ ++ struct formatting_plugin *tplug; ++ /* if this is set, file is in exclusive use */ ++ int exclusive_use; ++#if REISER4_DEBUG ++ /* pointer to task struct of thread owning exclusive access to file */ ++ void *ea_owner; ++ atomic_t nr_neas; ++ void *last_reader; ++#endif ++} unix_file_info_t; ++ ++struct unix_file_info *unix_file_inode_data(const struct inode *inode); ++void get_exclusive_access(unix_file_info_t *); ++void drop_exclusive_access(unix_file_info_t *); ++void get_nonexclusive_access(unix_file_info_t *); ++void drop_nonexclusive_access(unix_file_info_t *); ++int try_to_get_nonexclusive_access(unix_file_info_t *); ++int find_file_item(hint_t *, const reiser4_key *, znode_lock_mode, ++ struct inode *); ++int find_file_item_nohint(coord_t *, lock_handle *, ++ const reiser4_key *, 
znode_lock_mode, ++ struct inode *); ++ ++int load_file_hint(struct file *, hint_t *); ++void save_file_hint(struct file *, const hint_t *); ++ ++#include "../item/extent.h" ++#include "../item/tail.h" ++#include "../item/ctail.h" ++ ++struct uf_coord { ++ coord_t coord; ++ lock_handle *lh; ++ int valid; ++ union { ++ extent_coord_extension_t extent; ++ tail_coord_extension_t tail; ++ ctail_coord_extension_t ctail; ++ } extension; ++}; ++ ++#include "../../forward.h" ++#include "../../seal.h" ++#include "../../lock.h" ++ ++/* ++ * This structure is used to speed up file operations (reads and writes). A ++ * hint is a suggestion about where a key resolved to last time. A seal ++ * indicates whether a node has been modified since a hint was last recorded. ++ * You check the seal, and if the seal is still valid, you can use the hint ++ * without traversing the tree again. ++ */ ++struct hint { ++ seal_t seal; /* a seal over last file item accessed */ ++ uf_coord_t ext_coord; ++ loff_t offset; ++ znode_lock_mode mode; ++ lock_handle lh; ++}; ++ ++static inline int hint_is_valid(hint_t * hint) ++{ ++ return hint->ext_coord.valid; ++} ++ ++static inline void hint_set_valid(hint_t * hint) ++{ ++ hint->ext_coord.valid = 1; ++} ++ ++static inline void hint_clr_valid(hint_t * hint) ++{ ++ hint->ext_coord.valid = 0; ++} ++ ++int load_file_hint(struct file *, hint_t *); ++void save_file_hint(struct file *, const hint_t *); ++void hint_init_zero(hint_t *); ++void reiser4_set_hint(hint_t *, const reiser4_key *, znode_lock_mode); ++int hint_is_set(const hint_t *); ++void reiser4_unset_hint(hint_t *); ++ ++int reiser4_update_file_size(struct inode *, reiser4_key *, int update_sd); ++int cut_file_items(struct inode *, loff_t new_size, int update_sd, ++ loff_t cur_size, int (*update_actor) (struct inode *, ++ reiser4_key *, int)); ++#if REISER4_DEBUG ++ ++/* return 1 is exclusive access is obtained, 0 - otherwise */ ++static inline int ea_obtained(unix_file_info_t * uf_info) ++{ ++ 
int ret; ++ ++ ret = down_read_trylock(&uf_info->latch); ++ if (ret) ++ up_read(&uf_info->latch); ++ return !ret; ++} ++ ++#endif ++ ++/* declarations of functions implementing SYMLINK_FILE_PLUGIN_ID file plugin */ ++int reiser4_create_symlink(struct inode *symlink, struct inode *dir, ++ reiser4_object_create_data *); ++void destroy_inode_symlink(struct inode *); ++ ++/* declarations of functions implementing CRYPTCOMPRESS_FILE_PLUGIN_ID ++ file plugin */ ++ ++/* inode operations */ ++int setattr_cryptcompress(struct dentry *, struct iattr *); ++int prot_setattr_cryptcompress(struct dentry *, struct iattr *); ++ ++/* file operations */ ++ssize_t read_cryptcompress(struct file *, char __user *buf, size_t read_amount, ++ loff_t * off); ++ssize_t prot_read_cryptcompress(struct file *, char __user *buf, ++ size_t read_amount, loff_t * off); ++ ++ssize_t write_cryptcompress(struct file *, const char __user *buf, size_t write_amount, ++ loff_t * off, int * conv); ++ssize_t prot_write_cryptcompress(struct file *, const char __user *buf, size_t write_amount, ++ loff_t * off); ++int mmap_cryptcompress(struct file *, struct vm_area_struct *); ++int prot_mmap_cryptcompress(struct file *, struct vm_area_struct *); ++ssize_t sendfile_cryptcompress(struct file *file, loff_t *ppos, size_t count, ++ read_actor_t actor, void *target); ++ssize_t prot_sendfile_cryptcompress(struct file *file, loff_t *ppos, size_t count, ++ read_actor_t actor, void *target); ++ ++int release_cryptcompress(struct inode *, struct file *); ++int prot_release_cryptcompress(struct inode *, struct file *); ++ ++/* address space operations */ ++extern int readpage_cryptcompress(struct file *, struct page *); ++extern int writepages_cryptcompress(struct address_space *, ++ struct writeback_control *); ++/* file plugin operations */ ++int flow_by_inode_cryptcompress(struct inode *, const char __user *buf, ++ int user, loff_t, loff_t, rw_op, flow_t *); ++int key_by_inode_cryptcompress(struct inode *, loff_t 
off, reiser4_key *); ++int create_cryptcompress(struct inode *, struct inode *, ++ reiser4_object_create_data *); ++int delete_object_cryptcompress(struct inode *); ++void init_inode_data_cryptcompress(struct inode *, reiser4_object_create_data *, ++ int create); ++int cut_tree_worker_cryptcompress(tap_t *, const reiser4_key * from_key, ++ const reiser4_key * to_key, ++ reiser4_key * smallest_removed, ++ struct inode *object, int truncate, ++ int *progress); ++void destroy_inode_cryptcompress(struct inode *); ++int open_object_cryptcompress(struct inode * inode, struct file * file); ++ ++extern reiser4_plugin_ops cryptcompress_plugin_ops; ++ ++#define WRITE_GRANULARITY 32 ++ ++int tail2extent(unix_file_info_t *); ++int extent2tail(unix_file_info_t *); ++ ++int goto_right_neighbor(coord_t *, lock_handle *); ++int find_or_create_extent(struct page *); ++int equal_to_ldk(znode *, const reiser4_key *); ++ ++void init_uf_coord(uf_coord_t *uf_coord, lock_handle *lh); ++ ++static inline int cbk_errored(int cbk_result) ++{ ++ return (cbk_result != CBK_COORD_NOTFOUND ++ && cbk_result != CBK_COORD_FOUND); ++} ++ ++/* __REISER4_FILE_H__ */ ++#endif ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * scroll-step: 1 ++ * End: ++*/ +diff --git a/fs/reiser4/plugin/file/file_conversion.c b/fs/reiser4/plugin/file/file_conversion.c +new file mode 100644 +index 0000000..2e07b66 +--- /dev/null ++++ b/fs/reiser4/plugin/file/file_conversion.c +@@ -0,0 +1,594 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, ++ licensing governed by reiser4/README */ ++ ++/* This file contains hooks that converts (*) cryptcompress files to unix-files, ++ and a set of protected (**) methods of a cryptcompress file plugin to perform ++ such conversion. ++ ++(*) ++ The conversion is performed for incompressible files to reduce cpu and memory ++ usage. 
If first logical cluster (64K by default) of a file is incompressible, ++ then we make a desicion, that the whole file is incompressible. ++ The conversion can be enabled via installing a special compression mode ++ plugin (CONVX_COMPRESSION_MODE_ID, see plugin/compress/compress_mode.c for ++ details). ++ ++(**) ++ The protection means serialization of critical sections (readers and writers ++ of @pset->file) ++*/ ++ ++#include "../../inode.h" ++#include "../cluster.h" ++#include "file.h" ++ ++#define conversion_enabled(inode) \ ++ (inode_compression_mode_plugin(inode) == \ ++ compression_mode_plugin_by_id(CONVX_COMPRESSION_MODE_ID)) ++ ++ ++/* Located sections (readers and writers of @pset->file) are not ++ permanently critical: cryptcompress file can be converted only ++ if the conversion is enabled (see the macrio above). And we don't ++ convert unix files at all. ++ The following helper macro is a sanity check to decide if we ++ need to protect a located section. ++*/ ++#define should_protect(inode) \ ++ (inode_file_plugin(inode) == \ ++ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID) && \ ++ conversion_enabled(inode)) ++ ++/* All protected methods have prefix "prot" in their names. ++ It is convenient to construct them by usual (unprotected) ones ++ using the following common macros: ++*/ ++ ++/* Macro for passive protection. 
++ method_cryptcompress contains only readers */ ++#define PROT_PASSIVE(type, method, args) \ ++({ \ ++ type _result; \ ++ struct rw_semaphore * guard = \ ++ &reiser4_inode_data(inode)->conv_sem; \ ++ \ ++ if (should_protect(inode)) { \ ++ down_read(guard); \ ++ if (!should_protect(inode)) \ ++ up_read(guard); \ ++ } \ ++ if (inode_file_plugin(inode) == \ ++ file_plugin_by_id(UNIX_FILE_PLUGIN_ID)) \ ++ _result = method ## _unix_file args; \ ++ else \ ++ _result = method ## _cryptcompress args; \ ++ if (should_protect(inode)) \ ++ up_read(guard); \ ++ _result; \ ++}) ++ ++#define PROT_PASSIVE_VOID(method, args) \ ++({ \ ++ struct rw_semaphore * guard = \ ++ &reiser4_inode_data(inode)->conv_sem; \ ++ \ ++ if (should_protect(inode)) { \ ++ down_read(guard); \ ++ if (!should_protect(inode)) \ ++ up_read(guard); \ ++ } \ ++ if (inode_file_plugin(inode) == \ ++ file_plugin_by_id(UNIX_FILE_PLUGIN_ID)) \ ++ method ## _unix_file args; \ ++ else \ ++ method ## _cryptcompress args; \ ++ if (should_protect(inode)) \ ++ up_read(guard); \ ++}) ++ ++/* Macro for active protection. 
++ active_expr contains readers and writers; after its ++ evaluation conversion should be disabled */ ++#define PROT_ACTIVE(type, method, args, active_expr) \ ++({ \ ++ type _result = 0; \ ++ struct rw_semaphore * guard = \ ++ &reiser4_inode_data(inode)->conv_sem; \ ++ reiser4_context * ctx = reiser4_init_context(inode->i_sb); \ ++ if (IS_ERR(ctx)) \ ++ return PTR_ERR(ctx); \ ++ \ ++ if (should_protect(inode)) { \ ++ down_write(guard); \ ++ if (should_protect(inode)) \ ++ _result = active_expr; \ ++ up_write(guard); \ ++ } \ ++ if (_result == 0) { \ ++ if (inode_file_plugin(inode) == \ ++ file_plugin_by_id(UNIX_FILE_PLUGIN_ID)) \ ++ _result = method ## _unix_file args; \ ++ else \ ++ _result = method ## _cryptcompress args; \ ++ } \ ++ reiser4_exit_context(ctx); \ ++ _result; \ ++}) ++ ++/* Pass management to the unix-file plugin with "notail" policy */ ++static int __cryptcompress2unixfile(struct file *file, struct inode * inode) ++{ ++ int result; ++ reiser4_inode *info; ++ unix_file_info_t * uf; ++ info = reiser4_inode_data(inode); ++ ++ result = aset_set_unsafe(&info->pset, ++ PSET_FILE, ++ (reiser4_plugin *) ++ file_plugin_by_id(UNIX_FILE_PLUGIN_ID)); ++ if (result) ++ return result; ++ result = aset_set_unsafe(&info->pset, ++ PSET_FORMATTING, ++ (reiser4_plugin *) ++ formatting_plugin_by_id(NEVER_TAILS_FORMATTING_ID)); ++ if (result) ++ return result; ++ /* get rid of non-standard plugins */ ++ info->plugin_mask &= ~cryptcompress_mask; ++ /* get rid of plugin stat-data extension */ ++ info->extmask &= ~(1 << PLUGIN_STAT); ++ ++ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN); ++ ++ /* FIXME use init_inode_data_unix_file() instead, ++ but aviod init_inode_ordering() */ ++ /* Init unix-file specific part of inode */ ++ uf = unix_file_inode_data(inode); ++ uf->container = UF_CONTAINER_UNKNOWN; ++ init_rwsem(&uf->latch); ++ uf->tplug = inode_formatting_plugin(inode); ++ uf->exclusive_use = 0; ++#if REISER4_DEBUG ++ uf->ea_owner = NULL; ++ 
atomic_set(&uf->nr_neas, 0); ++#endif ++ inode->i_op = ++ &file_plugin_by_id(UNIX_FILE_PLUGIN_ID)->inode_ops; ++ inode->i_fop = ++ &file_plugin_by_id(UNIX_FILE_PLUGIN_ID)->file_ops; ++ inode->i_mapping->a_ops = ++ &file_plugin_by_id(UNIX_FILE_PLUGIN_ID)->as_ops; ++ file->f_op = inode->i_fop; ++ return 0; ++} ++ ++#if REISER4_DEBUG ++static int disabled_conversion_inode_ok(struct inode * inode) ++{ ++ __u64 extmask = reiser4_inode_data(inode)->extmask; ++ __u16 plugin_mask = reiser4_inode_data(inode)->plugin_mask; ++ ++ return ((extmask & (1 << LIGHT_WEIGHT_STAT)) && ++ (extmask & (1 << UNIX_STAT)) && ++ (extmask & (1 << LARGE_TIMES_STAT)) && ++ (extmask & (1 << PLUGIN_STAT)) && ++ (plugin_mask & (1 << PSET_COMPRESSION_MODE))); ++} ++#endif ++ ++/* Assign another mode that will control ++ compression at flush time only */ ++static int disable_conversion_no_update_sd(struct inode * inode) ++{ ++ int result; ++ result = ++ force_plugin_pset(inode, ++ PSET_COMPRESSION_MODE, ++ (reiser4_plugin *)compression_mode_plugin_by_id ++ (LATTD_COMPRESSION_MODE_ID)); ++ assert("edward-1500", ++ ergo(!result, disabled_conversion_inode_ok(inode))); ++ return result; ++} ++ ++/* Disable future attempts to check/convert. This function is called by ++ conversion hooks. */ ++static int disable_conversion(struct inode * inode) ++{ ++ return disable_conversion_no_update_sd(inode); ++} ++ ++static int check_position(struct inode * inode, ++ loff_t pos /* initial position in the file */, ++ reiser4_cluster_t * clust, ++ int * check_compress) ++{ ++ assert("edward-1505", conversion_enabled(inode)); ++ assert("edward-1506", inode->i_size <= inode_cluster_size(inode)); ++ /* if file size is more then cluster size, then compressible ++ status must be figured out (i.e. 
compression was disabled, ++ or file plugin was converted to unix_file) */ ++ ++ if (pos > inode->i_size) ++ /* first logical cluster will contain a (partial) hole */ ++ return disable_conversion(inode); ++ if (inode->i_size == inode_cluster_size(inode)) ++ *check_compress = 1; ++ return 0; ++} ++ ++static void start_check_compressibility(struct inode * inode, ++ reiser4_cluster_t * clust, ++ hint_t * hint) ++{ ++ assert("edward-1507", clust->index == 1); ++ assert("edward-1508", !tfm_cluster_is_uptodate(&clust->tc)); ++ assert("edward-1509", cluster_get_tfm_act(&clust->tc) == TFMA_READ); ++ ++ hint_init_zero(hint); ++ clust->hint = hint; ++ clust->index --; ++ clust->nr_pages = count_to_nrpages(fsize_to_count(clust, inode)); ++ ++ /* first logical cluster (of index #0) must be complete */ ++ assert("edward-1510", fsize_to_count(clust, inode) == ++ inode_cluster_size(inode)); ++} ++ ++static void finish_check_compressibility(struct inode * inode, ++ reiser4_cluster_t * clust, ++ hint_t * hint) ++{ ++ reiser4_unset_hint(clust->hint); ++ clust->hint = hint; ++ clust->index ++; ++} ++ ++#if REISER4_DEBUG ++static int prepped_dclust_ok(hint_t * hint) ++{ ++ reiser4_key key; ++ coord_t * coord = &hint->ext_coord.coord; ++ ++ item_key_by_coord(coord, &key); ++ return (item_id_by_coord(coord) == CTAIL_ID && ++ !coord_is_unprepped_ctail(coord) && ++ (get_key_offset(&key) + nr_units_ctail(coord) == ++ dclust_get_extension_dsize(hint))); ++} ++#endif ++ ++#define fifty_persent(size) (size >> 1) ++/* evaluation of data compressibility */ ++#define data_is_compressible(osize, isize) \ ++ (osize < fifty_persent(isize)) ++ ++/* This is called only once per file life. ++ Read first logical cluster (of index #0) and estimate its compressibility. 
   Save estimation result in @compressible */
static int read_check_compressibility(struct inode * inode,
				      reiser4_cluster_t * clust,
				      int * compressible)
{
	int i;
	int result;
	__u32 dst_len;
	hint_t tmp_hint;
	hint_t * cur_hint = clust->hint;

	/* switch to a scratch hint so the probe does not disturb the
	   caller's hint state; restored by finish_check_compressibility() */
	start_check_compressibility(inode, clust, &tmp_hint);

	result = grab_cluster_pages(inode, clust);
	if (result)
		return result;
	/* Read page cluster here */
	for (i = 0; i < clust->nr_pages; i++) {
		struct page *page = clust->pages[i];
		lock_page(page);
		result = do_readpage_ctail(inode, clust, page,
					   ZNODE_READ_LOCK);
		unlock_page(page);
		if (result)
			goto error;
	}
	tfm_cluster_clr_uptodate(&clust->tc);

	cluster_set_tfm_act(&clust->tc, TFMA_WRITE);

	if (hint_is_valid(&tmp_hint) && !hint_is_unprepped_dclust(&tmp_hint)) {
		/* length of compressed data is known, no need to compress */
		assert("edward-1511",
		       znode_is_write_locked(tmp_hint.ext_coord.coord.node));
		assert("edward-1512",
		       WITH_DATA(tmp_hint.ext_coord.coord.node,
				 prepped_dclust_ok(&tmp_hint)));
		dst_len = dclust_get_extension_dsize(&tmp_hint);
	}
	else {
		/* no prepped disk cluster: compress the whole logical
		   cluster once just to measure the compressed size */
		tfm_cluster_t * tc = &clust->tc;
		compression_plugin * cplug = inode_compression_plugin(inode);
		result = grab_tfm_stream(inode, tc, INPUT_STREAM);
		if (result)
			goto error;
		for (i = 0; i < clust->nr_pages; i++) {
			char *data;
			lock_page(clust->pages[i]);
			BUG_ON(!PageUptodate(clust->pages[i]));
			data = kmap(clust->pages[i]);
			memcpy(tfm_stream_data(tc, INPUT_STREAM) + pg_to_off(i),
			       data, PAGE_CACHE_SIZE);
			kunmap(clust->pages[i]);
			unlock_page(clust->pages[i]);
		}
		result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
		if (result)
			goto error;
		result = grab_coa(tc, cplug);
		if (result)
			goto error;
		tc->len = tc->lsize = fsize_to_count(clust, inode);
		assert("edward-1513", tc->len == inode_cluster_size(inode));
		dst_len = tfm_stream_size(tc, OUTPUT_STREAM);
		cplug->compress(get_coa(tc, cplug->h.id, tc->act),
				tfm_input_data(clust), tc->len,
				tfm_output_data(clust), &dst_len);
		assert("edward-1514",
		       dst_len <= tfm_stream_size(tc, OUTPUT_STREAM));
	}
	finish_check_compressibility(inode, clust, cur_hint);
	*compressible = data_is_compressible(dst_len,
					     inode_cluster_size(inode));
	/* on success the page cluster is left grabbed for the caller;
	   it is released only on the error path below */
	return 0;
 error:
	reiser4_release_cluster_pages(clust);
	return result;
}

/* Cut disk cluster of index @idx */
static int cut_disk_cluster(struct inode * inode, cloff_t idx)
{
	reiser4_key from, to;
	assert("edward-1515", inode_file_plugin(inode) ==
	       file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
	/* build [from, to]: the key range covering exactly one logical
	   cluster of this file */
	key_by_inode_cryptcompress(inode, clust_to_off(idx, inode), &from);
	to = from;
	set_key_offset(&to,
		       get_key_offset(&from) + inode_cluster_size(inode) - 1);
	return reiser4_cut_tree(reiser4_tree_by_inode(inode),
				&from, &to, inode, 0);
}

/* Reserve disk space for one iteration of the cryptcompress->unix-file
   conversion. Returns 0 on success, error code otherwise. */
static int reserve_cryptcompress2unixfile(struct inode *inode)
{
	reiser4_block_nr unformatted_nodes;
	reiser4_tree *tree;

	tree = reiser4_tree_by_inode(inode);

	/* number of unformatted nodes which will be created */
	unformatted_nodes = cluster_nrpages(inode);	/* N */

	/*
	 * space required for one iteration of extent->tail conversion:
	 *
	 * 1. kill ctail items
	 *
	 * 2. insert N unformatted nodes
	 *
	 * 3. insert N (worst-case single-block
	 * extents) extent units.
	 *
	 * 4. drilling to the leaf level by coord_by_key()
	 *
	 * 5. possible update of stat-data
	 *
	 */
	grab_space_enable();
	return reiser4_grab_space
	    (2 * tree->height +
	     unformatted_nodes +
	     unformatted_nodes * estimate_one_insert_into_item(tree) +
	     1 + estimate_one_insert_item(tree) +
	     inode_file_plugin(inode)->estimate.update(inode),
	     BA_CAN_COMMIT);
}

/* clear flag that indicated conversion and update
   stat-data with new (unix-file - specific) info */
static int complete_file_conversion(struct inode *inode)
{
	int result;

	grab_space_enable();
	result =
	    reiser4_grab_space(inode_file_plugin(inode)->estimate.update(inode),
			       BA_CAN_COMMIT);
	if (result == 0) {
		reiser4_inode_clr_flag(inode, REISER4_FILE_CONV_IN_PROGRESS);
		result = reiser4_update_sd(inode);
	}
	if (result)
		warning("edward-1452",
			"Converting %llu to unix-file: update sd failed (%i)",
			(unsigned long long)get_inode_oid(inode), result);
	/* NOTE(review): always returns 0 even when the stat-data update
	   failed -- presumably a failed sd update is treated as non-fatal
	   once the conversion itself succeeded; confirm this is intended */
	return 0;
}

/* do conversion: turn a cryptcompress file into a unix-file.
   On entry the page cluster of index 0 is grabbed and uptodate. */
static int cryptcompress2unixfile(struct file *file, struct inode * inode,
				  reiser4_cluster_t * clust)
{
	int i;
	int result = 0;
	cryptcompress_info_t *cr_info;
	unix_file_info_t *uf_info;

	assert("edward-1516", clust->pages[0]->index == 0);
	assert("edward-1517", clust->hint != NULL);

	/* release all cryptcompress-specific resources */
	cr_info = cryptcompress_inode_data(inode);
	result = reserve_cryptcompress2unixfile(inode);
	if (result)
		goto out;
	reiser4_inode_set_flag(inode, REISER4_FILE_CONV_IN_PROGRESS);
	reiser4_unset_hint(clust->hint);
	result = cut_disk_cluster(inode, 0);
	if (result)
		goto out;
	/* captured jnode of cluster and associated resources (pages,
	   reserved disk space) were released by ->kill_hook() method
	   of the item plugin */

	result = __cryptcompress2unixfile(file, inode);
	if (result)
		goto out;
	/* At this point file is managed by unix file plugin */

	uf_info = unix_file_inode_data(inode);

	assert("edward-1518",
	       ergo(jprivate(clust->pages[0]),
		    !jnode_is_cluster_page(jprivate(clust->pages[0]))));
	/* re-insert the (still uptodate) pages as unformatted extents */
	for (i = 0; i < clust->nr_pages; i++) {
		assert("edward-1519", clust->pages[i]);
		assert("edward-1520", PageUptodate(clust->pages[i]));

		result = find_or_create_extent(clust->pages[i]);
		if (result)
			break;
	}
	if (!result) {
		uf_info->container = UF_CONTAINER_EXTENTS;
		complete_file_conversion(inode);
	}
 out:
	all_grabbed2free();
	if (result)
		warning("edward-1453", "Failed to convert file %llu: %i",
			(unsigned long long)get_inode_oid(inode), result);
	return result;
}

/* Check, then perform or disable conversion if needed.
   Sets *progress to 1 when the file was actually converted. */
int write_conversion_hook(struct file *file, struct inode * inode, loff_t pos,
			  reiser4_cluster_t * clust, int * progress)
{
	int result;
	int check_compress = 0;
	int compressible = 0;

	if (!conversion_enabled(inode))
		return 0;
	result = check_position(inode, pos, clust, &check_compress);
	if (result || !check_compress)
		return result;
	result = read_check_compressibility(inode, clust, &compressible);
	if (result)
		return result;

	/* At this point page cluster is grabbed and uptodate */
	if (!compressible) {
		/* data does not compress well: convert to plain unix-file */
		result = cryptcompress2unixfile(file, inode, clust);
		if (result == 0)
			*progress = 1;
	}
	else
		result = disable_conversion(inode);

	reiser4_release_cluster_pages(clust);
	return result;
}

/* Truncation changes the file size, so further conversion checks would
   be based on stale data: disable conversion on ATTR_SIZE. */
static int setattr_conversion_hook(struct inode * inode, struct iattr *attr)
{
	return (attr->ia_valid & ATTR_SIZE ? disable_conversion(inode) : 0);
}

/* Protected methods of cryptcompress file plugin constructed
   by the macros above */

/* Wrappers with active protection for:
   . write_cryptcompress;
   .
   . setattr_cryptcompress;
*/

/* Write through the cryptcompress plugin under the conversion semaphore.
   If write_cryptcompress() reports (via @conv) that the file was converted
   to a unix-file mid-write, the remainder of the buffer is written through
   the unix-file plugin. */
ssize_t prot_write_cryptcompress(struct file *file, const char __user *buf,
				 size_t count, loff_t *off)
{
	int prot = 0;
	int conv = 0;
	ssize_t written_cr = 0;
	ssize_t written_uf = 0;
	struct inode * inode = file->f_dentry->d_inode;
	struct rw_semaphore * guard = &reiser4_inode_data(inode)->conv_sem;

	if (should_protect(inode)) {
		prot = 1;
		down_write(guard);
	}
	written_cr = write_cryptcompress(file, buf, count, off, &conv);
	if (prot)
		up_write(guard);
	if (written_cr < 0)
		return written_cr;
	if (conv)
		/* file became a unix-file during the write: push the rest
		   of the user buffer through the unix-file plugin */
		written_uf = write_unix_file(file, buf + written_cr,
					     count - written_cr, off);
	return written_cr + (written_uf < 0 ? 0 : written_uf);
}

int prot_setattr_cryptcompress(struct dentry *dentry, struct iattr *attr)
{
	struct inode * inode = dentry->d_inode;
	return PROT_ACTIVE(int, setattr, (dentry, attr),
			   setattr_conversion_hook(inode, attr));
}

/* Wrappers with passive protection for:
   . read_cryptcompress;
   . mmap_cryptcompress;
   . release_cryptcompress;
   . sendfile_cryptcompress;
   . delete_object_cryptcompress.
*/
ssize_t prot_read_cryptcompress(struct file * file, char __user * buf,
				size_t size, loff_t * off)
{
	struct inode * inode = file->f_dentry->d_inode;
	return PROT_PASSIVE(ssize_t, read, (file, buf, size, off));
}

int prot_mmap_cryptcompress(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file->f_dentry->d_inode;
	return PROT_PASSIVE(int, mmap, (file, vma));
}

int prot_release_cryptcompress(struct inode *inode, struct file *file)
{
	return PROT_PASSIVE(int, release, (inode, file));
}

ssize_t prot_sendfile_cryptcompress(struct file *file, loff_t *ppos,
				    size_t count, read_actor_t actor,
				    void *target)
{
	struct inode * inode = file->f_dentry->d_inode;
	return PROT_PASSIVE(ssize_t, sendfile,
			    (file, ppos, count, actor, target));
}

/*
  Local variables:
  c-indentation-style: "K&R"
  mode-name: "LC"
  c-basic-offset: 8
  tab-width: 8
  fill-column: 80
  scroll-step: 1
  End:
*/
diff --git a/fs/reiser4/plugin/file/invert.c b/fs/reiser4/plugin/file/invert.c
new file mode 100644
index 0000000..7349878
--- /dev/null
+++ b/fs/reiser4/plugin/file/invert.c
@@ -0,0 +1,493 @@
/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */

/* Suppose you want to conveniently read and write a large variety of small files conveniently within a single emacs
   buffer, without having a separate buffer for each 8 byte or so file.  Inverts are the way to do that.  An invert
   provides you with the contents of a set of subfiles plus its own contents.  It is a file which inherits other files
   when you read it, and allows you to write to it and through it to the files that it inherits from.  In order for it
   to know which subfiles each part of your write should go into, there must be delimiters indicating that.  It tries to
   make that easy for you by providing those delimiters in what you read from it.
++ ++ When you read it, an invert performs an inverted assignment. Instead of taking an assignment command and writing a ++ bunch of files, it takes a bunch of files and composes an assignment command for you to read from it that if executed ++ would create those files. But which files? Well, that must be specified in the body of the invert using a special ++ syntax, and that specification is called the invert of the assignment. ++ ++ When written to, an invert performs the assignment command that is written ++ to it, and modifies its own body to contain the invert of that ++ assignment. ++ ++ In other words, writing to an invert file what you have read from it ++ is the identity operation. ++ ++ Malformed assignments cause write errors. Partial writes are not ++ supported in v4.0, but will be. ++ ++ Example: ++ ++ If an invert contains: ++ ++ /filenameA/<>+"(some text stored in the invert)+/filenameB/<> ++ ++====================== ++Each element in this definition should be an invert, and all files ++should be called recursively - too. This is bad. If one of the ++included files in not a regular or invert file, then we can't read ++main file. ++ ++I think to make it is possible easier: ++ ++internal structure of invert file should be like symlink file. But ++read and write method should be explitely indicated in i/o operation.. ++ ++By default we read and write (if probably) as symlink and if we ++specify ..invert at reading time that too we can specify it at write time. ++ ++example: ++/my_invert_file/..invert<- ( (/filenameA<-"(The contents of filenameA))+"(some text stored in the invert)+(/filenameB<-"(The contents of filenameB) ) ) ++will create /my_invert_file as invert, and will creat /filenameA and /filenameB with specified body. 
++ ++read of /my_invert_file/..invert will be ++/filenameA<-"(The contents of filenameA)+"(some text stored in the invert)+/filenameB<-"(The contents of filenameB) ++ ++but read of /my_invert_file/ will be ++The contents of filenameAsome text stored in the invertThe contents of filenameB ++ ++we also can creat this file as ++/my_invert_file/<-/filenameA+"(some text stored in the invert)+/filenameB ++will create /my_invert_file , and use existing files /filenameA and /filenameB. ++ ++and when we will read it will be as previously invert file. ++ ++This is correct? ++ ++ vv ++DEMIDOV-FIXME-HANS: ++ ++Maybe you are right, but then you must disable writes to /my_invert_file/ and only allow writes to /my_invert_file/..invert ++ ++Do you agree? Discuss it on reiserfs-list.... ++ ++-Hans ++======================= ++ ++ Then a read will return: ++ ++ /filenameA<-"(The contents of filenameA)+"(some text stored in the invert)+/filenameB<-"(The contents of filenameB) ++ ++ and a write of the line above to the invert will set the contents of ++ the invert and filenameA and filenameB to their original values. ++ ++ Note that the contents of an invert have no influence on the effect ++ of a write unless the write is a partial write (and a write of a ++ shorter file without using truncate first is a partial write). ++ ++ truncate() has no effect on filenameA and filenameB, it merely ++ resets the value of the invert. ++ ++ Writes to subfiles via the invert are implemented by preceding them ++ with truncates. ++ ++ Parse failures cause write failures. ++ ++ Questions to ponder: should the invert be acted on prior to file ++ close when writing to an open filedescriptor? ++ ++ Example: ++ ++ If an invert contains: ++ ++ "(This text and a pair of quotes are all that is here.) ++ ++Then a read will return: ++ ++ "(This text and a pair of quotes are all that is here.) 
++ ++*/ ++ ++/* OPEN method places a struct file in memory associated with invert body ++ and returns something like file descriptor to the user for the future access ++ to the invert file. ++ During opening we parse the body of invert and get a list of the 'entryes' ++ (that describes all its subfiles) and place pointer on the first struct in ++ reiserfs-specific part of invert inode (arbitrary decision). ++ ++ Each subfile is described by the struct inv_entry that has a pointer @sd on ++ in-core based stat-data and a pointer on struct file @f (if we find that the ++ subfile uses more then one unformated node (arbitrary decision), we load ++ struct file in memory, otherwise we load base stat-data (and maybe 1-2 bytes ++ of some other information we need) ++ ++ Since READ and WRITE methods for inverts were formulated in assignment ++ language, they don't contain arguments 'size' and 'offset' that make sense ++ only in ordinary read/write methods. ++ ++ READ method is a combination of two methods: ++ 1) ordinary read method (with offset=0, lenght = @f->...->i_size) for entries ++ with @f != 0, this method uses pointer on struct file as an argument ++ 2) read method for inode-less files with @sd != 0, this method uses ++ in-core based stat-data instead struct file as an argument. ++ in the first case we don't use pagecache, just copy data that we got after ++ cbk() into userspace. ++ ++ WRITE method for invert files is more complex. ++ Besides declared WRITE-interface in assignment languageb above we need ++ to have an opportunity to edit unwrapped body of invert file with some ++ text editor, it means we need GENERIC WRITE METHOD for invert file: ++ ++ my_invert_file/..invert <- "string" ++ ++ this method parses "string" and looks for correct subfile signatures, also ++ the parsing process splits this "string" on the set of flows in accordance ++ with the set of subfiles specified by this signarure. 
++ The found list of signatures #S is compared with the opened one #I of invert ++ file. If it doesn't have this one (#I==0, it will be so for instance if we ++ have just create this invert file) the write method assignes found signature ++ (#I=#S;) to the invert file. Then if #I==#S, generic write method splits ++ itself to the some write methods for ordinary or light-weight, or call itself ++ recursively for invert files with corresponding flows. ++ I am not sure, but the list of signatures looks like what mr.Demidov means ++ by 'delimiters'. ++ ++ The cases when #S<#I (#I<#S) (in the sense of set-theory) are also available ++ and cause delete (create new) subfiles (arbitrary decision - it may looks ++ too complex, but this interface will be the completest). The order of entries ++ of list #S (#I) and inherited order on #I (#S) must coincide. ++ The other parsing results give malformed signature that aborts READ method ++ and releases all resources. ++ ++ Format of subfile (entry) signature: ++ ++ "START_MAGIC"<>(TYPE="...",LOOKUP_ARG="...")SUBFILE_BODY"END_MAGIC" ++ ++ Legend: ++ ++ START_MAGIC - keyword indicates the start of subfile signature; ++ ++ <> indicates the start of 'subfile metadata', that is the pair ++ (TYPE="...",LOOKUP_ARG="...") in parenthesis separated by comma. 
++ ++ TYPE - the string "type" indicates the start of one of the three words: ++ - ORDINARY_FILE, ++ - LIGHT_WEIGHT_FILE, ++ - INVERT_FILE; ++ ++ LOOKUP_ARG - lookup argument depends on previous type: ++ */ ++ ++ /************************************************************/ ++ /* TYPE * LOOKUP ARGUMENT */ ++ /************************************************************/ ++ /* LIGH_WEIGHT_FILE * stat-data key */ ++ /************************************************************/ ++ /* ORDINARY_FILE * filename */ ++ /************************************************************/ ++ /* INVERT_FILE * filename */ ++ /************************************************************/ ++ ++ /* where: ++ *stat-data key - the string contains stat data key of this subfile, it will be ++ passed to fast-access lookup method for light-weight files; ++ *filename - pathname of this subfile, iyt well be passed to VFS lookup methods ++ for ordinary and invert files; ++ ++ SUBFILE_BODY - data of this subfile (it will go to the flow) ++ END_MAGIC - the keyword indicates the end of subfile signature. ++ ++ The other simbols inside the signature interpreted as 'unformatted content', ++ which is available with VFS's read_link() (arbitraruy decision). ++ ++ NOTE: Parse method for a body of invert file uses mentioned signatures _without_ ++ subfile bodies. ++ ++ Now the only unclear thing is WRITE in regular light-weight subfile A that we ++ can describe only in assignment language: ++ ++ A <- "some_string" ++ ++ I guess we don't want to change stat-data and body items of file A ++ if this file exist, and size(A) != size("some_string") because this operation is ++ expencive, so we only do the partial write if size(A) > size("some_string") ++ and do truncate of the "some_string", and then do A <- "truncated string", if ++ size(A) < size("some_string"). This decision is also arbitrary.. 
++ */ ++ ++/* here is infrastructure for formated flows */ ++ ++#define SUBFILE_HEADER_MAGIC 0x19196605 ++#define FLOW_HEADER_MAGIC 0x01194304 ++ ++#include "../plugin.h" ++#include "../../debug.h" ++#include "../../forward.h" ++#include "../object.h" ++#include "../item/item.h" ++#include "../item/static_stat.h" ++#include "../../dformat.h" ++#include "../znode.h" ++#include "../inode.h" ++ ++#include ++#include /* for struct file */ ++#include /* for struct list_head */ ++ ++typedef enum { ++ LIGHT_WEIGHT_FILE, ++ ORDINARY_FILE, ++ INVERT_FILE ++} inv_entry_type; ++ ++typedef struct flow_header { ++ d32 fl_magic; ++ d16 fl_nr; /* number of subfiles in the flow */ ++}; ++ ++typedef struct subfile_header { ++ d32 sh_magic; /* subfile magic */ ++ d16 sh_type; /* type of subfile: light-weight, ordinary, invert */ ++ d16 sh_arg_len; /* lenght of lookup argument (filename, key) */ ++ d32 sh_body_len; /* lenght of subfile body */ ++}; ++ ++/* functions to get/set fields of flow header */ ++ ++static void fl_set_magic(flow_header * fh, __u32 value) ++{ ++ cputod32(value, &fh->fh_magic); ++} ++ ++static __u32 fl_get_magic(flow_header * fh) ++{ ++ return d32tocpu(&fh->fh_magic); ++} ++static void fl_set_number(flow_header * fh, __u16 value) ++{ ++ cputod16(value, &fh->fh_nr); ++} ++static unsigned fl_get_number(flow_header * fh) ++{ ++ return d16tocpu(&fh->fh_nr); ++} ++ ++/* functions to get/set fields of subfile header */ ++ ++static void sh_set_magic(subfile_header * sh, __u32 value) ++{ ++ cputod32(value, &sh->sh_magic); ++} ++ ++static __u32 sh_get_magic(subfile_header * sh) ++{ ++ return d32tocpu(&sh->sh_magic); ++} ++static void sh_set_type(subfile_header * sh, __u16 value) ++{ ++ cputod16(value, &sh->sh_magic); ++} ++static unsigned sh_get_type(subfile_header * sh) ++{ ++ return d16tocpu(&sh->sh_magic); ++} ++static void sh_set_arg_len(subfile_header * sh, __u16 value) ++{ ++ cputod16(value, &sh->sh_arg_len); ++} ++static unsigned sh_get_arg_len(subfile_header * 
sh) ++{ ++ return d16tocpu(&sh->sh_arg_len); ++} ++static void sh_set_body_len(subfile_header * sh, __u32 value) ++{ ++ cputod32(value, &sh->sh_body_len); ++} ++ ++static __u32 sh_get_body_len(subfile_header * sh) ++{ ++ return d32tocpu(&sh->sh_body_len); ++} ++ ++/* in-core minimal stat-data, light-weight analog of inode */ ++ ++struct incore_sd_base { ++ umode_t isd_mode; ++ nlink_t isd_nlink; ++ loff_t isd_size; ++ char *isd_data; /* 'subflow' to write */ ++}; ++ ++/* open invert create a list of invert entries, ++ every entry is represented by structure inv_entry */ ++ ++struct inv_entry { ++ struct list_head *ie_list; ++ struct file *ie_file; /* this is NULL if the file doesn't ++ have unformated nodes */ ++ struct incore_sd_base *ie_sd; /* inode-less analog of struct file */ ++}; ++ ++/* allocate and init invert entry */ ++ ++static struct inv_entry *allocate_inv_entry(void) ++{ ++ struct inv_entry *inv_entry; ++ ++ inv_entry = reiser4_kmalloc(sizeof(struct inv_entry), GFP_KERNEL); ++ if (!inv_entry) ++ return ERR_PTR(RETERR(-ENOMEM)); ++ inv_entry->ie_file = NULL; ++ inv_entry->ie_sd = NULL; ++ INIT_LIST_HEAD(&inv_entry->ie_list); ++ return inv_entry; ++} ++ ++static int put_inv_entry(struct inv_entry *ientry) ++{ ++ int result = 0; ++ ++ assert("edward-96", ientry != NULL); ++ assert("edward-97", ientry->ie_list != NULL); ++ ++ list_del(ientry->ie_list); ++ if (ientry->ie_sd != NULL) { ++ kfree(ientry->ie_sd); ++ kfree(ientry); ++ } ++ if (ientry->ie_file != NULL) ++ result = filp_close(ientry->file, NULL); ++ return result; ++} ++ ++static int allocate_incore_sd_base(struct inv_entry *inv_entry) ++{ ++ struct incore_sd_base *isd_base assert("edward-98", inv_entry != NULL); ++ assert("edward-99", inv_entry->ie_inode = NULL); ++ assert("edward-100", inv_entry->ie_sd = NULL); ++ ++ isd_base = reiser4_kmalloc(sizeof(struct incore_sd_base), GFP_KERNEL); ++ if (!isd_base) ++ return RETERR(-ENOMEM); ++ inv_entry->ie_sd = isd_base; ++ return 0; ++} ++ ++/* this 
can be installed as ->init_inv_entry () method of ++ item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c). ++ Copies data from on-disk stat-data format into light-weight analog of inode . ++ Doesn't hanlde stat-data extensions. */ ++ ++static void sd_base_load(struct inv_entry *inv_entry, char *sd) ++{ ++ reiser4_stat_data_base *sd_base; ++ ++ assert("edward-101", inv_entry != NULL); ++ assert("edward-101", inv_entry->ie_sd != NULL); ++ assert("edward-102", sd != NULL); ++ ++ sd_base = (reiser4_stat_data_base *) sd; ++ inv_entry->incore_sd_base->isd_mode = d16tocpu(&sd_base->mode); ++ inv_entry->incore_sd_base->isd_nlink = d32tocpu(&sd_base->nlink); ++ inv_entry->incore_sd_base->isd_size = d64tocpu(&sd_base->size); ++ inv_entry->incore_sd_base->isd_data = NULL; ++} ++ ++/* initialise incore stat-data */ ++ ++static void init_incore_sd_base(struct inv_entry *inv_entry, coord_t * coord) ++{ ++ reiser4_plugin *plugin = item_plugin_by_coord(coord); ++ void *body = item_body_by_coord(coord); ++ ++ assert("edward-103", inv_entry != NULL); ++ assert("edward-104", plugin != NULL); ++ assert("edward-105", body != NULL); ++ ++ sd_base_load(inv_entry, body); ++} ++ ++/* takes a key or filename and allocates new invert_entry, ++ init and adds it into the list, ++ we use lookup_sd_by_key() for light-weight files and VFS lookup by filename */ ++ ++int get_inv_entry(struct inode *invert_inode, /* inode of invert's body */ ++ inv_entry_type type, /* LIGHT-WEIGHT or ORDINARY */ ++ const reiser4_key * key, /* key of invert entry stat-data */ ++ char *filename, /* filename of the file to be opened */ ++ int flags, int mode) ++{ ++ int result; ++ struct inv_entry *ientry; ++ ++ assert("edward-107", invert_inode != NULL); ++ ++ ientry = allocate_inv_entry(); ++ if (IS_ERR(ientry)) ++ return (PTR_ERR(ientry)); ++ ++ if (type == LIGHT_WEIGHT_FILE) { ++ coord_t coord; ++ lock_handle lh; ++ ++ assert("edward-108", key != NULL); ++ ++ init_coord(&coord); ++ init_lh(&lh); ++ 
result = ++ lookup_sd_by_key(reiser4_tree_by_inode(invert_inode), ++ ZNODE_READ_LOCK, &coord, &lh, key); ++ if (result == 0) ++ init_incore_sd_base(ientry, coord); ++ ++ done_lh(&lh); ++ done_coord(&coord); ++ return (result); ++ } else { ++ struct file *file = filp_open(filename, flags, mode); ++ /* FIXME_EDWARD here we need to check if we ++ did't follow to any mount point */ ++ ++ assert("edward-108", filename != NULL); ++ ++ if (IS_ERR(file)) ++ return (PTR_ERR(file)); ++ ientry->ie_file = file; ++ return 0; ++ } ++} ++ ++/* takes inode of invert, reads the body of this invert, parses it, ++ opens all invert entries and return pointer on the first inv_entry */ ++ ++struct inv_entry *open_invert(struct file *invert_file) ++{ ++ ++} ++ ++ssize_t subfile_read(struct *invert_entry, flow * f) ++{ ++ ++} ++ ++ssize_t subfile_write(struct *invert_entry, flow * f) ++{ ++ ++} ++ ++ssize_t invert_read(struct *file, flow * f) ++{ ++ ++} ++ ++ssize_t invert_write(struct *file, flow * f) ++{ ++ ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/file/symfile.c b/fs/reiser4/plugin/file/symfile.c +new file mode 100644 +index 0000000..814dfb8 +--- /dev/null ++++ b/fs/reiser4/plugin/file/symfile.c +@@ -0,0 +1,87 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Symfiles are a generalization of Unix symlinks. ++ ++ A symfile when read behaves as though you took its contents and ++ substituted them into the reiser4 naming system as the right hand side ++ of an assignment, and then read that which you had assigned to it. ++ ++ A key issue for symfiles is how to implement writes through to ++ subfiles. In general, one must have some method of determining what ++ of that which is written to the symfile is written to what subfile. 
++ This can be done by use of custom plugin methods written by users, or ++ by using a few general methods we provide for those willing to endure ++ the insertion of delimiters into what is read. ++ ++ Writing to symfiles without delimiters to denote what is written to ++ what subfile is not supported by any plugins we provide in this ++ release. Our most sophisticated support for writes is that embodied ++ by the invert plugin (see invert.c). ++ ++ A read only version of the /etc/passwd file might be ++ constructed as a symfile whose contents are as follows: ++ ++ /etc/passwd/userlines/* ++ ++ or ++ ++ /etc/passwd/userlines/demidov+/etc/passwd/userlines/edward+/etc/passwd/userlines/reiser+/etc/passwd/userlines/root ++ ++ or ++ ++ /etc/passwd/userlines/(demidov+edward+reiser+root) ++ ++ A symfile with contents ++ ++ /filenameA+"(some text stored in the uninvertable symfile)+/filenameB ++ ++ will return when read ++ ++ The contents of filenameAsome text stored in the uninvertable symfileThe contents of filenameB ++ ++ and write of what has been read will not be possible to implement as ++ an identity operation because there are no delimiters denoting the ++ boundaries of what is to be written to what subfile. ++ ++ Note that one could make this a read/write symfile if one specified ++ delimiters, and the write method understood those delimiters delimited ++ what was written to subfiles. 
++ ++ So, specifying the symfile in a manner that allows writes: ++ ++ /etc/passwd/userlines/demidov+"( ++ )+/etc/passwd/userlines/edward+"( ++ )+/etc/passwd/userlines/reiser+"( ++ )+/etc/passwd/userlines/root+"( ++ ) ++ ++ or ++ ++ /etc/passwd/userlines/(demidov+"( ++ )+edward+"( ++ )+reiser+"( ++ )+root+"( ++ )) ++ ++ and the file demidov might be specified as: ++ ++ /etc/passwd/userlines/demidov/username+"(:)+/etc/passwd/userlines/demidov/password+"(:)+/etc/passwd/userlines/demidov/userid+"(:)+/etc/passwd/userlines/demidov/groupid+"(:)+/etc/passwd/userlines/demidov/gecos+"(:)+/etc/passwd/userlines/demidov/home+"(:)+/etc/passwd/userlines/demidov/shell ++ ++ or ++ ++ /etc/passwd/userlines/demidov/(username+"(:)+password+"(:)+userid+"(:)+groupid+"(:)+gecos+"(:)+home+"(:)+shell) ++ ++ Notice that if the file demidov has a carriage return in it, the ++ parsing fails, but then if you put carriage returns in the wrong place ++ in a normal /etc/passwd file it breaks things also. ++ ++ Note that it is forbidden to have no text between two interpolations ++ if one wants to be able to define what parts of a write go to what ++ subfiles referenced in an interpolation. ++ ++ If one wants to be able to add new lines by writing to the file, one ++ must either write a custom plugin for /etc/passwd that knows how to ++ name an added line, or one must use an invert, or one must use a more ++ sophisticated symfile syntax that we are not planning to write for ++ version 4.0. 
*/
diff --git a/fs/reiser4/plugin/file/symlink.c b/fs/reiser4/plugin/file/symlink.c
new file mode 100644
index 0000000..bcf3ef8
--- /dev/null
+++ b/fs/reiser4/plugin/file/symlink.c
@@ -0,0 +1,95 @@
/* Copyright 2002, 2003, 2005 by Hans Reiser, licensing governed by reiser4/README */

#include "../../inode.h"

/* NOTE(review): the header names of the two #include directives below
   were lost in extraction -- restore from pristine source */
#include
#include

/* file plugin methods specific for symlink files
   (SYMLINK_FILE_PLUGIN_ID) */

/* this is implementation of create_object method of file plugin for
   SYMLINK_FILE_PLUGIN_ID
 */

/**
 * reiser4_create_symlink - create_object of file plugin for SYMLINK_FILE_PLUGIN_ID
 * @symlink: inode of symlink object
 * @dir: inode of parent directory
 * @info: parameters of new object
 *
 * Inserts stat data with symlink extension where into the tree.
 */
int reiser4_create_symlink(struct inode *symlink,
			   struct inode *dir UNUSED_ARG,
			   reiser4_object_create_data *data	/* info passed to us
								 * this is filled by
								 * reiser4() syscall
								 * in particular */)
{
	int result;

	assert("nikita-680", symlink != NULL);
	assert("nikita-681", S_ISLNK(symlink->i_mode));
	assert("nikita-685", reiser4_inode_get_flag(symlink, REISER4_NO_SD));
	assert("nikita-682", dir != NULL);
	assert("nikita-684", data != NULL);
	assert("nikita-686", data->id == SYMLINK_FILE_PLUGIN_ID);

	/*
	 * stat data of symlink has symlink extension in which we store
	 * symlink content, that is, path symlink is pointing to.
	 */
	reiser4_inode_data(symlink)->extmask |= (1 << SYMLINK_STAT);

	assert("vs-838", symlink->i_private == NULL);
	/* target path is stashed in i_private so write_sd_by_inode() can
	   append it to the stat-data */
	symlink->i_private = (void *)data->name;

	assert("vs-843", symlink->i_size == 0);
	INODE_SET_FIELD(symlink, i_size, strlen(data->name));

	/* insert stat data appended with data->name */
	result = inode_file_plugin(symlink)->write_sd_by_inode(symlink);
	if (result) {
		/* FIXME-VS: Make sure that symlink->i_private is not attached
		   to kmalloced data */
		INODE_SET_FIELD(symlink, i_size, 0);
	} else {
		assert("vs-849", symlink->i_private
		       && reiser4_inode_get_flag(symlink,
						 REISER4_GENERIC_PTR_USED));
		assert("vs-850",
		       !memcmp((char *)symlink->i_private, data->name,
			       (size_t) symlink->i_size + 1));
	}
	return result;
}

/* this is implementation of destroy_inode method of file plugin for
   SYMLINK_FILE_PLUGIN_ID
 */
void destroy_inode_symlink(struct inode *inode)
{
	assert("edward-799",
	       inode_file_plugin(inode) ==
	       file_plugin_by_id(SYMLINK_FILE_PLUGIN_ID));
	assert("edward-800", !is_bad_inode(inode) && is_inode_loaded(inode));
	assert("edward-801", reiser4_inode_get_flag(inode,
						    REISER4_GENERIC_PTR_USED));
	assert("vs-839", S_ISLNK(inode->i_mode));

	/* free the kmalloced target path stored by the symlink plugin */
	kfree(inode->i_private);
	inode->i_private = NULL;
	reiser4_inode_clr_flag(inode, REISER4_GENERIC_PTR_USED);
}

/*
  Local variables:
  c-indentation-style: "K&R"
  mode-name: "LC"
  c-basic-offset: 8
  tab-width: 8
  fill-column: 80
  scroll-step: 1
  End:
*/
diff --git a/fs/reiser4/plugin/file/tail_conversion.c b/fs/reiser4/plugin/file/tail_conversion.c
new file mode 100644
index 0000000..b57776f
--- /dev/null
+++ b/fs/reiser4/plugin/file/tail_conversion.c
@@ -0,0 +1,726 @@
/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */

#include "../../inode.h"
#include "../../super.h"
#include "../../page_cache.h"
#include "../../carry.h"
#include "../../safe_link.h"
#include "../../vfs_ops.h"

/* NOTE(review): the header name of the #include below was lost in
   extraction -- restore from pristine source */
#include

/* this file contains:
   tail2extent and extent2tail */

/* exclusive access to a file is acquired when file state changes: tail2extent, empty2tail, extent2tail, etc */
void get_exclusive_access(unix_file_info_t * uf_info)
{
	assert("nikita-3028", reiser4_schedulable());
	assert("nikita-3047", LOCK_CNT_NIL(inode_sem_w));
	assert("nikita-3048", LOCK_CNT_NIL(inode_sem_r));
	/*
	 * "deadlock avoidance": sometimes we commit a transaction under
	 * rw-semaphore on a file. Such commit can deadlock with another
	 * thread that captured some block (hence preventing atom from being
	 * committed) and waits on rw-semaphore.
	 */
	reiser4_txn_restart_current();
	LOCK_CNT_INC(inode_sem_w);
	down_write(&uf_info->latch);
	uf_info->exclusive_use = 1;
	assert("vs-1713", uf_info->ea_owner == NULL);
	/* NOTE(review): assertion id "vs-1713" is duplicated on the next
	   line -- probably meant to be a distinct id */
	assert("vs-1713", atomic_read(&uf_info->nr_neas) == 0);
	ON_DEBUG(uf_info->ea_owner = current);
}

/* release exclusive access; mirror of get_exclusive_access() */
void drop_exclusive_access(unix_file_info_t * uf_info)
{
	assert("vs-1714", uf_info->ea_owner == current);
	assert("vs-1715", atomic_read(&uf_info->nr_neas) == 0);
	ON_DEBUG(uf_info->ea_owner = NULL);
	uf_info->exclusive_use = 0;
	up_write(&uf_info->latch);
	assert("nikita-3049", LOCK_CNT_NIL(inode_sem_r));
	assert("nikita-3049", LOCK_CNT_GTZ(inode_sem_w));
	LOCK_CNT_DEC(inode_sem_w);
	reiser4_txn_restart_current();
}

/**
 * nea_grabbed - do something when file semaphore is down_read-ed
 * @uf_info:
 *
 * This is called when nonexclusive access is obtained on file. All it does is
 * for debugging purposes.
++ */ ++static void nea_grabbed(unix_file_info_t *uf_info) ++{ ++#if REISER4_DEBUG ++ LOCK_CNT_INC(inode_sem_r); ++ assert("vs-1716", uf_info->ea_owner == NULL); ++ atomic_inc(&uf_info->nr_neas); ++ uf_info->last_reader = current; ++#endif ++} ++ ++/** ++ * get_nonexclusive_access - get nonexclusive access to a file ++ * @uf_info: unix file specific part of inode to obtain access to ++ * ++ * Nonexclusive access is obtained on a file before read, write, readpage. ++ */ ++void get_nonexclusive_access(unix_file_info_t *uf_info) ++{ ++ assert("nikita-3029", reiser4_schedulable()); ++ assert("nikita-3361", get_current_context()->trans->atom == NULL); ++ ++ down_read(&uf_info->latch); ++ nea_grabbed(uf_info); ++} ++ ++/** ++ * try_to_get_nonexclusive_access - try to get nonexclusive access to a file ++ * @uf_info: unix file specific part of inode to obtain access to ++ * ++ * Non-blocking version of nonexclusive access obtaining. ++ */ ++int try_to_get_nonexclusive_access(unix_file_info_t *uf_info) ++{ ++ int result; ++ ++ result = down_read_trylock(&uf_info->latch); ++ if (result) ++ nea_grabbed(uf_info); ++ return result; ++} ++ ++void drop_nonexclusive_access(unix_file_info_t * uf_info) ++{ ++ assert("vs-1718", uf_info->ea_owner == NULL); ++ assert("vs-1719", atomic_read(&uf_info->nr_neas) > 0); ++ ON_DEBUG(atomic_dec(&uf_info->nr_neas)); ++ ++ up_read(&uf_info->latch); ++ ++ LOCK_CNT_DEC(inode_sem_r); ++ reiser4_txn_restart_current(); ++} ++ ++/* part of tail2extent. Cut all items covering @count bytes starting from ++ @offset */ ++/* Audited by: green(2002.06.15) */ ++static int cut_formatting_items(struct inode *inode, loff_t offset, int count) ++{ ++ reiser4_key from, to; ++ ++ /* AUDIT: How about putting an assertion here, what would check ++ all provided range is covered by tail items only? 
*/ ++ /* key of first byte in the range to be cut */ ++ inode_file_plugin(inode)->key_by_inode(inode, offset, &from); ++ ++ /* key of last byte in that range */ ++ to = from; ++ set_key_offset(&to, (__u64) (offset + count - 1)); ++ ++ /* cut everything between those keys */ ++ return reiser4_cut_tree(reiser4_tree_by_inode(inode), &from, &to, ++ inode, 0); ++} ++ ++static void release_all_pages(struct page **pages, unsigned nr_pages) ++{ ++ unsigned i; ++ ++ for (i = 0; i < nr_pages; i++) { ++ if (pages[i] == NULL) { ++ unsigned j; ++ for (j = i + 1; j < nr_pages; j++) ++ assert("vs-1620", pages[j] == NULL); ++ break; ++ } ++ page_cache_release(pages[i]); ++ pages[i] = NULL; ++ } ++} ++ ++/* part of tail2extent. replace tail items with extent one. Content of tail ++ items (@count bytes) being cut are copied already into ++ pages. extent_writepage method is called to create extents corresponding to ++ those pages */ ++static int replace(struct inode *inode, struct page **pages, unsigned nr_pages, int count) ++{ ++ int result; ++ unsigned i; ++ STORE_COUNTERS; ++ ++ if (nr_pages == 0) ++ return 0; ++ ++ assert("vs-596", pages[0]); ++ ++ /* cut copied items */ ++ result = cut_formatting_items(inode, page_offset(pages[0]), count); ++ if (result) ++ return result; ++ ++ CHECK_COUNTERS; ++ ++ /* put into tree replacement for just removed items: extent item, namely */ ++ for (i = 0; i < nr_pages; i++) { ++ result = add_to_page_cache_lru(pages[i], inode->i_mapping, ++ pages[i]->index, ++ mapping_gfp_mask(inode-> ++ i_mapping)); ++ if (result) ++ break; ++ unlock_page(pages[i]); ++ result = find_or_create_extent(pages[i]); ++ if (result) ++ break; ++ SetPageUptodate(pages[i]); ++ } ++ return result; ++} ++ ++#define TAIL2EXTENT_PAGE_NUM 3 /* number of pages to fill before cutting tail ++ * items */ ++ ++static int reserve_tail2extent_iteration(struct inode *inode) ++{ ++ reiser4_block_nr unformatted_nodes; ++ reiser4_tree *tree; ++ ++ tree = reiser4_tree_by_inode(inode); ++ 
++ /* number of unformatted nodes which will be created */ ++ unformatted_nodes = TAIL2EXTENT_PAGE_NUM; ++ ++ /* ++ * space required for one iteration of extent->tail conversion: ++ * ++ * 1. kill N tail items ++ * ++ * 2. insert TAIL2EXTENT_PAGE_NUM unformatted nodes ++ * ++ * 3. insert TAIL2EXTENT_PAGE_NUM (worst-case single-block ++ * extents) extent units. ++ * ++ * 4. drilling to the leaf level by coord_by_key() ++ * ++ * 5. possible update of stat-data ++ * ++ */ ++ grab_space_enable(); ++ return reiser4_grab_space ++ (2 * tree->height + ++ TAIL2EXTENT_PAGE_NUM + ++ TAIL2EXTENT_PAGE_NUM * estimate_one_insert_into_item(tree) + ++ 1 + estimate_one_insert_item(tree) + ++ inode_file_plugin(inode)->estimate.update(inode), BA_CAN_COMMIT); ++} ++ ++/* clear stat data's flag indicating that conversion is being converted */ ++static int complete_conversion(struct inode *inode) ++{ ++ int result; ++ ++ grab_space_enable(); ++ result = ++ reiser4_grab_space(inode_file_plugin(inode)->estimate.update(inode), ++ BA_CAN_COMMIT); ++ if (result == 0) { ++ reiser4_inode_clr_flag(inode, REISER4_PART_MIXED); ++ result = reiser4_update_sd(inode); ++ } ++ if (result) ++ warning("vs-1696", "Failed to clear converting bit of %llu: %i", ++ (unsigned long long)get_inode_oid(inode), result); ++ return 0; ++} ++ ++/** ++ * find_start ++ * @inode: ++ * @id: ++ * @offset: ++ * ++ * this is used by tail2extent and extent2tail to detect where previous ++ * uncompleted conversion stopped ++ */ ++static int find_start(struct inode *inode, reiser4_plugin_id id, __u64 *offset) ++{ ++ int result; ++ lock_handle lh; ++ coord_t coord; ++ unix_file_info_t *ufo; ++ int found; ++ reiser4_key key; ++ ++ ufo = unix_file_inode_data(inode); ++ init_lh(&lh); ++ result = 0; ++ found = 0; ++ inode_file_plugin(inode)->key_by_inode(inode, *offset, &key); ++ do { ++ init_lh(&lh); ++ result = find_file_item_nohint(&coord, &lh, &key, ++ ZNODE_READ_LOCK, inode); ++ ++ if (result == CBK_COORD_FOUND) { ++ if 
(coord.between == AT_UNIT) { ++ /*coord_clear_iplug(&coord); */ ++ result = zload(coord.node); ++ if (result == 0) { ++ if (item_id_by_coord(&coord) == id) ++ found = 1; ++ else ++ item_plugin_by_coord(&coord)->s. ++ file.append_key(&coord, ++ &key); ++ zrelse(coord.node); ++ } ++ } else ++ result = RETERR(-ENOENT); ++ } ++ done_lh(&lh); ++ } while (result == 0 && !found); ++ *offset = get_key_offset(&key); ++ return result; ++} ++ ++/** ++ * tail2extent ++ * @uf_info: ++ * ++ * ++ */ ++int tail2extent(unix_file_info_t *uf_info) ++{ ++ int result; ++ reiser4_key key; /* key of next byte to be moved to page */ ++ char *p_data; /* data of page */ ++ unsigned page_off = 0, /* offset within the page where to copy data */ ++ count; /* number of bytes of item which can be ++ * copied to page */ ++ struct page *pages[TAIL2EXTENT_PAGE_NUM]; ++ struct page *page; ++ int done; /* set to 1 when all file is read */ ++ char *item; ++ int i; ++ struct inode *inode; ++ int first_iteration; ++ int bytes; ++ __u64 offset; ++ ++ assert("nikita-3362", ea_obtained(uf_info)); ++ inode = unix_file_info_to_inode(uf_info); ++ assert("nikita-3412", !IS_RDONLY(inode)); ++ assert("vs-1649", uf_info->container != UF_CONTAINER_EXTENTS); ++ assert("", !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV)); ++ ++ offset = 0; ++ first_iteration = 1; ++ result = 0; ++ if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) { ++ /* ++ * file is marked on disk as there was a conversion which did ++ * not complete due to either crash or some error. 
Find which ++ * offset tail conversion stopped at ++ */ ++ result = find_start(inode, FORMATTING_ID, &offset); ++ if (result == -ENOENT) { ++ /* no tail items found, everything is converted */ ++ uf_info->container = UF_CONTAINER_EXTENTS; ++ complete_conversion(inode); ++ return 0; ++ } else if (result != 0) ++ /* some other error */ ++ return result; ++ first_iteration = 0; ++ } ++ ++ reiser4_inode_set_flag(inode, REISER4_PART_IN_CONV); ++ ++ /* get key of first byte of a file */ ++ inode_file_plugin(inode)->key_by_inode(inode, offset, &key); ++ ++ done = 0; ++ while (done == 0) { ++ memset(pages, 0, sizeof(pages)); ++ result = reserve_tail2extent_iteration(inode); ++ if (result != 0) ++ goto out; ++ if (first_iteration) { ++ reiser4_inode_set_flag(inode, REISER4_PART_MIXED); ++ reiser4_update_sd(inode); ++ first_iteration = 0; ++ } ++ bytes = 0; ++ for (i = 0; i < sizeof_array(pages) && done == 0; i++) { ++ assert("vs-598", ++ (get_key_offset(&key) & ~PAGE_CACHE_MASK) == 0); ++ page = alloc_page(reiser4_ctx_gfp_mask_get()); ++ if (!page) { ++ result = RETERR(-ENOMEM); ++ goto error; ++ } ++ ++ page->index = ++ (unsigned long)(get_key_offset(&key) >> ++ PAGE_CACHE_SHIFT); ++ /* ++ * usually when one is going to longterm lock znode (as ++ * find_file_item does, for instance) he must not hold ++ * locked pages. However, there is an exception for ++ * case tail2extent. 
Pages appearing here are not ++ * reachable to everyone else, they are clean, they do ++ * not have jnodes attached so keeping them locked do ++ * not risk deadlock appearance ++ */ ++ assert("vs-983", !PagePrivate(page)); ++ reiser4_invalidate_pages(inode->i_mapping, page->index, ++ 1, 0); ++ ++ for (page_off = 0; page_off < PAGE_CACHE_SIZE;) { ++ coord_t coord; ++ lock_handle lh; ++ ++ /* get next item */ ++ /* FIXME: we might want to readahead here */ ++ init_lh(&lh); ++ result = ++ find_file_item_nohint(&coord, &lh, &key, ++ ZNODE_READ_LOCK, ++ inode); ++ if (result != CBK_COORD_FOUND) { ++ /* ++ * error happened of not items of file ++ * were found ++ */ ++ done_lh(&lh); ++ page_cache_release(page); ++ goto error; ++ } ++ ++ if (coord.between == AFTER_UNIT) { ++ /* ++ * end of file is reached. Padd page ++ * with zeros ++ */ ++ done_lh(&lh); ++ done = 1; ++ p_data = kmap_atomic(page, KM_USER0); ++ memset(p_data + page_off, 0, ++ PAGE_CACHE_SIZE - page_off); ++ kunmap_atomic(p_data, KM_USER0); ++ break; ++ } ++ ++ result = zload(coord.node); ++ if (result) { ++ page_cache_release(page); ++ done_lh(&lh); ++ goto error; ++ } ++ assert("vs-856", coord.between == AT_UNIT); ++ item = ((char *)item_body_by_coord(&coord)) + ++ coord.unit_pos; ++ ++ /* how many bytes to copy */ ++ count = ++ item_length_by_coord(&coord) - ++ coord.unit_pos; ++ /* limit length of copy to end of page */ ++ if (count > PAGE_CACHE_SIZE - page_off) ++ count = PAGE_CACHE_SIZE - page_off; ++ ++ /* ++ * copy item (as much as will fit starting from ++ * the beginning of the item) into the page ++ */ ++ p_data = kmap_atomic(page, KM_USER0); ++ memcpy(p_data + page_off, item, count); ++ kunmap_atomic(p_data, KM_USER0); ++ ++ page_off += count; ++ bytes += count; ++ set_key_offset(&key, ++ get_key_offset(&key) + count); ++ ++ zrelse(coord.node); ++ done_lh(&lh); ++ } /* end of loop which fills one page by content of ++ * formatting items */ ++ ++ if (page_off) { ++ /* something was copied into 
page */ ++ pages[i] = page; ++ } else { ++ page_cache_release(page); ++ assert("vs-1648", done == 1); ++ break; ++ } ++ } /* end of loop through pages of one conversion iteration */ ++ ++ if (i > 0) { ++ result = replace(inode, pages, i, bytes); ++ release_all_pages(pages, sizeof_array(pages)); ++ if (result) ++ goto error; ++ /* ++ * we have to drop exclusive access to avoid deadlock ++ * which may happen because called by ++ * reiser4_writepages capture_unix_file requires to get ++ * non-exclusive access to a file. It is safe to drop ++ * EA in the middle of tail2extent conversion because ++ * write_unix_file/unix_setattr(truncate)/release_unix_file(extent2tail) ++ * are serialized by reiser4_inode->mutex_write semaphore and ++ * because read_unix_file works (should at least) on ++ * partially converted files ++ */ ++ drop_exclusive_access(uf_info); ++ /* throttle the conversion */ ++ reiser4_throttle_write(inode); ++ get_exclusive_access(uf_info); ++ ++ /* ++ * nobody is allowed to complete conversion but a ++ * process which started it ++ */ ++ assert("", reiser4_inode_get_flag(inode, ++ REISER4_PART_MIXED)); ++ } ++ } ++ ++ reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV); ++ ++ if (result == 0) { ++ /* file is converted to extent items */ ++ assert("vs-1697", reiser4_inode_get_flag(inode, ++ REISER4_PART_MIXED)); ++ ++ uf_info->container = UF_CONTAINER_EXTENTS; ++ complete_conversion(inode); ++ } else { ++ /* ++ * conversion is not complete. Inode was already marked as ++ * REISER4_PART_CONV and stat-data were updated at the first ++ * iteration of the loop above. 
++ */ ++ error: ++ release_all_pages(pages, sizeof_array(pages)); ++ warning("nikita-2282", "Partial conversion of %llu: %i", ++ (unsigned long long)get_inode_oid(inode), result); ++ } ++ ++ out: ++ return result; ++} ++ ++static int reserve_extent2tail_iteration(struct inode *inode) ++{ ++ reiser4_tree *tree; ++ ++ tree = reiser4_tree_by_inode(inode); ++ /* ++ * reserve blocks for (in this order): ++ * ++ * 1. removal of extent item ++ * ++ * 2. insertion of tail by insert_flow() ++ * ++ * 3. drilling to the leaf level by coord_by_key() ++ * ++ * 4. possible update of stat-data ++ */ ++ grab_space_enable(); ++ return reiser4_grab_space ++ (estimate_one_item_removal(tree) + ++ estimate_insert_flow(tree->height) + ++ 1 + estimate_one_insert_item(tree) + ++ inode_file_plugin(inode)->estimate.update(inode), BA_CAN_COMMIT); ++} ++ ++/* for every page of file: read page, cut part of extent pointing to this page, ++ put data of page tree by tail item */ ++int extent2tail(unix_file_info_t *uf_info) ++{ ++ int result; ++ struct inode *inode; ++ struct page *page; ++ unsigned long num_pages, i; ++ unsigned long start_page; ++ reiser4_key from; ++ reiser4_key to; ++ unsigned count; ++ __u64 offset; ++ ++ assert("nikita-3362", ea_obtained(uf_info)); ++ inode = unix_file_info_to_inode(uf_info); ++ assert("nikita-3412", !IS_RDONLY(inode)); ++ assert("vs-1649", uf_info->container != UF_CONTAINER_TAILS); ++ assert("", !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV)); ++ ++ offset = 0; ++ if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) { ++ /* ++ * file is marked on disk as there was a conversion which did ++ * not complete due to either crash or some error. 
Find which ++ * offset tail conversion stopped at ++ */ ++ result = find_start(inode, EXTENT_POINTER_ID, &offset); ++ if (result == -ENOENT) { ++ /* no extent found, everything is converted */ ++ uf_info->container = UF_CONTAINER_TAILS; ++ complete_conversion(inode); ++ return 0; ++ } else if (result != 0) ++ /* some other error */ ++ return result; ++ } ++ ++ reiser4_inode_set_flag(inode, REISER4_PART_IN_CONV); ++ ++ /* number of pages in the file */ ++ num_pages = ++ (inode->i_size + - offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; ++ start_page = offset >> PAGE_CACHE_SHIFT; ++ ++ inode_file_plugin(inode)->key_by_inode(inode, offset, &from); ++ to = from; ++ ++ result = 0; ++ for (i = 0; i < num_pages; i++) { ++ __u64 start_byte; ++ ++ result = reserve_extent2tail_iteration(inode); ++ if (result != 0) ++ break; ++ if (i == 0 && offset == 0) { ++ reiser4_inode_set_flag(inode, REISER4_PART_MIXED); ++ reiser4_update_sd(inode); ++ } ++ ++ page = read_mapping_page(inode->i_mapping, ++ (unsigned)(i + start_page), NULL); ++ if (IS_ERR(page)) { ++ result = PTR_ERR(page); ++ break; ++ } ++ ++ wait_on_page_locked(page); ++ ++ if (!PageUptodate(page)) { ++ page_cache_release(page); ++ result = RETERR(-EIO); ++ break; ++ } ++ ++ /* cut part of file we have read */ ++ start_byte = (__u64) (i << PAGE_CACHE_SHIFT); ++ set_key_offset(&from, start_byte); ++ set_key_offset(&to, start_byte + PAGE_CACHE_SIZE - 1); ++ /* ++ * reiser4_cut_tree_object() returns -E_REPEAT to allow atom ++ * commits during over-long truncates. But ++ * extent->tail conversion should be performed in one ++ * transaction. 
++ */ ++ result = reiser4_cut_tree(reiser4_tree_by_inode(inode), &from, ++ &to, inode, 0); ++ ++ if (result) { ++ page_cache_release(page); ++ break; ++ } ++ ++ /* put page data into tree via tail_write */ ++ count = PAGE_CACHE_SIZE; ++ if ((i == (num_pages - 1)) && ++ (inode->i_size & ~PAGE_CACHE_MASK)) ++ /* last page can be incompleted */ ++ count = (inode->i_size & ~PAGE_CACHE_MASK); ++ while (count) { ++ struct dentry dentry; ++ struct file file; ++ loff_t pos; ++ ++ dentry.d_inode = inode; ++ file.f_dentry = &dentry; ++ file.private_data = NULL; ++ file.f_pos = start_byte; ++ file.private_data = NULL; ++ pos = start_byte; ++ result = reiser4_write_tail(&file, ++ (char __user *)kmap(page), ++ count, &pos); ++ reiser4_free_file_fsdata(&file); ++ if (result <= 0) { ++ warning("", "reiser4_write_tail failed"); ++ page_cache_release(page); ++ reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV); ++ return result; ++ } ++ count -= result; ++ } ++ ++ /* release page */ ++ lock_page(page); ++ /* page is already detached from jnode and mapping. */ ++ assert("vs-1086", page->mapping == NULL); ++ assert("nikita-2690", ++ (!PagePrivate(page) && jprivate(page) == 0)); ++ /* waiting for writeback completion with page lock held is ++ * perfectly valid. 
*/ ++ wait_on_page_writeback(page); ++ reiser4_drop_page(page); ++ /* release reference taken by read_cache_page() above */ ++ page_cache_release(page); ++ ++ drop_exclusive_access(uf_info); ++ /* throttle the conversion */ ++ reiser4_throttle_write(inode); ++ get_exclusive_access(uf_info); ++ /* ++ * nobody is allowed to complete conversion but a process which ++ * started it ++ */ ++ assert("", reiser4_inode_get_flag(inode, REISER4_PART_MIXED)); ++ } ++ ++ reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV); ++ ++ if (i == num_pages) { ++ /* file is converted to formatted items */ ++ assert("vs-1698", reiser4_inode_get_flag(inode, ++ REISER4_PART_MIXED)); ++ assert("vs-1260", ++ inode_has_no_jnodes(reiser4_inode_data(inode))); ++ ++ uf_info->container = UF_CONTAINER_TAILS; ++ complete_conversion(inode); ++ return 0; ++ } ++ /* ++ * conversion is not complete. Inode was already marked as ++ * REISER4_PART_MIXED and stat-data were updated at the first * ++ * iteration of the loop above. 
++ */ ++ warning("nikita-2282", ++ "Partial conversion of %llu: %lu of %lu: %i", ++ (unsigned long long)get_inode_oid(inode), i, ++ num_pages, result); ++ ++ return result; ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * scroll-step: 1 ++ * End: ++ */ +diff --git a/fs/reiser4/plugin/file_ops.c b/fs/reiser4/plugin/file_ops.c +new file mode 100644 +index 0000000..ef8ba9d +--- /dev/null ++++ b/fs/reiser4/plugin/file_ops.c +@@ -0,0 +1,168 @@ ++/* Copyright 2005 by Hans Reiser, licensing governed by ++ reiser4/README */ ++ ++/* this file contains typical implementations for some of methods of ++ struct file_operations and of struct address_space_operations ++*/ ++ ++#include "../inode.h" ++#include "object.h" ++ ++/* file operations */ ++ ++/* implementation of vfs's llseek method of struct file_operations for ++ typical directory can be found in readdir_common.c ++*/ ++loff_t reiser4_llseek_dir_common(struct file *, loff_t, int origin); ++ ++/* implementation of vfs's readdir method of struct file_operations for ++ typical directory can be found in readdir_common.c ++*/ ++int reiser4_readdir_common(struct file *, void *dirent, filldir_t); ++ ++/** ++ * reiser4_release_dir_common - release of struct file_operations ++ * @inode: inode of released file ++ * @file: file to release ++ * ++ * Implementation of release method of struct file_operations for typical ++ * directory. All it does is freeing of reiser4 specific file data. 
++*/ ++int reiser4_release_dir_common(struct inode *inode, struct file *file) ++{ ++ reiser4_context *ctx; ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ reiser4_free_file_fsdata(file); ++ reiser4_exit_context(ctx); ++ return 0; ++} ++ ++/* this is common implementation of vfs's fsync method of struct ++ file_operations ++*/ ++int reiser4_sync_common(struct file *file, struct dentry *dentry, int datasync) ++{ ++ reiser4_context *ctx; ++ int result; ++ ++ ctx = reiser4_init_context(dentry->d_inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ result = txnmgr_force_commit_all(dentry->d_inode->i_sb, 0); ++ ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++/* this is common implementation of vfs's sendfile method of struct ++ file_operations ++ ++ Reads @count bytes from @file and calls @actor for every page read. This is ++ needed for loop back devices support. ++*/ ++#if 0 ++ssize_t ++sendfile_common(struct file *file, loff_t *ppos, size_t count, ++ read_actor_t actor, void *target) ++{ ++ reiser4_context *ctx; ++ ssize_t result; ++ ++ ctx = reiser4_init_context(file->f_dentry->d_inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ result = generic_file_sendfile(file, ppos, count, actor, target); ++ reiser4_exit_context(ctx); ++ return result; ++} ++#endif /* 0 */ ++ ++/* address space operations */ ++ ++/* this is common implementation of vfs's prepare_write method of struct ++ address_space_operations ++*/ ++int ++prepare_write_common(struct file *file, struct page *page, unsigned from, ++ unsigned to) ++{ ++ reiser4_context *ctx; ++ int result; ++ ++ ctx = reiser4_init_context(page->mapping->host->i_sb); ++ result = do_prepare_write(file, page, from, to); ++ ++ /* don't commit transaction under inode semaphore */ ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ ++ return result; ++} ++ ++/* this is helper for prepare_write_common and 
prepare_write_unix_file ++ */ ++int ++do_prepare_write(struct file *file, struct page *page, unsigned from, ++ unsigned to) ++{ ++ int result; ++ file_plugin *fplug; ++ struct inode *inode; ++ ++ assert("umka-3099", file != NULL); ++ assert("umka-3100", page != NULL); ++ assert("umka-3095", PageLocked(page)); ++ ++ if (to - from == PAGE_CACHE_SIZE || PageUptodate(page)) ++ return 0; ++ ++ inode = page->mapping->host; ++ fplug = inode_file_plugin(inode); ++ ++ if (page->mapping->a_ops->readpage == NULL) ++ return RETERR(-EINVAL); ++ ++ result = page->mapping->a_ops->readpage(file, page); ++ if (result != 0) { ++ SetPageError(page); ++ ClearPageUptodate(page); ++ /* All reiser4 readpage() implementations should return the ++ * page locked in case of error. */ ++ assert("nikita-3472", PageLocked(page)); ++ } else { ++ /* ++ * ->readpage() either: ++ * ++ * 1. starts IO against @page. @page is locked for IO in ++ * this case. ++ * ++ * 2. doesn't start IO. @page is unlocked. ++ * ++ * In either case, page should be locked. ++ */ ++ lock_page(page); ++ /* ++ * IO (if any) is completed at this point. Check for IO ++ * errors. ++ */ ++ if (!PageUptodate(page)) ++ result = RETERR(-EIO); ++ } ++ assert("umka-3098", PageLocked(page)); ++ return result; ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * scroll-step: 1 ++ * End: ++ */ +diff --git a/fs/reiser4/plugin/file_ops_readdir.c b/fs/reiser4/plugin/file_ops_readdir.c +new file mode 100644 +index 0000000..2bd7826 +--- /dev/null ++++ b/fs/reiser4/plugin/file_ops_readdir.c +@@ -0,0 +1,657 @@ ++/* Copyright 2005 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++#include "../inode.h" ++ ++/* return true, iff @coord points to the valid directory item that is part of ++ * @inode directory. 
*/ ++static int is_valid_dir_coord(struct inode *inode, coord_t * coord) ++{ ++ return plugin_of_group(item_plugin_by_coord(coord), ++ DIR_ENTRY_ITEM_TYPE) && ++ inode_file_plugin(inode)->owns_item(inode, coord); ++} ++ ++/* compare two logical positions within the same directory */ ++static cmp_t dir_pos_cmp(const dir_pos * p1, const dir_pos * p2) ++{ ++ cmp_t result; ++ ++ assert("nikita-2534", p1 != NULL); ++ assert("nikita-2535", p2 != NULL); ++ ++ result = de_id_cmp(&p1->dir_entry_key, &p2->dir_entry_key); ++ if (result == EQUAL_TO) { ++ int diff; ++ ++ diff = p1->pos - p2->pos; ++ result = ++ (diff < 0) ? LESS_THAN : (diff ? GREATER_THAN : EQUAL_TO); ++ } ++ return result; ++} ++ ++/* see comment before reiser4_readdir_common() for overview of why "adjustment" is ++ * necessary. */ ++static void ++adjust_dir_pos(struct file *dir, ++ readdir_pos * readdir_spot, const dir_pos * mod_point, int adj) ++{ ++ dir_pos *pos; ++ ++ /* ++ * new directory entry was added (adj == +1) or removed (adj == -1) at ++ * the @mod_point. Directory file descriptor @dir is doing readdir and ++ * is currently positioned at @readdir_spot. Latter has to be updated ++ * to maintain stable readdir. ++ */ ++ /* directory is positioned to the beginning. */ ++ if (readdir_spot->entry_no == 0) ++ return; ++ ++ pos = &readdir_spot->position; ++ switch (dir_pos_cmp(mod_point, pos)) { ++ case LESS_THAN: ++ /* @mod_pos is _before_ @readdir_spot, that is, entry was ++ * added/removed on the left (in key order) of current ++ * position. 
*/ ++ /* logical number of directory entry readdir is "looking" at ++ * changes */ ++ readdir_spot->entry_no += adj; ++ assert("nikita-2577", ++ ergo(dir != NULL, reiser4_get_dir_fpos(dir) + adj >= 0)); ++ if (de_id_cmp(&pos->dir_entry_key, ++ &mod_point->dir_entry_key) == EQUAL_TO) { ++ assert("nikita-2575", mod_point->pos < pos->pos); ++ /* ++ * if entry added/removed has the same key as current ++ * for readdir, update counter of duplicate keys in ++ * @readdir_spot. ++ */ ++ pos->pos += adj; ++ } ++ break; ++ case GREATER_THAN: ++ /* directory is modified after @pos: nothing to do. */ ++ break; ++ case EQUAL_TO: ++ /* cannot insert an entry readdir is looking at, because it ++ already exists. */ ++ assert("nikita-2576", adj < 0); ++ /* directory entry to which @pos points to is being ++ removed. ++ ++ NOTE-NIKITA: Right thing to do is to update @pos to point ++ to the next entry. This is complex (we are under spin-lock ++ for one thing). Just rewind it to the beginning. Next ++ readdir will have to scan the beginning of ++ directory. Proper solution is to use semaphore in ++ spin lock's stead and use rewind_right() here. ++ ++ NOTE-NIKITA: now, semaphore is used, so... ++ */ ++ memset(readdir_spot, 0, sizeof *readdir_spot); ++ } ++} ++ ++/* scan all file-descriptors for this directory and adjust their ++ positions respectively. Should be used by implementations of ++ add_entry and rem_entry of dir plugin */ ++void reiser4_adjust_dir_file(struct inode *dir, const struct dentry *de, ++ int offset, int adj) ++{ ++ reiser4_file_fsdata *scan; ++ dir_pos mod_point; ++ ++ assert("nikita-2536", dir != NULL); ++ assert("nikita-2538", de != NULL); ++ assert("nikita-2539", adj != 0); ++ ++ build_de_id(dir, &de->d_name, &mod_point.dir_entry_key); ++ mod_point.pos = offset; ++ ++ spin_lock_inode(dir); ++ ++ /* ++ * new entry was added/removed in directory @dir. Scan all file ++ * descriptors for @dir that are currently involved into @readdir and ++ * update them. 
++ */ ++ ++ list_for_each_entry(scan, get_readdir_list(dir), dir.linkage) ++ adjust_dir_pos(scan->back, &scan->dir.readdir, &mod_point, adj); ++ ++ spin_unlock_inode(dir); ++} ++ ++/* ++ * traverse tree to start/continue readdir from the readdir position @pos. ++ */ ++static int dir_go_to(struct file *dir, readdir_pos * pos, tap_t * tap) ++{ ++ reiser4_key key; ++ int result; ++ struct inode *inode; ++ ++ assert("nikita-2554", pos != NULL); ++ ++ inode = dir->f_dentry->d_inode; ++ result = inode_dir_plugin(inode)->build_readdir_key(dir, &key); ++ if (result != 0) ++ return result; ++ result = reiser4_object_lookup(inode, ++ &key, ++ tap->coord, ++ tap->lh, ++ tap->mode, ++ FIND_EXACT, ++ LEAF_LEVEL, LEAF_LEVEL, ++ 0, &tap->ra_info); ++ if (result == CBK_COORD_FOUND) ++ result = rewind_right(tap, (int)pos->position.pos); ++ else { ++ tap->coord->node = NULL; ++ done_lh(tap->lh); ++ result = RETERR(-EIO); ++ } ++ return result; ++} ++ ++/* ++ * handling of non-unique keys: calculate at what ordinal position within ++ * sequence of directory items with identical keys @pos is. 
++ */ ++static int set_pos(struct inode *inode, readdir_pos * pos, tap_t * tap) ++{ ++ int result; ++ coord_t coord; ++ lock_handle lh; ++ tap_t scan; ++ de_id *did; ++ reiser4_key de_key; ++ ++ coord_init_zero(&coord); ++ init_lh(&lh); ++ reiser4_tap_init(&scan, &coord, &lh, ZNODE_READ_LOCK); ++ reiser4_tap_copy(&scan, tap); ++ reiser4_tap_load(&scan); ++ pos->position.pos = 0; ++ ++ did = &pos->position.dir_entry_key; ++ ++ if (is_valid_dir_coord(inode, scan.coord)) { ++ ++ build_de_id_by_key(unit_key_by_coord(scan.coord, &de_key), did); ++ ++ while (1) { ++ ++ result = go_prev_unit(&scan); ++ if (result != 0) ++ break; ++ ++ if (!is_valid_dir_coord(inode, scan.coord)) { ++ result = -EINVAL; ++ break; ++ } ++ ++ /* get key of directory entry */ ++ unit_key_by_coord(scan.coord, &de_key); ++ if (de_id_key_cmp(did, &de_key) != EQUAL_TO) { ++ /* duplicate-sequence is over */ ++ break; ++ } ++ pos->position.pos++; ++ } ++ } else ++ result = RETERR(-ENOENT); ++ reiser4_tap_relse(&scan); ++ reiser4_tap_done(&scan); ++ return result; ++} ++ ++/* ++ * "rewind" directory to @offset, i.e., set @pos and @tap correspondingly. 
++ */ ++static int dir_rewind(struct file *dir, readdir_pos * pos, tap_t * tap) ++{ ++ __u64 destination; ++ __s64 shift; ++ int result; ++ struct inode *inode; ++ loff_t dirpos; ++ ++ assert("nikita-2553", dir != NULL); ++ assert("nikita-2548", pos != NULL); ++ assert("nikita-2551", tap->coord != NULL); ++ assert("nikita-2552", tap->lh != NULL); ++ ++ dirpos = reiser4_get_dir_fpos(dir); ++ shift = dirpos - pos->fpos; ++ /* this is logical directory entry within @dir which we are rewinding ++ * to */ ++ destination = pos->entry_no + shift; ++ ++ inode = dir->f_dentry->d_inode; ++ if (dirpos < 0) ++ return RETERR(-EINVAL); ++ else if (destination == 0ll || dirpos == 0) { ++ /* rewind to the beginning of directory */ ++ memset(pos, 0, sizeof *pos); ++ return dir_go_to(dir, pos, tap); ++ } else if (destination >= inode->i_size) ++ return RETERR(-ENOENT); ++ ++ if (shift < 0) { ++ /* I am afraid of negative numbers */ ++ shift = -shift; ++ /* rewinding to the left */ ++ if (shift <= (int)pos->position.pos) { ++ /* destination is within sequence of entries with ++ duplicate keys. */ ++ result = dir_go_to(dir, pos, tap); ++ } else { ++ shift -= pos->position.pos; ++ while (1) { ++ /* repetitions: deadlock is possible when ++ going to the left. */ ++ result = dir_go_to(dir, pos, tap); ++ if (result == 0) { ++ result = rewind_left(tap, shift); ++ if (result == -E_DEADLOCK) { ++ reiser4_tap_done(tap); ++ continue; ++ } ++ } ++ break; ++ } ++ } ++ } else { ++ /* rewinding to the right */ ++ result = dir_go_to(dir, pos, tap); ++ if (result == 0) ++ result = rewind_right(tap, shift); ++ } ++ if (result == 0) { ++ result = set_pos(inode, pos, tap); ++ if (result == 0) { ++ /* update pos->position.pos */ ++ pos->entry_no = destination; ++ pos->fpos = dirpos; ++ } ++ } ++ return result; ++} ++ ++/* ++ * Function that is called by common_readdir() on each directory entry while ++ * doing readdir. 
->filldir callback may block, so we had to release long term ++ * lock while calling it. To avoid repeating tree traversal, seal is used. If ++ * seal is broken, we return -E_REPEAT. Node is unlocked in this case. ++ * ++ * Whether node is unlocked in case of any other error is undefined. It is ++ * guaranteed to be still locked if success (0) is returned. ++ * ++ * When ->filldir() wants no more, feed_entry() returns 1, and node is ++ * unlocked. ++ */ ++static int ++feed_entry(struct file *f, ++ readdir_pos * pos, tap_t * tap, filldir_t filldir, void *dirent) ++{ ++ item_plugin *iplug; ++ char *name; ++ reiser4_key sd_key; ++ int result; ++ char buf[DE_NAME_BUF_LEN]; ++ char name_buf[32]; ++ char *local_name; ++ unsigned file_type; ++ seal_t seal; ++ coord_t *coord; ++ reiser4_key entry_key; ++ ++ coord = tap->coord; ++ iplug = item_plugin_by_coord(coord); ++ ++ /* pointer to name within the node */ ++ name = iplug->s.dir.extract_name(coord, buf); ++ assert("nikita-1371", name != NULL); ++ ++ /* key of object the entry points to */ ++ if (iplug->s.dir.extract_key(coord, &sd_key) != 0) ++ return RETERR(-EIO); ++ ++ /* we must release longterm znode lock before calling filldir to avoid ++ deadlock which may happen if filldir causes page fault. So, copy ++ name to intermediate buffer */ ++ if (strlen(name) + 1 > sizeof(name_buf)) { ++ local_name = kmalloc(strlen(name) + 1, ++ reiser4_ctx_gfp_mask_get()); ++ if (local_name == NULL) ++ return RETERR(-ENOMEM); ++ } else ++ local_name = name_buf; ++ ++ strcpy(local_name, name); ++ file_type = iplug->s.dir.extract_file_type(coord); ++ ++ unit_key_by_coord(coord, &entry_key); ++ reiser4_seal_init(&seal, coord, &entry_key); ++ ++ longterm_unlock_znode(tap->lh); ++ ++ /* ++ * send information about directory entry to the ->filldir() filler ++ * supplied to us by caller (VFS). ++ * ++ * ->filldir is entitled to do weird things. For example, ->filldir ++ * supplied by knfsd re-enters file system. 
Make sure no locks are ++ * held. ++ */ ++ assert("nikita-3436", lock_stack_isclean(get_current_lock_stack())); ++ ++ reiser4_txn_restart_current(); ++ result = filldir(dirent, name, (int)strlen(name), ++ /* offset of this entry */ ++ f->f_pos, ++ /* inode number of object bounden by this entry */ ++ oid_to_uino(get_key_objectid(&sd_key)), file_type); ++ if (local_name != name_buf) ++ kfree(local_name); ++ if (result < 0) ++ /* ->filldir() is satisfied. (no space in buffer, IOW) */ ++ result = 1; ++ else ++ result = reiser4_seal_validate(&seal, coord, &entry_key, ++ tap->lh, tap->mode, ++ ZNODE_LOCK_HIPRI); ++ return result; ++} ++ ++static void move_entry(readdir_pos * pos, coord_t * coord) ++{ ++ reiser4_key de_key; ++ de_id *did; ++ ++ /* update @pos */ ++ ++pos->entry_no; ++ did = &pos->position.dir_entry_key; ++ ++ /* get key of directory entry */ ++ unit_key_by_coord(coord, &de_key); ++ ++ if (de_id_key_cmp(did, &de_key) == EQUAL_TO) ++ /* we are within sequence of directory entries ++ with duplicate keys. */ ++ ++pos->position.pos; ++ else { ++ pos->position.pos = 0; ++ build_de_id_by_key(&de_key, did); ++ } ++ ++pos->fpos; ++} ++ ++/* ++ * STATELESS READDIR ++ * ++ * readdir support in reiser4 relies on ability to update readdir_pos embedded ++ * into reiser4_file_fsdata on each directory modification (name insertion and ++ * removal), see reiser4_readdir_common() function below. This obviously doesn't ++ * work when reiser4 is accessed over NFS, because NFS doesn't keep any state ++ * across client READDIR requests for the same directory. ++ * ++ * To address this we maintain a "pool" of detached reiser4_file_fsdata ++ * (d_cursor). Whenever NFS readdir request comes, we detect this, and try to ++ * find detached reiser4_file_fsdata corresponding to previous readdir ++ * request. In other words, additional state is maintained on the ++ * server. (This is somewhat contrary to the design goals of NFS protocol.) 
++ * ++ * To efficiently detect when our ->readdir() method is called by NFS server, ++ * dentry is marked as "stateless" in reiser4_decode_fh() (this is checked by ++ * file_is_stateless() function). ++ * ++ * To find out d_cursor in the pool, we encode client id (cid) in the highest ++ * bits of NFS readdir cookie: when first readdir request comes to the given ++ * directory from the given client, cookie is set to 0. This situation is ++ * detected, global cid_counter is incremented, and stored in highest bits of ++ * all direntry offsets returned to the client, including last one. As the ++ * only valid readdir cookie is one obtained as direntry->offset, we are ++ * guaranteed that next readdir request (continuing current one) will have ++ * current cid in the highest bits of starting readdir cookie. All d_cursors ++ * are hashed into per-super-block hash table by (oid, cid) key. ++ * ++ * In addition d_cursors are placed into per-super-block radix tree where they ++ * are keyed by oid alone. This is necessary to efficiently remove them during ++ * rmdir. ++ * ++ * At last, currently unused d_cursors are linked into special list. This list ++ * is used d_cursor_shrink to reclaim d_cursors on memory pressure. ++ * ++ */ ++ ++/* ++ * prepare for readdir. ++ */ ++static int dir_readdir_init(struct file *f, tap_t * tap, readdir_pos ** pos) ++{ ++ struct inode *inode; ++ reiser4_file_fsdata *fsdata; ++ int result; ++ ++ assert("nikita-1359", f != NULL); ++ inode = f->f_dentry->d_inode; ++ assert("nikita-1360", inode != NULL); ++ ++ if (!S_ISDIR(inode->i_mode)) ++ return RETERR(-ENOTDIR); ++ ++ /* try to find detached readdir state */ ++ result = reiser4_attach_fsdata(f, inode); ++ if (result != 0) ++ return result; ++ ++ fsdata = reiser4_get_file_fsdata(f); ++ assert("nikita-2571", fsdata != NULL); ++ if (IS_ERR(fsdata)) ++ return PTR_ERR(fsdata); ++ ++ /* add file descriptor to the readdir list hanging of directory ++ * inode. 
This list is used to scan "readdirs-in-progress" while ++ * inserting or removing names in the directory. */ ++ spin_lock_inode(inode); ++ if (list_empty_careful(&fsdata->dir.linkage)) ++ list_add(&fsdata->dir.linkage, get_readdir_list(inode)); ++ *pos = &fsdata->dir.readdir; ++ spin_unlock_inode(inode); ++ ++ /* move @tap to the current position */ ++ return dir_rewind(f, *pos, tap); ++} ++ ++/* this is implementation of vfs's llseek method of struct file_operations for ++ typical directory ++ See comment before reiser4_readdir_common() for explanation. ++*/ ++loff_t reiser4_llseek_dir_common(struct file * file, loff_t off, int origin) ++{ ++ reiser4_context *ctx; ++ loff_t result; ++ struct inode *inode; ++ ++ inode = file->f_dentry->d_inode; ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ mutex_lock(&inode->i_mutex); ++ ++ /* update ->f_pos */ ++ result = default_llseek(file, off, origin); ++ if (result >= 0) { ++ int ff; ++ coord_t coord; ++ lock_handle lh; ++ tap_t tap; ++ readdir_pos *pos; ++ ++ coord_init_zero(&coord); ++ init_lh(&lh); ++ reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK); ++ ++ ff = dir_readdir_init(file, &tap, &pos); ++ reiser4_detach_fsdata(file); ++ if (ff != 0) ++ result = (loff_t) ff; ++ reiser4_tap_done(&tap); ++ } ++ reiser4_detach_fsdata(file); ++ mutex_unlock(&inode->i_mutex); ++ ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++/* this is common implementation of vfs's readdir method of struct ++ file_operations ++ ++ readdir problems: ++ ++ readdir(2)/getdents(2) interface is based on implicit assumption that ++ readdir can be restarted from any particular point by supplying file system ++ with off_t-full of data. That is, file system fills ->d_off field in struct ++ dirent and later user passes ->d_off to the seekdir(3), which is, actually, ++ implemented by glibc as lseek(2) on directory. 
++ ++ Reiser4 cannot restart readdir from 64 bits of data, because two last ++ components of the key of directory entry are unknown, which given 128 bits: ++ locality and type fields in the key of directory entry are always known, to ++ start readdir() from given point objectid and offset fields have to be ++ filled. ++ ++ Traditional UNIX API for scanning through directory ++ (readdir/seekdir/telldir/opendir/closedir/rewindir/getdents) is based on the ++ assumption that directory is structured very much like regular file, in ++ particular, it is implied that each name within given directory (directory ++ entry) can be uniquely identified by scalar offset and that such offset is ++ stable across the life-time of the name is identifies. ++ ++ This is manifestly not so for reiser4. In reiser4 the only stable unique ++ identifies for the directory entry is its key that doesn't fit into ++ seekdir/telldir API. ++ ++ solution: ++ ++ Within each file descriptor participating in readdir-ing of directory ++ plugin/dir/dir.h:readdir_pos is maintained. This structure keeps track of ++ the "current" directory entry that file descriptor looks at. It contains a ++ key of directory entry (plus some additional info to deal with non-unique ++ keys that we wouldn't dwell onto here) and a logical position of this ++ directory entry starting from the beginning of the directory, that is ++ ordinal number of this entry in the readdir order. ++ ++ Obviously this logical position is not stable in the face of directory ++ modifications. To work around this, on each addition or removal of directory ++ entry all file descriptors for directory inode are scanned and their ++ readdir_pos are updated accordingly (adjust_dir_pos()). 
++*/ ++int reiser4_readdir_common(struct file *f /* directory file being read */, ++ void *dirent /* opaque data passed to us by VFS */, ++ filldir_t filld /* filler function passed to us ++ * by VFS */) ++{ ++ reiser4_context *ctx; ++ int result; ++ struct inode *inode; ++ coord_t coord; ++ lock_handle lh; ++ tap_t tap; ++ readdir_pos *pos; ++ ++ assert("nikita-1359", f != NULL); ++ inode = f->f_dentry->d_inode; ++ assert("nikita-1360", inode != NULL); ++ ++ if (!S_ISDIR(inode->i_mode)) ++ return RETERR(-ENOTDIR); ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ coord_init_zero(&coord); ++ init_lh(&lh); ++ reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK); ++ ++ reiser4_readdir_readahead_init(inode, &tap); ++ ++ repeat: ++ result = dir_readdir_init(f, &tap, &pos); ++ if (result == 0) { ++ result = reiser4_tap_load(&tap); ++ /* scan entries one by one feeding them to @filld */ ++ while (result == 0) { ++ coord_t *coord; ++ ++ coord = tap.coord; ++ assert("nikita-2572", coord_is_existing_unit(coord)); ++ assert("nikita-3227", is_valid_dir_coord(inode, coord)); ++ ++ result = feed_entry(f, pos, &tap, filld, dirent); ++ if (result > 0) { ++ break; ++ } else if (result == 0) { ++ ++f->f_pos; ++ result = go_next_unit(&tap); ++ if (result == -E_NO_NEIGHBOR || ++ result == -ENOENT) { ++ result = 0; ++ break; ++ } else if (result == 0) { ++ if (is_valid_dir_coord(inode, coord)) ++ move_entry(pos, coord); ++ else ++ break; ++ } ++ } else if (result == -E_REPEAT) { ++ /* feed_entry() had to restart. 
*/ ++ ++f->f_pos; ++ reiser4_tap_relse(&tap); ++ goto repeat; ++ } else ++ warning("vs-1617", ++ "reiser4_readdir_common: unexpected error %d", ++ result); ++ } ++ reiser4_tap_relse(&tap); ++ ++ if (result >= 0) ++ f->f_version = inode->i_version; ++ } else if (result == -E_NO_NEIGHBOR || result == -ENOENT) ++ result = 0; ++ reiser4_tap_done(&tap); ++ reiser4_detach_fsdata(f); ++ ++ /* try to update directory's atime */ ++ if (reiser4_grab_space_force(inode_file_plugin(inode)->estimate.update(inode), ++ BA_CAN_COMMIT) != 0) ++ warning("", "failed to update atime on readdir: %llu", ++ get_inode_oid(inode)); ++ else ++ file_accessed(f); ++ ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ ++ return (result <= 0) ? result : 0; ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * End: ++ */ +diff --git a/fs/reiser4/plugin/file_plugin_common.c b/fs/reiser4/plugin/file_plugin_common.c +new file mode 100644 +index 0000000..55d9047 +--- /dev/null ++++ b/fs/reiser4/plugin/file_plugin_common.c +@@ -0,0 +1,1007 @@ ++/* Copyright 2005 by Hans Reiser, licensing governed by ++ reiser4/README */ ++ ++/* this file contains typical implementations for most of methods of ++ file plugin ++*/ ++ ++#include "../inode.h" ++#include "object.h" ++#include "../safe_link.h" ++ ++#include ++ ++static int insert_new_sd(struct inode *inode); ++static int update_sd(struct inode *inode); ++ ++/* this is common implementation of write_sd_by_inode method of file plugin ++ either insert stat data or update it ++ */ ++int write_sd_by_inode_common(struct inode *inode /* object to save */ ) ++{ ++ int result; ++ ++ assert("nikita-730", inode != NULL); ++ ++ if (reiser4_inode_get_flag(inode, REISER4_NO_SD)) ++ /* object doesn't have stat-data yet */ ++ result = insert_new_sd(inode); ++ else ++ result = update_sd(inode); ++ if (result != 0 && result != -ENAMETOOLONG && result != -ENOMEM) 
++ /* Don't issue warnings about "name is too long" */ ++ warning("nikita-2221", "Failed to save sd for %llu: %i", ++ (unsigned long long)get_inode_oid(inode), result); ++ return result; ++} ++ ++/* this is common implementation of key_by_inode method of file plugin ++ */ ++int ++key_by_inode_and_offset_common(struct inode *inode, loff_t off, ++ reiser4_key * key) ++{ ++ reiser4_key_init(key); ++ set_key_locality(key, reiser4_inode_data(inode)->locality_id); ++ set_key_ordering(key, get_inode_ordering(inode)); ++ set_key_objectid(key, get_inode_oid(inode)); /*FIXME: inode->i_ino */ ++ set_key_type(key, KEY_BODY_MINOR); ++ set_key_offset(key, (__u64) off); ++ return 0; ++} ++ ++/* this is common implementation of set_plug_in_inode method of file plugin ++ */ ++int set_plug_in_inode_common(struct inode *object /* inode to set plugin on */ , ++ struct inode *parent /* parent object */ , ++ reiser4_object_create_data * data /* creational ++ * data */ ) ++{ ++ __u64 mask; ++ ++ object->i_mode = data->mode; ++ /* this should be plugin decision */ ++ object->i_uid = current->fsuid; ++ object->i_mtime = object->i_atime = object->i_ctime = CURRENT_TIME; ++ ++ /* support for BSD style group-id assignment. 
See mount's manual page ++ description of bsdgroups ext2 mount options for more details */ ++ if (reiser4_is_set(object->i_sb, REISER4_BSD_GID)) ++ object->i_gid = parent->i_gid; ++ else if (parent->i_mode & S_ISGID) { ++ /* parent directory has sguid bit */ ++ object->i_gid = parent->i_gid; ++ if (S_ISDIR(object->i_mode)) ++ /* sguid is inherited by sub-directories */ ++ object->i_mode |= S_ISGID; ++ } else ++ object->i_gid = current->fsgid; ++ ++ /* this object doesn't have stat-data yet */ ++ reiser4_inode_set_flag(object, REISER4_NO_SD); ++#if 0 ++ /* this is now called after all inode plugins are initialized: ++ do_create_vfs_child after adjust_to_parent */ ++ /* setup inode and file-operations for this inode */ ++ setup_inode_ops(object, data); ++#endif ++ object->i_nlink = 0; ++ reiser4_seal_init(&reiser4_inode_data(object)->sd_seal, NULL, NULL); ++ mask = (1 << UNIX_STAT) | (1 << LIGHT_WEIGHT_STAT); ++ if (!reiser4_is_set(object->i_sb, REISER4_32_BIT_TIMES)) ++ mask |= (1 << LARGE_TIMES_STAT); ++ ++ reiser4_inode_data(object)->extmask = mask; ++ return 0; ++} ++ ++/* this is common implementation of adjust_to_parent method of file plugin for ++ regular files ++ */ ++int adjust_to_parent_common(struct inode *object /* new object */ , ++ struct inode *parent /* parent directory */ , ++ struct inode *root /* root directory */ ) ++{ ++ assert("nikita-2165", object != NULL); ++ if (parent == NULL) ++ parent = root; ++ assert("nikita-2069", parent != NULL); ++ ++ /* ++ * inherit missing plugins from parent ++ */ ++ ++ grab_plugin_pset(object, parent, PSET_FILE); ++ grab_plugin_pset(object, parent, PSET_SD); ++ grab_plugin_pset(object, parent, PSET_FORMATTING); ++ grab_plugin_pset(object, parent, PSET_PERM); ++ return 0; ++} ++ ++/* this is common implementation of adjust_to_parent method of file plugin for ++ typical directories ++ */ ++int adjust_to_parent_common_dir(struct inode *object /* new object */ , ++ struct inode *parent /* parent directory */ , ++ 
struct inode *root /* root directory */ ) ++{ ++ int result = 0; ++ pset_member memb; ++ ++ assert("nikita-2166", object != NULL); ++ if (parent == NULL) ++ parent = root; ++ assert("nikita-2167", parent != NULL); ++ ++ /* ++ * inherit missing plugins from parent ++ */ ++ for (memb = 0; memb < PSET_LAST; ++memb) { ++ result = grab_plugin_pset(object, parent, memb); ++ if (result != 0) ++ break; ++ } ++ return result; ++} ++ ++int adjust_to_parent_cryptcompress(struct inode *object /* new object */ , ++ struct inode *parent /* parent directory */, ++ struct inode *root /* root directory */) ++{ ++ int result; ++ result = adjust_to_parent_common(object, parent, root); ++ if (result) ++ return result; ++ assert("edward-1416", parent != NULL); ++ ++ grab_plugin_pset(object, parent, PSET_CLUSTER); ++ grab_plugin_pset(object, parent, PSET_CIPHER); ++ grab_plugin_pset(object, parent, PSET_DIGEST); ++ grab_plugin_pset(object, parent, PSET_COMPRESSION); ++ grab_plugin_pset(object, parent, PSET_COMPRESSION_MODE); ++ ++ return 0; ++} ++ ++/* this is common implementation of create_object method of file plugin ++ */ ++int reiser4_create_object_common(struct inode *object, struct inode *parent, ++ reiser4_object_create_data * data) ++{ ++ reiser4_block_nr reserve; ++ assert("nikita-744", object != NULL); ++ assert("nikita-745", parent != NULL); ++ assert("nikita-747", data != NULL); ++ assert("nikita-748", reiser4_inode_get_flag(object, REISER4_NO_SD)); ++ ++ reserve = estimate_create_common(object); ++ if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) ++ return RETERR(-ENOSPC); ++ return write_sd_by_inode_common(object); ++} ++ ++static int common_object_delete_no_reserve(struct inode *inode); ++ ++/** ++ * reiser4_delete_object_common - delete_object of file_plugin ++ * @inode: inode to be deleted ++ * ++ * This is common implementation of delete_object method of file_plugin. It ++ * applies to object its deletion consists of removing two items - stat data ++ * and safe-link. 
++ */ ++int reiser4_delete_object_common(struct inode *inode) ++{ ++ int result; ++ ++ assert("nikita-1477", inode != NULL); ++ /* FIXME: if file body deletion failed (i/o error, for instance), ++ inode->i_size can be != 0 here */ ++ assert("nikita-3420", inode->i_size == 0 || S_ISLNK(inode->i_mode)); ++ assert("nikita-3421", inode->i_nlink == 0); ++ ++ if (!reiser4_inode_get_flag(inode, REISER4_NO_SD)) { ++ reiser4_block_nr reserve; ++ ++ /* grab space which is needed to remove 2 items from the tree: ++ stat data and safe-link */ ++ reserve = 2 * ++ estimate_one_item_removal(reiser4_tree_by_inode(inode)); ++ if (reiser4_grab_space_force(reserve, ++ BA_RESERVED | BA_CAN_COMMIT)) ++ return RETERR(-ENOSPC); ++ result = common_object_delete_no_reserve(inode); ++ } else ++ result = 0; ++ return result; ++} ++ ++/** ++ * reiser4_delete_dir_common - delete_object of file_plugin ++ * @inode: inode to be deleted ++ * ++ * This is common implementation of delete_object method of file_plugin for ++ * typical directory. It calls done method of dir_plugin to remove "." and ++ * removes stat data and safe-link. 
++ */ ++int reiser4_delete_dir_common(struct inode *inode) ++{ ++ int result; ++ dir_plugin *dplug; ++ ++ assert("", (get_current_context() && ++ get_current_context()->trans->atom == NULL)); ++ ++ dplug = inode_dir_plugin(inode); ++ assert("vs-1101", dplug && dplug->done); ++ ++ /* kill cursors which might be attached to inode */ ++ reiser4_kill_cursors(inode); ++ ++ /* grab space enough for removing two items */ ++ if (reiser4_grab_space ++ (2 * estimate_one_item_removal(reiser4_tree_by_inode(inode)), ++ BA_RESERVED | BA_CAN_COMMIT)) ++ return RETERR(-ENOSPC); ++ ++ result = dplug->done(inode); ++ if (!result) ++ result = common_object_delete_no_reserve(inode); ++ return result; ++} ++ ++/* this is common implementation of add_link method of file plugin ++ */ ++int reiser4_add_link_common(struct inode *object, struct inode *parent) ++{ ++ /* ++ * increment ->i_nlink and update ->i_ctime ++ */ ++ ++ INODE_INC_FIELD(object, i_nlink); ++ object->i_ctime = CURRENT_TIME; ++ return 0; ++} ++ ++/* this is common implementation of rem_link method of file plugin ++ */ ++int reiser4_rem_link_common(struct inode *object, struct inode *parent) ++{ ++ assert("nikita-2021", object != NULL); ++ assert("nikita-2163", object->i_nlink > 0); ++ ++ /* ++ * decrement ->i_nlink and update ->i_ctime ++ */ ++ ++ INODE_DEC_FIELD(object, i_nlink); ++ object->i_ctime = CURRENT_TIME; ++ return 0; ++} ++ ++/* this is common implementation of rem_link method of file plugin for typical ++ directory ++*/ ++int rem_link_common_dir(struct inode *object, struct inode *parent UNUSED_ARG) ++{ ++ assert("nikita-20211", object != NULL); ++ assert("nikita-21631", object->i_nlink > 0); ++ ++ /* ++ * decrement ->i_nlink and update ->i_ctime ++ */ ++ INODE_DEC_FIELD(object, i_nlink); ++ if (object->i_nlink == 1) ++ INODE_DEC_FIELD(object, i_nlink); ++ object->i_ctime = CURRENT_TIME; ++ return 0; ++} ++ ++/* this is common implementation of owns_item method of file plugin ++ compare objectids of keys in 
inode and coord */ ++int owns_item_common(const struct inode *inode, /* object to check ++ * against */ ++ const coord_t * coord /* coord to check */ ) ++{ ++ reiser4_key item_key; ++ reiser4_key file_key; ++ ++ assert("nikita-760", inode != NULL); ++ assert("nikita-761", coord != NULL); ++ ++ return coord_is_existing_item(coord) && ++ (get_key_objectid(build_sd_key(inode, &file_key)) == ++ get_key_objectid(item_key_by_coord(coord, &item_key))); ++} ++ ++/* this is common implementation of owns_item method of file plugin ++ for typical directory ++*/ ++int owns_item_common_dir(const struct inode *inode, /* object to check against */ ++ const coord_t * coord /* coord of item to check */ ) ++{ ++ reiser4_key item_key; ++ ++ assert("nikita-1335", inode != NULL); ++ assert("nikita-1334", coord != NULL); ++ ++ if (plugin_of_group(item_plugin_by_coord(coord), DIR_ENTRY_ITEM_TYPE)) ++ return get_key_locality(item_key_by_coord(coord, &item_key)) == ++ get_inode_oid(inode); ++ else ++ return owns_item_common(inode, coord); ++} ++ ++/* this is common implementation of can_add_link method of file plugin ++ checks whether yet another hard links to this object can be added ++*/ ++int can_add_link_common(const struct inode *object /* object to check */ ) ++{ ++ assert("nikita-732", object != NULL); ++ ++ /* inode->i_nlink is unsigned int, so just check for integer ++ overflow */ ++ return object->i_nlink + 1 != 0; ++} ++ ++/* this is common implementation of can_rem_link method of file plugin for ++ typical directory ++*/ ++int can_rem_link_common_dir(const struct inode *inode) ++{ ++ /* is_dir_empty() returns 0 is dir is empty */ ++ return !is_dir_empty(inode); ++} ++ ++/* this is common implementation of detach method of file plugin for typical ++ directory ++*/ ++int reiser4_detach_common_dir(struct inode *child, struct inode *parent) ++{ ++ dir_plugin *dplug; ++ ++ dplug = inode_dir_plugin(child); ++ assert("nikita-2883", dplug != NULL); ++ assert("nikita-2884", 
dplug->detach != NULL); ++ return dplug->detach(child, parent); ++} ++ ++/* this is common implementation of bind method of file plugin for typical ++ directory ++*/ ++int reiser4_bind_common_dir(struct inode *child, struct inode *parent) ++{ ++ dir_plugin *dplug; ++ ++ dplug = inode_dir_plugin(child); ++ assert("nikita-2646", dplug != NULL); ++ return dplug->attach(child, parent); ++} ++ ++static int process_truncate(struct inode *, __u64 size); ++ ++/* this is common implementation of safelink method of file plugin ++ */ ++int safelink_common(struct inode *object, reiser4_safe_link_t link, __u64 value) ++{ ++ int result; ++ ++ assert("vs-1705", get_current_context()->trans->atom == NULL); ++ if (link == SAFE_UNLINK) ++ /* nothing to do. iput() in the caller (process_safelink) will ++ * finish with file */ ++ result = 0; ++ else if (link == SAFE_TRUNCATE) ++ result = process_truncate(object, value); ++ else { ++ warning("nikita-3438", "Unrecognized safe-link type: %i", link); ++ result = RETERR(-EIO); ++ } ++ return result; ++} ++ ++/* this is common implementation of estimate.create method of file plugin ++ can be used when object creation involves insertion of one item (usually stat ++ data) into tree ++*/ ++reiser4_block_nr estimate_create_common(const struct inode * object) ++{ ++ return estimate_one_insert_item(reiser4_tree_by_inode(object)); ++} ++ ++/* this is common implementation of estimate.create method of file plugin for ++ typical directory ++ can be used when directory creation involves insertion of two items (usually ++ stat data and item containing "." 
and "..") into tree ++*/ ++reiser4_block_nr estimate_create_common_dir(const struct inode * object) ++{ ++ return 2 * estimate_one_insert_item(reiser4_tree_by_inode(object)); ++} ++ ++/* this is common implementation of estimate.update method of file plugin ++ can be used when stat data update does not do more than inserting a unit ++ into a stat data item which is probably true for most cases ++*/ ++reiser4_block_nr estimate_update_common(const struct inode * inode) ++{ ++ return estimate_one_insert_into_item(reiser4_tree_by_inode(inode)); ++} ++ ++/* this is common implementation of estimate.unlink method of file plugin ++ */ ++reiser4_block_nr ++estimate_unlink_common(const struct inode * object UNUSED_ARG, ++ const struct inode * parent UNUSED_ARG) ++{ ++ return 0; ++} ++ ++/* this is common implementation of estimate.unlink method of file plugin for ++ typical directory ++*/ ++reiser4_block_nr ++estimate_unlink_common_dir(const struct inode * object, ++ const struct inode * parent) ++{ ++ dir_plugin *dplug; ++ ++ dplug = inode_dir_plugin(object); ++ assert("nikita-2888", dplug != NULL); ++ assert("nikita-2887", dplug->estimate.unlink != NULL); ++ return dplug->estimate.unlink(object, parent); ++} ++ ++char *wire_write_common(struct inode *inode, char *start) ++{ ++ return build_inode_onwire(inode, start); ++} ++ ++char *wire_read_common(char *addr, reiser4_object_on_wire * obj) ++{ ++ return extract_obj_key_id_from_onwire(addr, &obj->u.std.key_id); ++} ++ ++struct dentry *wire_get_common(struct super_block *sb, ++ reiser4_object_on_wire * obj) ++{ ++ struct inode *inode; ++ struct dentry *dentry; ++ reiser4_key key; ++ ++ extract_key_from_id(&obj->u.std.key_id, &key); ++ inode = reiser4_iget(sb, &key, 1); ++ if (!IS_ERR(inode)) { ++ reiser4_iget_complete(inode); ++ dentry = d_alloc_anon(inode); ++ if (dentry == NULL) { ++ iput(inode); ++ dentry = ERR_PTR(-ENOMEM); ++ } else ++ dentry->d_op = &get_super_private(sb)->ops.dentry; ++ } else if (PTR_ERR(inode) == 
-ENOENT) ++ /* ++ * inode wasn't found at the key encoded in the file ++ * handle. Hence, file handle is stale. ++ */ ++ dentry = ERR_PTR(RETERR(-ESTALE)); ++ else ++ dentry = (void *)inode; ++ return dentry; ++} ++ ++int wire_size_common(struct inode *inode) ++{ ++ return inode_onwire_size(inode); ++} ++ ++void wire_done_common(reiser4_object_on_wire * obj) ++{ ++ /* nothing to do */ ++} ++ ++/* helper function to print errors */ ++static void key_warning(const reiser4_key * key /* key to print */ , ++ const struct inode *inode, ++ int code /* error code to print */ ) ++{ ++ assert("nikita-716", key != NULL); ++ ++ if (code != -ENOMEM) { ++ warning("nikita-717", "Error for inode %llu (%i)", ++ (unsigned long long)get_key_objectid(key), code); ++ reiser4_print_key("for key", key); ++ } ++} ++ ++/* NIKITA-FIXME-HANS: perhaps this function belongs in another file? */ ++#if REISER4_DEBUG ++static void ++check_inode_seal(const struct inode *inode, ++ const coord_t * coord, const reiser4_key * key) ++{ ++ reiser4_key unit_key; ++ ++ unit_key_by_coord(coord, &unit_key); ++ assert("nikita-2752", ++ WITH_DATA_RET(coord->node, 1, keyeq(key, &unit_key))); ++ assert("nikita-2753", get_inode_oid(inode) == get_key_objectid(key)); ++} ++ ++static void check_sd_coord(coord_t * coord, const reiser4_key * key) ++{ ++ reiser4_key ukey; ++ ++ coord_clear_iplug(coord); ++ if (zload(coord->node)) ++ return; ++ ++ if (!coord_is_existing_unit(coord) || ++ !item_plugin_by_coord(coord) || ++ !keyeq(unit_key_by_coord(coord, &ukey), key) || ++ (znode_get_level(coord->node) != LEAF_LEVEL) || ++ !item_is_statdata(coord)) { ++ warning("nikita-1901", "Conspicuous seal"); ++ reiser4_print_key("key", key); ++ print_coord("coord", coord, 1); ++ impossible("nikita-2877", "no way"); ++ } ++ zrelse(coord->node); ++} ++ ++#else ++#define check_inode_seal(inode, coord, key) noop ++#define check_sd_coord(coord, key) noop ++#endif ++ ++/* insert new stat-data into tree. Called with inode state ++ locked. 
Return inode state locked. */ ++static int insert_new_sd(struct inode *inode /* inode to create sd for */ ) ++{ ++ int result; ++ reiser4_key key; ++ coord_t coord; ++ reiser4_item_data data; ++ char *area; ++ reiser4_inode *ref; ++ lock_handle lh; ++ oid_t oid; ++ ++ assert("nikita-723", inode != NULL); ++ assert("nikita-3406", reiser4_inode_get_flag(inode, REISER4_NO_SD)); ++ ++ ref = reiser4_inode_data(inode); ++ spin_lock_inode(inode); ++ ++ if (ref->plugin_mask != 0) ++ /* inode has non-standard plugins */ ++ inode_set_extension(inode, PLUGIN_STAT); ++ /* ++ * prepare specification of new item to be inserted ++ */ ++ ++ data.iplug = inode_sd_plugin(inode); ++ data.length = data.iplug->s.sd.save_len(inode); ++ spin_unlock_inode(inode); ++ ++ data.data = NULL; ++ data.user = 0; ++/* could be optimized for case where there is only one node format in ++ * use in the filesystem, probably there are lots of such ++ * places we could optimize for only one node layout.... -Hans */ ++ if (data.length > reiser4_tree_by_inode(inode)->nplug->max_item_size()){ ++ /* This is silly check, but we don't know actual node where ++ insertion will go into. */ ++ return RETERR(-ENAMETOOLONG); ++ } ++ oid = oid_allocate(inode->i_sb); ++/* NIKITA-FIXME-HANS: what is your opinion on whether this error check should be encapsulated into oid_allocate? */ ++ if (oid == ABSOLUTE_MAX_OID) ++ return RETERR(-EOVERFLOW); ++ ++ set_inode_oid(inode, oid); ++ ++ coord_init_zero(&coord); ++ init_lh(&lh); ++ ++ result = insert_by_key(reiser4_tree_by_inode(inode), ++ build_sd_key(inode, &key), &data, &coord, &lh, ++ /* stat data lives on a leaf level */ ++ LEAF_LEVEL, CBK_UNIQUE); ++ ++ /* we don't want to re-check that somebody didn't insert ++ stat-data while we were doing io, because if it did, ++ insert_by_key() returned error. 
*/ ++ /* but what _is_ possible is that plugin for inode's stat-data, ++ list of non-standard plugins or their state would change ++ during io, so that stat-data wouldn't fit into sd. To avoid ++ this race we keep inode_state lock. This lock has to be ++ taken each time you access inode in a way that would cause ++ changes in sd size: changing plugins etc. ++ */ ++ ++ if (result == IBK_INSERT_OK) { ++ coord_clear_iplug(&coord); ++ result = zload(coord.node); ++ if (result == 0) { ++ /* have we really inserted stat data? */ ++ assert("nikita-725", item_is_statdata(&coord)); ++ ++ /* inode was just created. It is inserted into hash ++ table, but no directory entry was yet inserted into ++ parent. So, inode is inaccessible through ++ ->lookup(). All places that directly grab inode ++ from hash-table (like old knfsd), should check ++ IMMUTABLE flag that is set by common_create_child. ++ */ ++ assert("nikita-3240", data.iplug != NULL); ++ assert("nikita-3241", data.iplug->s.sd.save != NULL); ++ area = item_body_by_coord(&coord); ++ result = data.iplug->s.sd.save(inode, &area); ++ znode_make_dirty(coord.node); ++ if (result == 0) { ++ /* object has stat-data now */ ++ reiser4_inode_clr_flag(inode, REISER4_NO_SD); ++ reiser4_inode_set_flag(inode, REISER4_SDLEN_KNOWN); ++ /* initialise stat-data seal */ ++ reiser4_seal_init(&ref->sd_seal, &coord, &key); ++ ref->sd_coord = coord; ++ check_inode_seal(inode, &coord, &key); ++ } else if (result != -ENOMEM) ++ /* ++ * convert any other error code to -EIO to ++ * avoid confusing user level with unexpected ++ * errors. 
++ */ ++ result = RETERR(-EIO); ++ zrelse(coord.node); ++ } ++ } ++ done_lh(&lh); ++ ++ if (result != 0) ++ key_warning(&key, inode, result); ++ else ++ oid_count_allocated(); ++ ++ return result; ++} ++ ++/* find sd of inode in a tree, deal with errors */ ++int lookup_sd(struct inode *inode /* inode to look sd for */ , ++ znode_lock_mode lock_mode /* lock mode */ , ++ coord_t * coord /* resulting coord */ , ++ lock_handle * lh /* resulting lock handle */ , ++ const reiser4_key * key /* resulting key */ , ++ int silent) ++{ ++ int result; ++ __u32 flags; ++ ++ assert("nikita-1692", inode != NULL); ++ assert("nikita-1693", coord != NULL); ++ assert("nikita-1694", key != NULL); ++ ++ /* look for the object's stat data in a tree. ++ This returns in "node" pointer to a locked znode and in "pos" ++ position of an item found in node. Both are only valid if ++ coord_found is returned. */ ++ flags = (lock_mode == ZNODE_WRITE_LOCK) ? CBK_FOR_INSERT : 0; ++ flags |= CBK_UNIQUE; ++ /* ++ * traverse tree to find stat data. We cannot use vroot here, because ++ * it only covers _body_ of the file, and stat data don't belong ++ * there. 
++ */ ++ result = coord_by_key(reiser4_tree_by_inode(inode), ++ key, ++ coord, ++ lh, ++ lock_mode, ++ FIND_EXACT, LEAF_LEVEL, LEAF_LEVEL, flags, NULL); ++ if (REISER4_DEBUG && result == 0) ++ check_sd_coord(coord, key); ++ ++ if (result != 0 && !silent) ++ key_warning(key, inode, result); ++ return result; ++} ++ ++static int ++locate_inode_sd(struct inode *inode, ++ reiser4_key * key, coord_t * coord, lock_handle * lh) ++{ ++ reiser4_inode *state; ++ seal_t seal; ++ int result; ++ ++ assert("nikita-3483", inode != NULL); ++ ++ state = reiser4_inode_data(inode); ++ spin_lock_inode(inode); ++ *coord = state->sd_coord; ++ coord_clear_iplug(coord); ++ seal = state->sd_seal; ++ spin_unlock_inode(inode); ++ ++ build_sd_key(inode, key); ++ if (reiser4_seal_is_set(&seal)) { ++ /* first, try to use seal */ ++ result = reiser4_seal_validate(&seal, ++ coord, ++ key, ++ lh, ZNODE_WRITE_LOCK, ++ ZNODE_LOCK_LOPRI); ++ if (result == 0) ++ check_sd_coord(coord, key); ++ } else ++ result = -E_REPEAT; ++ ++ if (result != 0) { ++ coord_init_zero(coord); ++ result = lookup_sd(inode, ZNODE_WRITE_LOCK, coord, lh, key, 0); ++ } ++ return result; ++} ++ ++#if REISER4_DEBUG ++static int all_but_offset_key_eq(const reiser4_key * k1, const reiser4_key * k2) ++{ ++ return (get_key_locality(k1) == get_key_locality(k2) && ++ get_key_type(k1) == get_key_type(k2) && ++ get_key_band(k1) == get_key_band(k2) && ++ get_key_ordering(k1) == get_key_ordering(k2) && ++ get_key_objectid(k1) == get_key_objectid(k2)); ++} ++ ++#include "../tree_walk.h" ++ ++/* make some checks before and after stat-data resize operation */ ++static int check_sd_resize(struct inode * inode, coord_t * coord, ++ int length, int progress /* 1 means after resize */) ++{ ++ int ret = 0; ++ lock_handle left_lock; ++ coord_t left_coord; ++ reiser4_key left_key; ++ reiser4_key key; ++ ++ if (inode_file_plugin(inode) != ++ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)) ++ return 0; ++ if (!length) ++ return 0; ++ if 
(coord->item_pos != 0) ++ return 0; ++ ++ init_lh(&left_lock); ++ ret = reiser4_get_left_neighbor(&left_lock, ++ coord->node, ++ ZNODE_WRITE_LOCK, ++ GN_CAN_USE_UPPER_LEVELS); ++ if (ret == -E_REPEAT || ret == -E_NO_NEIGHBOR || ++ ret == -ENOENT || ret == -EINVAL ++ || ret == -E_DEADLOCK) { ++ ret = 0; ++ goto exit; ++ } ++ ret = zload(left_lock.node); ++ if (ret) ++ goto exit; ++ coord_init_last_unit(&left_coord, left_lock.node); ++ item_key_by_coord(&left_coord, &left_key); ++ item_key_by_coord(coord, &key); ++ ++ if (all_but_offset_key_eq(&key, &left_key)) ++ /* corruption occured */ ++ ret = 1; ++ zrelse(left_lock.node); ++ exit: ++ done_lh(&left_lock); ++ return ret; ++} ++#endif ++ ++/* update stat-data at @coord */ ++static int ++update_sd_at(struct inode *inode, coord_t * coord, reiser4_key * key, ++ lock_handle * lh) ++{ ++ int result; ++ reiser4_item_data data; ++ char *area; ++ reiser4_inode *state; ++ znode *loaded; ++ ++ state = reiser4_inode_data(inode); ++ ++ coord_clear_iplug(coord); ++ result = zload(coord->node); ++ if (result != 0) ++ return result; ++ loaded = coord->node; ++ ++ spin_lock_inode(inode); ++ assert("nikita-728", inode_sd_plugin(inode) != NULL); ++ data.iplug = inode_sd_plugin(inode); ++ ++ /* if inode has non-standard plugins, add appropriate stat data ++ * extension */ ++ if (state->extmask & (1 << PLUGIN_STAT)) { ++ if (state->plugin_mask == 0) ++ inode_clr_extension(inode, PLUGIN_STAT); ++ } else if (state->plugin_mask != 0) ++ inode_set_extension(inode, PLUGIN_STAT); ++ ++ if (state->extmask & (1 << HEIR_STAT)) { ++ if (state->heir_mask == 0) ++ inode_clr_extension(inode, HEIR_STAT); ++ } else if (state->heir_mask != 0) ++ inode_set_extension(inode, HEIR_STAT); ++ ++ /* data.length is how much space to add to (or remove ++ from if negative) sd */ ++ if (!reiser4_inode_get_flag(inode, REISER4_SDLEN_KNOWN)) { ++ /* recalculate stat-data length */ ++ data.length = ++ data.iplug->s.sd.save_len(inode) - ++ 
item_length_by_coord(coord); ++ reiser4_inode_set_flag(inode, REISER4_SDLEN_KNOWN); ++ } else ++ data.length = 0; ++ spin_unlock_inode(inode); ++ ++ /* if on-disk stat data is of different length than required ++ for this inode, resize it */ ++ ++ if (data.length != 0) { ++ data.data = NULL; ++ data.user = 0; ++ ++ assert("edward-1441", ++ !check_sd_resize(inode, coord, ++ data.length, 0/* before resize */)); ++ ++ /* insertion code requires that insertion point (coord) was ++ * between units. */ ++ coord->between = AFTER_UNIT; ++ result = reiser4_resize_item(coord, &data, key, lh, ++ COPI_DONT_SHIFT_LEFT); ++ if (result != 0) { ++ key_warning(key, inode, result); ++ zrelse(loaded); ++ return result; ++ } ++ if (loaded != coord->node) { ++ /* reiser4_resize_item moved coord to another node. ++ Zload it */ ++ zrelse(loaded); ++ coord_clear_iplug(coord); ++ result = zload(coord->node); ++ if (result != 0) ++ return result; ++ loaded = coord->node; ++ } ++ assert("edward-1442", ++ !check_sd_resize(inode, coord, ++ data.length, 1/* after resize */)); ++ } ++ area = item_body_by_coord(coord); ++ spin_lock_inode(inode); ++ result = data.iplug->s.sd.save(inode, &area); ++ znode_make_dirty(coord->node); ++ ++ /* re-initialise stat-data seal */ ++ ++ /* ++ * coord.between was possibly skewed from AT_UNIT when stat-data size ++ * was changed and new extensions were pasted into item. ++ */ ++ coord->between = AT_UNIT; ++ reiser4_seal_init(&state->sd_seal, coord, key); ++ state->sd_coord = *coord; ++ spin_unlock_inode(inode); ++ check_inode_seal(inode, coord, key); ++ zrelse(loaded); ++ return result; ++} ++ ++/* Update existing stat-data in a tree. Called with inode state locked. Return ++ inode state locked. */ ++static int update_sd(struct inode *inode /* inode to update sd for */ ) ++{ ++ int result; ++ reiser4_key key; ++ coord_t coord; ++ lock_handle lh; ++ ++ assert("nikita-726", inode != NULL); ++ ++ /* no stat-data, nothing to update?! 
*/ ++ assert("nikita-3482", !reiser4_inode_get_flag(inode, REISER4_NO_SD)); ++ ++ init_lh(&lh); ++ ++ result = locate_inode_sd(inode, &key, &coord, &lh); ++ if (result == 0) ++ result = update_sd_at(inode, &coord, &key, &lh); ++ done_lh(&lh); ++ ++ return result; ++} ++ ++/* helper for reiser4_delete_object_common and reiser4_delete_dir_common. ++ Remove object stat data. Space for that must be reserved by caller before ++*/ ++static int ++common_object_delete_no_reserve(struct inode *inode /* object to remove */ ) ++{ ++ int result; ++ ++ assert("nikita-1477", inode != NULL); ++ ++ if (!reiser4_inode_get_flag(inode, REISER4_NO_SD)) { ++ reiser4_key sd_key; ++ ++ DQUOT_FREE_INODE(inode); ++ DQUOT_DROP(inode); ++ ++ build_sd_key(inode, &sd_key); ++ result = ++ reiser4_cut_tree(reiser4_tree_by_inode(inode), ++ &sd_key, &sd_key, NULL, 0); ++ if (result == 0) { ++ reiser4_inode_set_flag(inode, REISER4_NO_SD); ++ result = oid_release(inode->i_sb, get_inode_oid(inode)); ++ if (result == 0) { ++ oid_count_released(); ++ ++ result = safe_link_del(reiser4_tree_by_inode(inode), ++ get_inode_oid(inode), ++ SAFE_UNLINK); ++ } ++ } ++ } else ++ result = 0; ++ return result; ++} ++ ++/* helper for safelink_common */ ++static int process_truncate(struct inode *inode, __u64 size) ++{ ++ int result; ++ struct iattr attr; ++ file_plugin *fplug; ++ reiser4_context *ctx; ++ struct dentry dentry; ++ ++ assert("vs-21", is_in_reiser4_context()); ++ ctx = reiser4_init_context(inode->i_sb); ++ assert("vs-22", !IS_ERR(ctx)); ++ ++ attr.ia_size = size; ++ attr.ia_valid = ATTR_SIZE | ATTR_CTIME; ++ fplug = inode_file_plugin(inode); ++ ++ mutex_lock(&inode->i_mutex); ++ assert("vs-1704", get_current_context()->trans->atom == NULL); ++ dentry.d_inode = inode; ++ result = inode->i_op->setattr(&dentry, &attr); ++ mutex_unlock(&inode->i_mutex); ++ ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ ++ return result; ++} ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ 
mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 80 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/hash.c b/fs/reiser4/plugin/hash.c +new file mode 100644 +index 0000000..70f1e40 +--- /dev/null ++++ b/fs/reiser4/plugin/hash.c +@@ -0,0 +1,353 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Hash functions */ ++ ++#include "../debug.h" ++#include "plugin_header.h" ++#include "plugin.h" ++#include "../super.h" ++#include "../inode.h" ++ ++#include ++ ++/* old rupasov (yura) hash */ ++static __u64 hash_rupasov(const unsigned char *name /* name to hash */ , ++ int len /* @name's length */ ) ++{ ++ int i; ++ int j; ++ int pow; ++ __u64 a; ++ __u64 c; ++ ++ assert("nikita-672", name != NULL); ++ assert("nikita-673", len >= 0); ++ ++ for (pow = 1, i = 1; i < len; ++i) ++ pow = pow * 10; ++ ++ if (len == 1) ++ a = name[0] - 48; ++ else ++ a = (name[0] - 48) * pow; ++ ++ for (i = 1; i < len; ++i) { ++ c = name[i] - 48; ++ for (pow = 1, j = i; j < len - 1; ++j) ++ pow = pow * 10; ++ a = a + c * pow; ++ } ++ for (; i < 40; ++i) { ++ c = '0' - 48; ++ for (pow = 1, j = i; j < len - 1; ++j) ++ pow = pow * 10; ++ a = a + c * pow; ++ } ++ ++ for (; i < 256; ++i) { ++ c = i; ++ for (pow = 1, j = i; j < len - 1; ++j) ++ pow = pow * 10; ++ a = a + c * pow; ++ } ++ ++ a = a << 7; ++ return a; ++} ++ ++/* r5 hash */ ++static __u64 hash_r5(const unsigned char *name /* name to hash */ , ++ int len UNUSED_ARG /* @name's length */ ) ++{ ++ __u64 a = 0; ++ ++ assert("nikita-674", name != NULL); ++ assert("nikita-675", len >= 0); ++ ++ while (*name) { ++ a += *name << 4; ++ a += *name >> 4; ++ a *= 11; ++ name++; ++ } ++ return a; ++} ++ ++/* Keyed 32-bit hash function using TEA in a Davis-Meyer function ++ H0 = Key ++ Hi = E Mi(Hi-1) + Hi-1 ++ ++ (see Applied Cryptography, 2nd edition, p448). ++ ++ Jeremy Fitzhardinge 1998 ++ ++ Jeremy has agreed to the contents of reiserfs/README. 
-Hans ++ ++ This code was blindly upgraded to __u64 by s/__u32/__u64/g. ++*/ ++static __u64 hash_tea(const unsigned char *name /* name to hash */ , ++ int len /* @name's length */ ) ++{ ++ __u64 k[] = { 0x9464a485u, 0x542e1a94u, 0x3e846bffu, 0xb75bcfc3u }; ++ ++ __u64 h0 = k[0], h1 = k[1]; ++ __u64 a, b, c, d; ++ __u64 pad; ++ int i; ++ ++ assert("nikita-676", name != NULL); ++ assert("nikita-677", len >= 0); ++ ++#define DELTA 0x9E3779B9u ++#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */ ++#define PARTROUNDS 6 /* 6 gets complete mixing */ ++ ++/* a, b, c, d - data; h0, h1 - accumulated hash */ ++#define TEACORE(rounds) \ ++ do { \ ++ __u64 sum = 0; \ ++ int n = rounds; \ ++ __u64 b0, b1; \ ++ \ ++ b0 = h0; \ ++ b1 = h1; \ ++ \ ++ do \ ++ { \ ++ sum += DELTA; \ ++ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); \ ++ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); \ ++ } while(--n); \ ++ \ ++ h0 += b0; \ ++ h1 += b1; \ ++ } while(0) ++ ++ pad = (__u64) len | ((__u64) len << 8); ++ pad |= pad << 16; ++ ++ while (len >= 16) { ++ a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] << ++ 16 | (__u64) name[3] << 24; ++ b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] << ++ 16 | (__u64) name[7] << 24; ++ c = (__u64) name[8] | (__u64) name[9] << 8 | (__u64) name[10] << ++ 16 | (__u64) name[11] << 24; ++ d = (__u64) name[12] | (__u64) name[13] << 8 | (__u64) name[14] ++ << 16 | (__u64) name[15] << 24; ++ ++ TEACORE(PARTROUNDS); ++ ++ len -= 16; ++ name += 16; ++ } ++ ++ if (len >= 12) { ++ //assert(len < 16); ++ if (len >= 16) ++ *(int *)0 = 0; ++ ++ a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] << ++ 16 | (__u64) name[3] << 24; ++ b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] << ++ 16 | (__u64) name[7] << 24; ++ c = (__u64) name[8] | (__u64) name[9] << 8 | (__u64) name[10] << ++ 16 | (__u64) name[11] << 24; ++ ++ d = pad; ++ for (i = 12; i < len; i++) { ++ d <<= 8; ++ d |= name[i]; ++ } ++ } else if (len >= 
8) { ++ //assert(len < 12); ++ if (len >= 12) ++ *(int *)0 = 0; ++ a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] << ++ 16 | (__u64) name[3] << 24; ++ b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] << ++ 16 | (__u64) name[7] << 24; ++ ++ c = d = pad; ++ for (i = 8; i < len; i++) { ++ c <<= 8; ++ c |= name[i]; ++ } ++ } else if (len >= 4) { ++ //assert(len < 8); ++ if (len >= 8) ++ *(int *)0 = 0; ++ a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] << ++ 16 | (__u64) name[3] << 24; ++ ++ b = c = d = pad; ++ for (i = 4; i < len; i++) { ++ b <<= 8; ++ b |= name[i]; ++ } ++ } else { ++ //assert(len < 4); ++ if (len >= 4) ++ *(int *)0 = 0; ++ a = b = c = d = pad; ++ for (i = 0; i < len; i++) { ++ a <<= 8; ++ a |= name[i]; ++ } ++ } ++ ++ TEACORE(FULLROUNDS); ++ ++/* return 0;*/ ++ return h0 ^ h1; ++ ++} ++ ++/* classical 64 bit Fowler/Noll/Vo-1 (FNV-1) hash. ++ ++ See http://www.isthe.com/chongo/tech/comp/fnv/ for details. ++ ++ Excerpts: ++ ++ FNV hashes are designed to be fast while maintaining a low collision ++ rate. ++ ++ [This version also seems to preserve lexicographical order locally.] ++ ++ FNV hash algorithms and source code have been released into the public ++ domain. 
++ ++*/ ++static __u64 hash_fnv1(const unsigned char *name /* name to hash */ , ++ int len UNUSED_ARG /* @name's length */ ) ++{ ++ unsigned long long a = 0xcbf29ce484222325ull; ++ const unsigned long long fnv_64_prime = 0x100000001b3ull; ++ ++ assert("nikita-678", name != NULL); ++ assert("nikita-679", len >= 0); ++ ++ /* FNV-1 hash each octet in the buffer */ ++ for (; *name; ++name) { ++ /* multiply by the 32 bit FNV magic prime mod 2^64 */ ++ a *= fnv_64_prime; ++ /* xor the bottom with the current octet */ ++ a ^= (unsigned long long)(*name); ++ } ++ /* return our new hash value */ ++ return a; ++} ++ ++/* degenerate hash function used to simplify testing of non-unique key ++ handling */ ++static __u64 hash_deg(const unsigned char *name UNUSED_ARG /* name to hash */ , ++ int len UNUSED_ARG /* @name's length */ ) ++{ ++ return 0xc0c0c0c010101010ull; ++} ++ ++static int change_hash(struct inode *inode, ++ reiser4_plugin * plugin, ++ pset_member memb) ++{ ++ int result; ++ ++ assert("nikita-3503", inode != NULL); ++ assert("nikita-3504", plugin != NULL); ++ ++ assert("nikita-3505", is_reiser4_inode(inode)); ++ assert("nikita-3507", plugin->h.type_id == REISER4_HASH_PLUGIN_TYPE); ++ ++ if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE)) ++ return RETERR(-EINVAL); ++ ++ result = 0; ++ if (inode_hash_plugin(inode) == NULL || ++ inode_hash_plugin(inode)->h.id != plugin->h.id) { ++ if (is_dir_empty(inode) == 0) ++ result = aset_set_unsafe(&reiser4_inode_data(inode)->pset, ++ PSET_HASH, plugin); ++ else ++ result = RETERR(-ENOTEMPTY); ++ ++ } ++ return result; ++} ++ ++static reiser4_plugin_ops hash_plugin_ops = { ++ .init = NULL, ++ .load = NULL, ++ .save_len = NULL, ++ .save = NULL, ++ .change = change_hash ++}; ++ ++/* hash plugins */ ++hash_plugin hash_plugins[LAST_HASH_ID] = { ++ [RUPASOV_HASH_ID] = { ++ .h = { ++ .type_id = REISER4_HASH_PLUGIN_TYPE, ++ .id = RUPASOV_HASH_ID, ++ .pops = &hash_plugin_ops, ++ .label = "rupasov", ++ .desc = 
"Original Yura's hash", ++ .linkage = {NULL, NULL} ++ }, ++ .hash = hash_rupasov ++ }, ++ [R5_HASH_ID] = { ++ .h = { ++ .type_id = REISER4_HASH_PLUGIN_TYPE, ++ .id = R5_HASH_ID, ++ .pops = &hash_plugin_ops, ++ .label = "r5", ++ .desc = "r5 hash", ++ .linkage = {NULL, NULL} ++ }, ++ .hash = hash_r5 ++ }, ++ [TEA_HASH_ID] = { ++ .h = { ++ .type_id = REISER4_HASH_PLUGIN_TYPE, ++ .id = TEA_HASH_ID, ++ .pops = &hash_plugin_ops, ++ .label = "tea", ++ .desc = "tea hash", ++ .linkage = {NULL, NULL} ++ }, ++ .hash = hash_tea ++ }, ++ [FNV1_HASH_ID] = { ++ .h = { ++ .type_id = REISER4_HASH_PLUGIN_TYPE, ++ .id = FNV1_HASH_ID, ++ .pops = &hash_plugin_ops, ++ .label = "fnv1", ++ .desc = "fnv1 hash", ++ .linkage = {NULL, NULL} ++ }, ++ .hash = hash_fnv1 ++ }, ++ [DEGENERATE_HASH_ID] = { ++ .h = { ++ .type_id = REISER4_HASH_PLUGIN_TYPE, ++ .id = DEGENERATE_HASH_ID, ++ .pops = &hash_plugin_ops, ++ .label = "degenerate hash", ++ .desc = "Degenerate hash: only for testing", ++ .linkage = {NULL, NULL} ++ }, ++ .hash = hash_deg ++ } ++}; ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/inode_ops.c b/fs/reiser4/plugin/inode_ops.c +new file mode 100644 +index 0000000..48430f7 +--- /dev/null ++++ b/fs/reiser4/plugin/inode_ops.c +@@ -0,0 +1,897 @@ ++/* ++ * Copyright 2005 by Hans Reiser, licensing governed by reiser4/README ++ */ ++ ++/* ++ * this file contains typical implementations for most of methods of struct ++ * inode_operations ++ */ ++ ++#include "../inode.h" ++#include "../safe_link.h" ++ ++#include ++#include ++ ++static int create_vfs_object(struct inode *parent, struct dentry *dentry, ++ reiser4_object_create_data *data); ++ ++/** ++ * reiser4_create_common - create of inode operations ++ * @parent: inode of parent directory ++ * @dentry: dentry of new object to create ++ * @mode: the permissions to use ++ * @nameidata: ++ * ++ * This is common implementation of vfs's create method of struct ++ * inode_operations. ++ * Creates regular file using file plugin from parent directory plugin set. ++ */ ++int reiser4_create_common(struct inode *parent, struct dentry *dentry, ++ int mode, struct nameidata *nameidata) ++{ ++ reiser4_object_create_data data; ++ file_plugin *fplug; ++ ++ memset(&data, 0, sizeof data); ++ data.mode = S_IFREG | mode; ++ fplug = child_create_plugin(parent) ? 
: inode_create_plugin(parent); ++ if (!plugin_of_group(fplug, REISER4_REGULAR_FILE)) { ++ warning("vpf-1900", "'%s' is not a regular file plugin.", ++ fplug->h.label); ++ return RETERR(-EIO); ++ } ++ data.id = fplug->h.id; ++ return create_vfs_object(parent, dentry, &data); ++} ++ ++int reiser4_lookup_name(struct inode *dir, struct dentry *, reiser4_key *); ++void check_light_weight(struct inode *inode, struct inode *parent); ++ ++/** ++ * reiser4_lookup_common - lookup of inode operations ++ * @parent: inode of directory to lookup into ++ * @dentry: name to look for ++ * @nameidata: ++ * ++ * This is common implementation of vfs's lookup method of struct ++ * inode_operations. ++ */ ++struct dentry *reiser4_lookup_common(struct inode *parent, ++ struct dentry *dentry, ++ struct nameidata *nameidata) ++{ ++ reiser4_context *ctx; ++ int result; ++ struct dentry *new; ++ struct inode *inode; ++ reiser4_dir_entry_desc entry; ++ ++ ctx = reiser4_init_context(parent->i_sb); ++ if (IS_ERR(ctx)) ++ return (struct dentry *)ctx; ++ ++ /* set up operations on dentry. */ ++ dentry->d_op = &get_super_private(parent->i_sb)->ops.dentry; ++ ++ result = reiser4_lookup_name(parent, dentry, &entry.key); ++ if (result) { ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ if (result == -ENOENT) { ++ /* object not found */ ++ if (!IS_DEADDIR(parent)) ++ d_add(dentry, NULL); ++ return NULL; ++ } ++ return ERR_PTR(result); ++ } ++ ++ inode = reiser4_iget(parent->i_sb, &entry.key, 0); ++ if (IS_ERR(inode)) { ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return ERR_PTR(PTR_ERR(inode)); ++ } ++ ++ /* success */ ++ check_light_weight(inode, parent); ++ new = d_splice_alias(inode, dentry); ++ reiser4_iget_complete(inode); ++ ++ /* prevent balance_dirty_pages() from being called: we don't want to ++ * do this under directory i_mutex. 
*/ ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return new; ++} ++ ++static reiser4_block_nr common_estimate_link(struct inode *parent, ++ struct inode *object); ++int reiser4_update_dir(struct inode *); ++ ++/** ++ * reiser4_link_common - link of inode operations ++ * @existing: dentry of object which is to get new name ++ * @parent: directory where new name is to be created ++ * @newname: new name ++ * ++ * This is common implementation of vfs's link method of struct ++ * inode_operations. ++ */ ++int reiser4_link_common(struct dentry *existing, struct inode *parent, ++ struct dentry *newname) ++{ ++ reiser4_context *ctx; ++ int result; ++ struct inode *object; ++ dir_plugin *parent_dplug; ++ reiser4_dir_entry_desc entry; ++ reiser4_object_create_data data; ++ reiser4_block_nr reserve; ++ ++ ctx = reiser4_init_context(parent->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ assert("nikita-1431", existing != NULL); ++ assert("nikita-1432", parent != NULL); ++ assert("nikita-1433", newname != NULL); ++ ++ object = existing->d_inode; ++ assert("nikita-1434", object != NULL); ++ ++ /* check for race with create_object() */ ++ if (reiser4_inode_get_flag(object, REISER4_IMMUTABLE)) { ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return RETERR(-E_REPEAT); ++ } ++ ++ parent_dplug = inode_dir_plugin(parent); ++ ++ memset(&entry, 0, sizeof entry); ++ entry.obj = object; ++ ++ data.mode = object->i_mode; ++ data.id = inode_file_plugin(object)->h.id; ++ ++ reserve = common_estimate_link(parent, existing->d_inode); ++ if ((__s64) reserve < 0) { ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return reserve; ++ } ++ ++ if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) { ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return RETERR(-ENOSPC); ++ } ++ ++ /* ++ * Subtle race handling: sys_link() doesn't take i_mutex on @parent. 
It ++ * means that link(2) can race against unlink(2) or rename(2), and ++ * inode is dead (->i_nlink == 0) when reiser4_link() is entered. ++ * ++ * For such inode we have to undo special processing done in ++ * reiser4_unlink() viz. creation of safe-link. ++ */ ++ if (unlikely(object->i_nlink == 0)) { ++ result = safe_link_del(reiser4_tree_by_inode(object), ++ get_inode_oid(object), SAFE_UNLINK); ++ if (result != 0) { ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ } ++ ++ /* increment nlink of @existing and update its stat data */ ++ result = reiser4_add_nlink(object, parent, 1); ++ if (result == 0) { ++ /* add entry to the parent */ ++ result = ++ parent_dplug->add_entry(parent, newname, &data, &entry); ++ if (result != 0) { ++ /* failed to add entry to the parent, decrement nlink ++ of @existing */ ++ reiser4_del_nlink(object, parent, 1); ++ /* ++ * now, if that failed, we have a file with too big ++ * nlink---space leak, much better than directory ++ * entry pointing to nowhere ++ */ ++ } ++ } ++ if (result == 0) { ++ atomic_inc(&object->i_count); ++ /* ++ * Upon successful completion, link() shall mark for update ++ * the st_ctime field of the file. Also, the st_ctime and ++ * st_mtime fields of the directory that contains the new ++ * entry shall be marked for update. --SUS ++ */ ++ result = reiser4_update_dir(parent); ++ } ++ if (result == 0) ++ d_instantiate(newname, existing->d_inode); ++ ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++static int unlink_check_and_grab(struct inode *parent, struct dentry *victim); ++ ++/** ++ * reiser4_unlink_common - unlink of inode operations ++ * @parent: inode of directory to remove name from ++ * @victim: name to be removed ++ * ++ * This is common implementation of vfs's unlink method of struct ++ * inode_operations. 
++ */ ++int reiser4_unlink_common(struct inode *parent, struct dentry *victim) ++{ ++ reiser4_context *ctx; ++ int result; ++ struct inode *object; ++ file_plugin *fplug; ++ ++ ctx = reiser4_init_context(parent->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ object = victim->d_inode; ++ fplug = inode_file_plugin(object); ++ assert("nikita-2882", fplug->detach != NULL); ++ ++ result = unlink_check_and_grab(parent, victim); ++ if (result != 0) { ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ ++ result = fplug->detach(object, parent); ++ if (result == 0) { ++ dir_plugin *parent_dplug; ++ reiser4_dir_entry_desc entry; ++ ++ parent_dplug = inode_dir_plugin(parent); ++ memset(&entry, 0, sizeof entry); ++ ++ /* first, delete directory entry */ ++ result = parent_dplug->rem_entry(parent, victim, &entry); ++ if (result == 0) { ++ /* ++ * if name was removed successfully, we _have_ to ++ * return 0 from this function, because upper level ++ * caller (vfs_{rmdir,unlink}) expect this. ++ * ++ * now that directory entry is removed, update ++ * stat-data ++ */ ++ reiser4_del_nlink(object, parent, 1); ++ /* ++ * Upon successful completion, unlink() shall mark for ++ * update the st_ctime and st_mtime fields of the ++ * parent directory. Also, if the file's link count is ++ * not 0, the st_ctime field of the file shall be ++ * marked for update. --SUS ++ */ ++ reiser4_update_dir(parent); ++ /* add safe-link for this file */ ++ if (object->i_nlink == 0) ++ safe_link_add(object, SAFE_UNLINK); ++ } ++ } ++ ++ if (unlikely(result != 0)) { ++ if (result != -ENOMEM) ++ warning("nikita-3398", "Cannot unlink %llu (%i)", ++ (unsigned long long)get_inode_oid(object), ++ result); ++ /* if operation failed commit pending inode modifications to ++ * the stat-data */ ++ reiser4_update_sd(object); ++ reiser4_update_sd(parent); ++ } ++ ++ reiser4_release_reserved(object->i_sb); ++ ++ /* @object's i_ctime was updated by ->rem_link() method(). 
*/ ++ ++ /* @victim can be already removed from the disk by this time. Inode is ++ then marked so that iput() wouldn't try to remove stat data. But ++ inode itself is still there. ++ */ ++ ++ /* ++ * we cannot release directory semaphore here, because name has ++ * already been deleted, but dentry (@victim) still exists. Prevent ++ * balance_dirty_pages() from being called on exiting this context: we ++ * don't want to do this under directory i_mutex. ++ */ ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++/** ++ * reiser4_symlink_common - symlink of inode operations ++ * @parent: inode of parent directory ++ * @dentry: dentry of object to be created ++ * @linkname: string symlink is to contain ++ * ++ * This is common implementation of vfs's symlink method of struct ++ * inode_operations. ++ * Creates object using file plugin SYMLINK_FILE_PLUGIN_ID. ++ */ ++int reiser4_symlink_common(struct inode *parent, struct dentry *dentry, ++ const char *linkname) ++{ ++ reiser4_object_create_data data; ++ ++ memset(&data, 0, sizeof data); ++ data.name = linkname; ++ data.id = SYMLINK_FILE_PLUGIN_ID; ++ data.mode = S_IFLNK | S_IRWXUGO; ++ return create_vfs_object(parent, dentry, &data); ++} ++ ++/** ++ * reiser4_mkdir_common - mkdir of inode operations ++ * @parent: inode of parent directory ++ * @dentry: dentry of object to be created ++ * @mode: the permissions to use ++ * ++ * This is common implementation of vfs's mkdir method of struct ++ * inode_operations. ++ * Creates object using file plugin DIRECTORY_FILE_PLUGIN_ID. 
++ */ ++int reiser4_mkdir_common(struct inode *parent, struct dentry *dentry, int mode) ++{ ++ reiser4_object_create_data data; ++ ++ memset(&data, 0, sizeof data); ++ data.mode = S_IFDIR | mode; ++ data.id = DIRECTORY_FILE_PLUGIN_ID; ++ return create_vfs_object(parent, dentry, &data); ++} ++ ++/** ++ * reiser4_mknod_common - mknod of inode operations ++ * @parent: inode of parent directory ++ * @dentry: dentry of object to be created ++ * @mode: the permissions to use and file type ++ * @rdev: minor and major of new device file ++ * ++ * This is common implementation of vfs's mknod method of struct ++ * inode_operations. ++ * Creates object using file plugin SPECIAL_FILE_PLUGIN_ID. ++ */ ++int reiser4_mknod_common(struct inode *parent, struct dentry *dentry, ++ int mode, dev_t rdev) ++{ ++ reiser4_object_create_data data; ++ ++ memset(&data, 0, sizeof data); ++ data.mode = mode; ++ data.rdev = rdev; ++ data.id = SPECIAL_FILE_PLUGIN_ID; ++ return create_vfs_object(parent, dentry, &data); ++} ++ ++/* ++ * implementation of vfs's rename method of struct inode_operations for typical ++ * directory is in inode_ops_rename.c ++ */ ++ ++/** ++ * reiser4_follow_link_common - follow_link of inode operations ++ * @dentry: dentry of symlink ++ * @data: ++ * ++ * This is common implementation of vfs's followlink method of struct ++ * inode_operations. ++ * Assumes that inode's i_private points to the content of symbolic link. 
++ */ ++void *reiser4_follow_link_common(struct dentry *dentry, struct nameidata *nd) ++{ ++ assert("vs-851", S_ISLNK(dentry->d_inode->i_mode)); ++ ++ if (!dentry->d_inode->i_private ++ || !reiser4_inode_get_flag(dentry->d_inode, ++ REISER4_GENERIC_PTR_USED)) ++ return ERR_PTR(RETERR(-EINVAL)); ++ nd_set_link(nd, dentry->d_inode->i_private); ++ return NULL; ++} ++ ++/** ++ * reiser4_permission_common - permission of inode operations ++ * @inode: inode to check permissions for ++ * @mask: mode bits to check permissions for ++ * @nameidata: ++ * ++ * Uses generic function to check for rwx permissions. ++ */ ++int reiser4_permission_common(struct inode *inode, int mask, ++ struct nameidata *nameidata) ++{ ++ return generic_permission(inode, mask, NULL); ++} ++ ++static int setattr_reserve(reiser4_tree *); ++ ++/* this is common implementation of vfs's setattr method of struct ++ inode_operations ++*/ ++int reiser4_setattr_common(struct dentry *dentry, struct iattr *attr) ++{ ++ reiser4_context *ctx; ++ struct inode *inode; ++ int result; ++ ++ inode = dentry->d_inode; ++ result = inode_change_ok(inode, attr); ++ if (result) ++ return result; ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ assert("nikita-3119", !(attr->ia_valid & ATTR_SIZE)); ++ ++ /* ++ * grab disk space and call standard inode_setattr(). ++ */ ++ result = setattr_reserve(reiser4_tree_by_inode(inode)); ++ if (!result) { ++ if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ++ || (attr->ia_valid & ATTR_GID ++ && attr->ia_gid != inode->i_gid)) { ++ result = DQUOT_TRANSFER(inode, attr) ? 
-EDQUOT : 0; ++ if (result) { ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ } ++ result = inode_setattr(inode, attr); ++ if (!result) ++ reiser4_update_sd(inode); ++ } ++ ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++/* this is common implementation of vfs's getattr method of struct ++ inode_operations ++*/ ++int reiser4_getattr_common(struct vfsmount *mnt UNUSED_ARG, ++ struct dentry *dentry, struct kstat *stat) ++{ ++ struct inode *obj; ++ ++ assert("nikita-2298", dentry != NULL); ++ assert("nikita-2299", stat != NULL); ++ assert("nikita-2300", dentry->d_inode != NULL); ++ ++ obj = dentry->d_inode; ++ ++ stat->dev = obj->i_sb->s_dev; ++ stat->ino = oid_to_uino(get_inode_oid(obj)); ++ stat->mode = obj->i_mode; ++ /* don't confuse userland with huge nlink. This is not entirely ++ * correct, because nlink_t is not necessary 16 bit signed. */ ++ stat->nlink = min(obj->i_nlink, (typeof(obj->i_nlink)) 0x7fff); ++ stat->uid = obj->i_uid; ++ stat->gid = obj->i_gid; ++ stat->rdev = obj->i_rdev; ++ stat->atime = obj->i_atime; ++ stat->mtime = obj->i_mtime; ++ stat->ctime = obj->i_ctime; ++ stat->size = obj->i_size; ++ stat->blocks = ++ (inode_get_bytes(obj) + VFS_BLKSIZE - 1) >> VFS_BLKSIZE_BITS; ++ /* "preferred" blocksize for efficient file system I/O */ ++ stat->blksize = get_super_private(obj->i_sb)->optimal_io_size; ++ ++ return 0; ++} ++ ++/* Estimate the maximum amount of nodes which might be allocated or changed on ++ typical new object creation. Typical creation consists of calling create ++ method of file plugin, adding directory entry to parent and update parent ++ directory's stat data. 
++*/ ++static reiser4_block_nr estimate_create_vfs_object(struct inode *parent, /* parent object */ ++ struct inode *object ++ /* object */ ) ++{ ++ assert("vpf-309", parent != NULL); ++ assert("vpf-307", object != NULL); ++ ++ return ++ /* object creation estimation */ ++ inode_file_plugin(object)->estimate.create(object) + ++ /* stat data of parent directory estimation */ ++ inode_file_plugin(parent)->estimate.update(parent) + ++ /* adding entry estimation */ ++ inode_dir_plugin(parent)->estimate.add_entry(parent) + ++ /* to undo in the case of failure */ ++ inode_dir_plugin(parent)->estimate.rem_entry(parent); ++} ++ ++/* Create child in directory. ++ ++ . get object's plugin ++ . get fresh inode ++ . initialize inode ++ . add object's stat-data ++ . initialize object's directory ++ . add entry to the parent ++ . instantiate dentry ++ ++*/ ++static int do_create_vfs_child(reiser4_object_create_data * data, /* parameters of new ++ object */ ++ struct inode **retobj) ++{ ++ int result; ++ ++ struct dentry *dentry; /* parent object */ ++ struct inode *parent; /* new name */ ++ ++ dir_plugin *par_dir; /* directory plugin on the parent */ ++ dir_plugin *obj_dir; /* directory plugin on the new object */ ++ file_plugin *obj_plug; /* object plugin on the new object */ ++ struct inode *object; /* new object */ ++ reiser4_block_nr reserve; ++ ++ reiser4_dir_entry_desc entry; /* new directory entry */ ++ ++ assert("nikita-1420", data != NULL); ++ parent = data->parent; ++ dentry = data->dentry; ++ ++ assert("nikita-1418", parent != NULL); ++ assert("nikita-1419", dentry != NULL); ++ ++ /* check, that name is acceptable for parent */ ++ par_dir = inode_dir_plugin(parent); ++ if (par_dir->is_name_acceptable && ++ !par_dir->is_name_acceptable(parent, ++ dentry->d_name.name, ++ (int)dentry->d_name.len)) ++ return RETERR(-ENAMETOOLONG); ++ ++ result = 0; ++ obj_plug = file_plugin_by_id((int)data->id); ++ if (obj_plug == NULL) { ++ warning("nikita-430", "Cannot find plugin %i", 
data->id); ++ return RETERR(-ENOENT); ++ } ++ object = new_inode(parent->i_sb); ++ if (object == NULL) ++ return RETERR(-ENOMEM); ++ /* we'll update i_nlink below */ ++ object->i_nlink = 0; ++ /* new_inode() initializes i_ino to "arbitrary" value. Reset it to 0, ++ * to simplify error handling: if some error occurs before i_ino is ++ * initialized with oid, i_ino should already be set to some ++ * distinguished value. */ ++ object->i_ino = 0; ++ ++ /* So that on error iput will be called. */ ++ *retobj = object; ++ ++ if (DQUOT_ALLOC_INODE(object)) { ++ DQUOT_DROP(object); ++ object->i_flags |= S_NOQUOTA; ++ return RETERR(-EDQUOT); ++ } ++ ++ memset(&entry, 0, sizeof entry); ++ entry.obj = object; ++ ++ set_plugin(&reiser4_inode_data(object)->pset, PSET_FILE, ++ file_plugin_to_plugin(obj_plug)); ++ result = obj_plug->set_plug_in_inode(object, parent, data); ++ if (result) { ++ warning("nikita-431", "Cannot install plugin %i on %llx", ++ data->id, (unsigned long long)get_inode_oid(object)); ++ DQUOT_FREE_INODE(object); ++ object->i_flags |= S_NOQUOTA; ++ return result; ++ } ++ ++ /* reget plugin after installation */ ++ obj_plug = inode_file_plugin(object); ++ ++ if (obj_plug->create_object == NULL) { ++ DQUOT_FREE_INODE(object); ++ object->i_flags |= S_NOQUOTA; ++ return RETERR(-EPERM); ++ } ++ ++ /* if any of hash, tail, sd or permission plugins for newly created ++ object are not set yet set them here inheriting them from parent ++ directory ++ */ ++ assert("nikita-2070", obj_plug->adjust_to_parent != NULL); ++ result = obj_plug->adjust_to_parent(object, ++ parent, ++ object->i_sb->s_root->d_inode); ++ if (result == 0) ++ result = finish_pset(object); ++ if (result != 0) { ++ warning("nikita-432", "Cannot inherit from %llx to %llx", ++ (unsigned long long)get_inode_oid(parent), ++ (unsigned long long)get_inode_oid(object)); ++ DQUOT_FREE_INODE(object); ++ object->i_flags |= S_NOQUOTA; ++ return result; ++ } ++ ++ /* setup inode and file-operations for this inode 
*/ ++ setup_inode_ops(object, data); ++ ++ /* call file plugin's method to initialize plugin specific part of ++ * inode */ ++ if (obj_plug->init_inode_data) ++ obj_plug->init_inode_data(object, data, 1 /*create */ ); ++ ++ /* obtain directory plugin (if any) for new object. */ ++ obj_dir = inode_dir_plugin(object); ++ if (obj_dir != NULL && obj_dir->init == NULL) { ++ DQUOT_FREE_INODE(object); ++ object->i_flags |= S_NOQUOTA; ++ return RETERR(-EPERM); ++ } ++ ++ reiser4_inode_data(object)->locality_id = get_inode_oid(parent); ++ ++ reserve = estimate_create_vfs_object(parent, object); ++ if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) { ++ DQUOT_FREE_INODE(object); ++ object->i_flags |= S_NOQUOTA; ++ return RETERR(-ENOSPC); ++ } ++ ++ /* mark inode `immutable'. We disable changes to the file being ++ created until valid directory entry for it is inserted. Otherwise, ++ if file were expanded and insertion of directory entry fails, we ++ have to remove file, but we only alloted enough space in ++ transaction to remove _empty_ file. 3.x code used to remove stat ++ data in different transaction thus possibly leaking disk space on ++ crash. This all only matters if it's possible to access file ++ without name, for example, by inode number ++ */ ++ reiser4_inode_set_flag(object, REISER4_IMMUTABLE); ++ ++ /* create empty object, this includes allocation of new objectid. For ++ directories this implies creation of dot and dotdot */ ++ assert("nikita-2265", reiser4_inode_get_flag(object, REISER4_NO_SD)); ++ ++ /* mark inode as `loaded'. From this point onward ++ reiser4_delete_inode() will try to remove its stat-data. 
*/ ++ reiser4_inode_set_flag(object, REISER4_LOADED); ++ ++ result = obj_plug->create_object(object, parent, data); ++ if (result != 0) { ++ reiser4_inode_clr_flag(object, REISER4_IMMUTABLE); ++ if (result != -ENAMETOOLONG && result != -ENOMEM) ++ warning("nikita-2219", ++ "Failed to create sd for %llu", ++ (unsigned long long)get_inode_oid(object)); ++ DQUOT_FREE_INODE(object); ++ object->i_flags |= S_NOQUOTA; ++ return result; ++ } ++ ++ if (obj_dir != NULL) ++ result = obj_dir->init(object, parent, data); ++ if (result == 0) { ++ assert("nikita-434", !reiser4_inode_get_flag(object, ++ REISER4_NO_SD)); ++ /* insert inode into VFS hash table */ ++ insert_inode_hash(object); ++ /* create entry */ ++ result = par_dir->add_entry(parent, dentry, data, &entry); ++ if (result == 0) { ++ result = reiser4_add_nlink(object, parent, 0); ++ /* If O_CREAT is set and the file did not previously ++ exist, upon successful completion, open() shall ++ mark for update the st_atime, st_ctime, and ++ st_mtime fields of the file and the st_ctime and ++ st_mtime fields of the parent directory. --SUS ++ */ ++ /* @object times are already updated by ++ reiser4_add_nlink() */ ++ if (result == 0) ++ reiser4_update_dir(parent); ++ if (result != 0) ++ /* cleanup failure to add nlink */ ++ par_dir->rem_entry(parent, dentry, &entry); ++ } ++ if (result != 0) ++ /* cleanup failure to add entry */ ++ obj_plug->detach(object, parent); ++ } else if (result != -ENOMEM) ++ warning("nikita-2219", "Failed to initialize dir for %llu: %i", ++ (unsigned long long)get_inode_oid(object), result); ++ ++ /* ++ * update stat-data, committing all pending modifications to the inode ++ * fields. 
++ */ ++ reiser4_update_sd(object); ++ if (result != 0) { ++ DQUOT_FREE_INODE(object); ++ object->i_flags |= S_NOQUOTA; ++ /* if everything was ok (result == 0), parent stat-data is ++ * already updated above (update_parent_dir()) */ ++ reiser4_update_sd(parent); ++ /* failure to create entry, remove object */ ++ obj_plug->delete_object(object); ++ } ++ ++ /* file has name now, clear immutable flag */ ++ reiser4_inode_clr_flag(object, REISER4_IMMUTABLE); ++ ++ /* on error, iput() will call ->delete_inode(). We should keep track ++ of the existence of stat-data for this inode and avoid attempt to ++ remove it in reiser4_delete_inode(). This is accomplished through ++ REISER4_NO_SD bit in inode.u.reiser4_i.plugin.flags ++ */ ++ return result; ++} ++ ++/* this is helper for common implementations of reiser4_mkdir, reiser4_create, ++ reiser4_mknod and reiser4_symlink ++*/ ++static int ++create_vfs_object(struct inode *parent, ++ struct dentry *dentry, reiser4_object_create_data * data) ++{ ++ reiser4_context *ctx; ++ int result; ++ struct inode *child; ++ ++ ctx = reiser4_init_context(parent->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ context_set_commit_async(ctx); ++ ++ data->parent = parent; ++ data->dentry = dentry; ++ child = NULL; ++ result = do_create_vfs_child(data, &child); ++ if (unlikely(result != 0)) { ++ if (child != NULL) { ++ reiser4_make_bad_inode(child); ++ iput(child); ++ } ++ } else ++ d_instantiate(dentry, child); ++ ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++/* helper for link_common. 
Estimate disk space necessary to add a link ++ from @parent to @object ++*/ ++static reiser4_block_nr common_estimate_link(struct inode *parent, /* parent directory */ ++ struct inode *object ++ /* object to which new link is being cerated */ ++ ) ++{ ++ reiser4_block_nr res = 0; ++ file_plugin *fplug; ++ dir_plugin *dplug; ++ ++ assert("vpf-317", object != NULL); ++ assert("vpf-318", parent != NULL); ++ ++ fplug = inode_file_plugin(object); ++ dplug = inode_dir_plugin(parent); ++ /* VS-FIXME-HANS: why do we do fplug->estimate.update(object) twice instead of multiplying by 2? */ ++ /* reiser4_add_nlink(object) */ ++ res += fplug->estimate.update(object); ++ /* add_entry(parent) */ ++ res += dplug->estimate.add_entry(parent); ++ /* reiser4_del_nlink(object) */ ++ res += fplug->estimate.update(object); ++ /* update_dir(parent) */ ++ res += inode_file_plugin(parent)->estimate.update(parent); ++ /* safe-link */ ++ res += estimate_one_item_removal(reiser4_tree_by_inode(object)); ++ ++ return res; ++} ++ ++/* Estimate disk space necessary to remove a link between @parent and ++ @object. ++*/ ++static reiser4_block_nr estimate_unlink(struct inode *parent, /* parent directory */ ++ struct inode *object ++ /* object to which new link is being cerated */ ++ ) ++{ ++ reiser4_block_nr res = 0; ++ file_plugin *fplug; ++ dir_plugin *dplug; ++ ++ assert("vpf-317", object != NULL); ++ assert("vpf-318", parent != NULL); ++ ++ fplug = inode_file_plugin(object); ++ dplug = inode_dir_plugin(parent); ++ ++ /* rem_entry(parent) */ ++ res += dplug->estimate.rem_entry(parent); ++ /* reiser4_del_nlink(object) */ ++ res += fplug->estimate.update(object); ++ /* update_dir(parent) */ ++ res += inode_file_plugin(parent)->estimate.update(parent); ++ /* fplug->unlink */ ++ res += fplug->estimate.unlink(object, parent); ++ /* safe-link */ ++ res += estimate_one_insert_item(reiser4_tree_by_inode(object)); ++ ++ return res; ++} ++ ++/* helper for reiser4_unlink_common. 
Estimate and grab space for unlink. */ ++static int unlink_check_and_grab(struct inode *parent, struct dentry *victim) ++{ ++ file_plugin *fplug; ++ struct inode *child; ++ int result; ++ ++ result = 0; ++ child = victim->d_inode; ++ fplug = inode_file_plugin(child); ++ ++ /* check for race with create_object() */ ++ if (reiser4_inode_get_flag(child, REISER4_IMMUTABLE)) ++ return RETERR(-E_REPEAT); ++ /* object being deleted should have stat data */ ++ assert("vs-949", !reiser4_inode_get_flag(child, REISER4_NO_SD)); ++ ++ /* ask object plugin */ ++ if (fplug->can_rem_link != NULL && !fplug->can_rem_link(child)) ++ return RETERR(-ENOTEMPTY); ++ ++ result = (int)estimate_unlink(parent, child); ++ if (result < 0) ++ return result; ++ ++ return reiser4_grab_reserved(child->i_sb, result, BA_CAN_COMMIT); ++} ++ ++/* helper for reiser4_setattr_common */ ++static int setattr_reserve(reiser4_tree * tree) ++{ ++ assert("vs-1096", is_grab_enabled(get_current_context())); ++ return reiser4_grab_space(estimate_one_insert_into_item(tree), ++ BA_CAN_COMMIT); ++} ++ ++/* helper function. Standards require that for many file-system operations ++ on success ctime and mtime of parent directory is to be updated. */ ++int reiser4_update_dir(struct inode *dir) ++{ ++ assert("nikita-2525", dir != NULL); ++ ++ dir->i_ctime = dir->i_mtime = CURRENT_TIME; ++ return reiser4_update_sd(dir); ++} +diff --git a/fs/reiser4/plugin/inode_ops_rename.c b/fs/reiser4/plugin/inode_ops_rename.c +new file mode 100644 +index 0000000..a64e777 +--- /dev/null ++++ b/fs/reiser4/plugin/inode_ops_rename.c +@@ -0,0 +1,914 @@ ++/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++#include "../inode.h" ++#include "../safe_link.h" ++ ++static const char *possible_leak = "Possible disk space leak."; ++ ++/* re-bind existing name at @from_coord in @from_dir to point to @to_inode. 
++ ++ Helper function called from hashed_rename() */ ++static int replace_name(struct inode *to_inode, /* inode where @from_coord is ++ * to be re-targeted at */ ++ struct inode *from_dir, /* directory where @from_coord ++ * lives */ ++ struct inode *from_inode, /* inode @from_coord ++ * originally point to */ ++ coord_t * from_coord, /* where directory entry is in ++ * the tree */ ++ lock_handle * from_lh /* lock handle on @from_coord */ ) ++{ ++ item_plugin *from_item; ++ int result; ++ znode *node; ++ ++ coord_clear_iplug(from_coord); ++ node = from_coord->node; ++ result = zload(node); ++ if (result != 0) ++ return result; ++ from_item = item_plugin_by_coord(from_coord); ++ if (plugin_of_group(item_plugin_by_coord(from_coord), ++ DIR_ENTRY_ITEM_TYPE)) ++ { ++ reiser4_key to_key; ++ ++ build_sd_key(to_inode, &to_key); ++ ++ /* everything is found and prepared to change directory entry ++ at @from_coord to point to @to_inode. ++ ++ @to_inode is just about to get new name, so bump its link ++ counter. ++ ++ */ ++ result = reiser4_add_nlink(to_inode, from_dir, 0); ++ if (result != 0) { ++ /* Don't issue warning: this may be plain -EMLINK */ ++ zrelse(node); ++ return result; ++ } ++ ++ result = ++ from_item->s.dir.update_key(from_coord, &to_key, from_lh); ++ if (result != 0) { ++ reiser4_del_nlink(to_inode, from_dir, 0); ++ zrelse(node); ++ return result; ++ } ++ ++ /* @from_inode just lost its name, he-he. ++ ++ If @from_inode was directory, it contained dotdot pointing ++ to @from_dir. @from_dir i_nlink will be decreased when ++ iput() will be called on @from_inode. ++ ++ If file-system is not ADG (hard-links are ++ supported on directories), iput(from_inode) will not remove ++ @from_inode, and thus above is incorrect, but hard-links on ++ directories are problematic in many other respects. ++ */ ++ result = reiser4_del_nlink(from_inode, from_dir, 0); ++ if (result != 0) { ++ warning("nikita-2330", ++ "Cannot remove link from source: %i. 
%s", ++ result, possible_leak); ++ } ++ /* Has to return success, because entry is already ++ * modified. */ ++ result = 0; ++ ++ /* NOTE-NIKITA consider calling plugin method in stead of ++ accessing inode fields directly. */ ++ from_dir->i_mtime = CURRENT_TIME; ++ } else { ++ warning("nikita-2326", "Unexpected item type"); ++ result = RETERR(-EIO); ++ } ++ zrelse(node); ++ return result; ++} ++ ++/* add new entry pointing to @inode into @dir at @coord, locked by @lh ++ ++ Helper function used by hashed_rename(). */ ++static int add_name(struct inode *inode, /* inode where @coord is to be ++ * re-targeted at */ ++ struct inode *dir, /* directory where @coord lives */ ++ struct dentry *name, /* new name */ ++ coord_t * coord, /* where directory entry is in the tree */ ++ lock_handle * lh, /* lock handle on @coord */ ++ int is_dir /* true, if @inode is directory */ ) ++{ ++ int result; ++ reiser4_dir_entry_desc entry; ++ ++ assert("nikita-2333", lh->node == coord->node); ++ assert("nikita-2334", is_dir == S_ISDIR(inode->i_mode)); ++ ++ memset(&entry, 0, sizeof entry); ++ entry.obj = inode; ++ /* build key of directory entry description */ ++ inode_dir_plugin(dir)->build_entry_key(dir, &name->d_name, &entry.key); ++ ++ /* ext2 does this in different order: first inserts new entry, ++ then increases directory nlink. We don't want do this, ++ because reiser4_add_nlink() calls ->add_link() plugin ++ method that can fail for whatever reason, leaving as with ++ cleanup problems. ++ */ ++ /* @inode is getting new name */ ++ reiser4_add_nlink(inode, dir, 0); ++ /* create @new_name in @new_dir pointing to ++ @old_inode */ ++ result = WITH_COORD(coord, ++ inode_dir_item_plugin(dir)->s.dir.add_entry(dir, ++ coord, ++ lh, ++ name, ++ &entry)); ++ if (result != 0) { ++ int result2; ++ result2 = reiser4_del_nlink(inode, dir, 0); ++ if (result2 != 0) { ++ warning("nikita-2327", ++ "Cannot drop link on %lli %i. 
%s", ++ (unsigned long long)get_inode_oid(inode), ++ result2, possible_leak); ++ } ++ } else ++ INODE_INC_FIELD(dir, i_size); ++ return result; ++} ++ ++static reiser4_block_nr estimate_rename(struct inode *old_dir, /* directory where @old is located */ ++ struct dentry *old_name, /* old name */ ++ struct inode *new_dir, /* directory where @new is located */ ++ struct dentry *new_name /* new name */ ) ++{ ++ reiser4_block_nr res1, res2; ++ dir_plugin *p_parent_old, *p_parent_new; ++ file_plugin *p_child_old, *p_child_new; ++ ++ assert("vpf-311", old_dir != NULL); ++ assert("vpf-312", new_dir != NULL); ++ assert("vpf-313", old_name != NULL); ++ assert("vpf-314", new_name != NULL); ++ ++ p_parent_old = inode_dir_plugin(old_dir); ++ p_parent_new = inode_dir_plugin(new_dir); ++ p_child_old = inode_file_plugin(old_name->d_inode); ++ if (new_name->d_inode) ++ p_child_new = inode_file_plugin(new_name->d_inode); ++ else ++ p_child_new = NULL; ++ ++ /* find_entry - can insert one leaf. */ ++ res1 = res2 = 1; ++ ++ /* replace_name */ ++ { ++ /* reiser4_add_nlink(p_child_old) and reiser4_del_nlink(p_child_old) */ ++ res1 += 2 * p_child_old->estimate.update(old_name->d_inode); ++ /* update key */ ++ res1 += 1; ++ /* reiser4_del_nlink(p_child_new) */ ++ if (p_child_new) ++ res1 += p_child_new->estimate.update(new_name->d_inode); ++ } ++ ++ /* else add_name */ ++ { ++ /* reiser4_add_nlink(p_parent_new) and reiser4_del_nlink(p_parent_new) */ ++ res2 += ++ 2 * inode_file_plugin(new_dir)->estimate.update(new_dir); ++ /* reiser4_add_nlink(p_parent_old) */ ++ res2 += p_child_old->estimate.update(old_name->d_inode); ++ /* add_entry(p_parent_new) */ ++ res2 += p_parent_new->estimate.add_entry(new_dir); ++ /* reiser4_del_nlink(p_parent_old) */ ++ res2 += p_child_old->estimate.update(old_name->d_inode); ++ } ++ ++ res1 = res1 < res2 ? 
res2 : res1; ++ ++ /* reiser4_write_sd(p_parent_new) */ ++ res1 += inode_file_plugin(new_dir)->estimate.update(new_dir); ++ ++ /* reiser4_write_sd(p_child_new) */ ++ if (p_child_new) ++ res1 += p_child_new->estimate.update(new_name->d_inode); ++ ++ /* hashed_rem_entry(p_parent_old) */ ++ res1 += p_parent_old->estimate.rem_entry(old_dir); ++ ++ /* reiser4_del_nlink(p_child_old) */ ++ res1 += p_child_old->estimate.update(old_name->d_inode); ++ ++ /* replace_name */ ++ { ++ /* reiser4_add_nlink(p_parent_dir_new) */ ++ res1 += inode_file_plugin(new_dir)->estimate.update(new_dir); ++ /* update_key */ ++ res1 += 1; ++ /* reiser4_del_nlink(p_parent_new) */ ++ res1 += inode_file_plugin(new_dir)->estimate.update(new_dir); ++ /* reiser4_del_nlink(p_parent_old) */ ++ res1 += inode_file_plugin(old_dir)->estimate.update(old_dir); ++ } ++ ++ /* reiser4_write_sd(p_parent_old) */ ++ res1 += inode_file_plugin(old_dir)->estimate.update(old_dir); ++ ++ /* reiser4_write_sd(p_child_old) */ ++ res1 += p_child_old->estimate.update(old_name->d_inode); ++ ++ return res1; ++} ++ ++static int hashed_rename_estimate_and_grab(struct inode *old_dir, /* directory where @old is located */ ++ struct dentry *old_name, /* old name */ ++ struct inode *new_dir, /* directory where @new is located */ ++ struct dentry *new_name ++ /* new name */ ) ++{ ++ reiser4_block_nr reserve; ++ ++ reserve = estimate_rename(old_dir, old_name, new_dir, new_name); ++ ++ if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) ++ return RETERR(-ENOSPC); ++ ++ return 0; ++} ++ ++/* check whether @old_inode and @new_inode can be moved within file system ++ * tree. This singles out attempts to rename pseudo-files, for example. 
*/ ++static int can_rename(struct inode *old_dir, struct inode *old_inode, ++ struct inode *new_dir, struct inode *new_inode) ++{ ++ file_plugin *fplug; ++ dir_plugin *dplug; ++ ++ assert("nikita-3370", old_inode != NULL); ++ ++ dplug = inode_dir_plugin(new_dir); ++ fplug = inode_file_plugin(old_inode); ++ ++ if (dplug == NULL) ++ return RETERR(-ENOTDIR); ++ else if (new_dir->i_op->create == NULL) ++ return RETERR(-EPERM); ++ else if (!fplug->can_add_link(old_inode)) ++ return RETERR(-EMLINK); ++ else if (new_inode != NULL) { ++ fplug = inode_file_plugin(new_inode); ++ if (fplug->can_rem_link != NULL && ++ !fplug->can_rem_link(new_inode)) ++ return RETERR(-EBUSY); ++ } ++ return 0; ++} ++ ++int reiser4_find_entry(struct inode *, struct dentry *, lock_handle *, ++ znode_lock_mode, reiser4_dir_entry_desc *); ++int reiser4_update_dir(struct inode *); ++ ++/* this is common implementation of vfs's rename method of struct ++ inode_operations ++ See comments in the body. ++ ++ It is arguable that this function can be made generic so, that it ++ will be applicable to any kind of directory plugin that deals with ++ directories composed out of directory entries. The only obstacle ++ here is that we don't have any data-type to represent directory ++ entry. This should be re-considered when more than one different ++ directory plugin will be implemented. ++*/ ++int reiser4_rename_common(struct inode *old_dir /* directory where @old ++ * is located */ , ++ struct dentry *old_name /* old name */ , ++ struct inode *new_dir /* directory where @new ++ * is located */ , ++ struct dentry *new_name /* new name */ ) ++{ ++ /* From `The Open Group Base Specifications Issue 6' ++ ++ If either the old or new argument names a symbolic link, rename() ++ shall operate on the symbolic link itself, and shall not resolve ++ the last component of the argument. 
If the old argument and the new ++ argument resolve to the same existing file, rename() shall return ++ successfully and perform no other action. ++ ++ [this is done by VFS: vfs_rename()] ++ ++ If the old argument points to the pathname of a file that is not a ++ directory, the new argument shall not point to the pathname of a ++ directory. ++ ++ [checked by VFS: vfs_rename->may_delete()] ++ ++ If the link named by the new argument exists, it shall ++ be removed and old renamed to new. In this case, a link named new ++ shall remain visible to other processes throughout the renaming ++ operation and refer either to the file referred to by new or old ++ before the operation began. ++ ++ [we should assure this] ++ ++ Write access permission is required for ++ both the directory containing old and the directory containing new. ++ ++ [checked by VFS: vfs_rename->may_delete(), may_create()] ++ ++ If the old argument points to the pathname of a directory, the new ++ argument shall not point to the pathname of a file that is not a ++ directory. ++ ++ [checked by VFS: vfs_rename->may_delete()] ++ ++ If the directory named by the new argument exists, it ++ shall be removed and old renamed to new. In this case, a link named ++ new shall exist throughout the renaming operation and shall refer ++ either to the directory referred to by new or old before the ++ operation began. ++ ++ [we should assure this] ++ ++ If new names an existing directory, it shall be ++ required to be an empty directory. ++ ++ [we should check this] ++ ++ If the old argument points to a pathname of a symbolic link, the ++ symbolic link shall be renamed. If the new argument points to a ++ pathname of a symbolic link, the symbolic link shall be removed. ++ ++ The new pathname shall not contain a path prefix that names ++ old. Write access permission is required for the directory ++ containing old and the directory containing new. 
If the old ++ argument points to the pathname of a directory, write access ++ permission may be required for the directory named by old, and, if ++ it exists, the directory named by new. ++ ++ [checked by VFS: vfs_rename(), vfs_rename_dir()] ++ ++ If the link named by the new argument exists and the file's link ++ count becomes 0 when it is removed and no process has the file ++ open, the space occupied by the file shall be freed and the file ++ shall no longer be accessible. If one or more processes have the ++ file open when the last link is removed, the link shall be removed ++ before rename() returns, but the removal of the file contents shall ++ be postponed until all references to the file are closed. ++ ++ [iput() handles this, but we can do this manually, a la ++ reiser4_unlink()] ++ ++ Upon successful completion, rename() shall mark for update the ++ st_ctime and st_mtime fields of the parent directory of each file. ++ ++ [N/A] ++ ++ */ ++ reiser4_context *ctx; ++ int result; ++ int is_dir; /* is @old_name directory */ ++ ++ struct inode *old_inode; ++ struct inode *new_inode; ++ coord_t *new_coord; ++ ++ reiser4_dentry_fsdata *new_fsdata; ++ dir_plugin *dplug; ++ file_plugin *fplug; ++ ++ reiser4_dir_entry_desc *old_entry, *new_entry, *dotdot_entry; ++ lock_handle *new_lh, *dotdot_lh; ++ struct dentry *dotdot_name; ++ reiser4_dentry_fsdata *dataonstack; ++ ++ ctx = reiser4_init_context(old_dir->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ old_entry = kmalloc(3 * sizeof(*old_entry) + 2 * sizeof(*new_lh) + ++ sizeof(*dotdot_name) + sizeof(*dataonstack), ++ reiser4_ctx_gfp_mask_get()); ++ if (old_entry == NULL) { ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return RETERR(-ENOMEM); ++ } ++ memset(old_entry, 0, 3 * sizeof(*old_entry) + 2 * sizeof(*new_lh) + ++ sizeof(*dotdot_name) + sizeof(*dataonstack)); ++ ++ new_entry = old_entry + 1; ++ dotdot_entry = old_entry + 2; ++ new_lh = (lock_handle *)(old_entry + 3); ++ dotdot_lh 
= new_lh + 1; ++ dotdot_name = (struct dentry *)(new_lh + 2); ++ dataonstack = (reiser4_dentry_fsdata *)(dotdot_name + 1); ++ ++ assert("nikita-2318", old_dir != NULL); ++ assert("nikita-2319", new_dir != NULL); ++ assert("nikita-2320", old_name != NULL); ++ assert("nikita-2321", new_name != NULL); ++ ++ old_inode = old_name->d_inode; ++ new_inode = new_name->d_inode; ++ ++ dplug = inode_dir_plugin(old_dir); ++ fplug = NULL; ++ ++ new_fsdata = reiser4_get_dentry_fsdata(new_name); ++ if (IS_ERR(new_fsdata)) { ++ kfree(old_entry); ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return PTR_ERR(new_fsdata); ++ } ++ ++ new_coord = &new_fsdata->dec.entry_coord; ++ coord_clear_iplug(new_coord); ++ ++ is_dir = S_ISDIR(old_inode->i_mode); ++ ++ assert("nikita-3461", old_inode->i_nlink >= 1 + !!is_dir); ++ ++ /* if target is existing directory and it's not empty---return error. ++ ++ This check is done specifically, because is_dir_empty() requires ++ tree traversal and have to be done before locks are taken. 
++ */ ++ if (is_dir && new_inode != NULL && is_dir_empty(new_inode) != 0) { ++ kfree(old_entry); ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return RETERR(-ENOTEMPTY); ++ } ++ ++ result = can_rename(old_dir, old_inode, new_dir, new_inode); ++ if (result != 0) { ++ kfree(old_entry); ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ ++ result = hashed_rename_estimate_and_grab(old_dir, old_name, ++ new_dir, new_name); ++ if (result != 0) { ++ kfree(old_entry); ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ ++ init_lh(new_lh); ++ ++ /* find entry for @new_name */ ++ result = reiser4_find_entry(new_dir, new_name, new_lh, ZNODE_WRITE_LOCK, ++ new_entry); ++ ++ if (IS_CBKERR(result)) { ++ done_lh(new_lh); ++ kfree(old_entry); ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++ } ++ ++ reiser4_seal_done(&new_fsdata->dec.entry_seal); ++ ++ /* add or replace name for @old_inode as @new_name */ ++ if (new_inode != NULL) { ++ /* target (@new_name) exists. */ ++ /* Not clear what to do with objects that are ++ both directories and files at the same time. */ ++ if (result == CBK_COORD_FOUND) { ++ result = replace_name(old_inode, ++ new_dir, ++ new_inode, new_coord, new_lh); ++ if (result == 0) ++ fplug = inode_file_plugin(new_inode); ++ } else if (result == CBK_COORD_NOTFOUND) { ++ /* VFS told us that @new_name is bound to existing ++ inode, but we failed to find directory entry. */ ++ warning("nikita-2324", "Target not found"); ++ result = RETERR(-ENOENT); ++ } ++ } else { ++ /* target (@new_name) doesn't exists. */ ++ if (result == CBK_COORD_NOTFOUND) ++ result = add_name(old_inode, ++ new_dir, ++ new_name, new_coord, new_lh, is_dir); ++ else if (result == CBK_COORD_FOUND) { ++ /* VFS told us that @new_name is "negative" dentry, ++ but we found directory entry. 
*/ ++ warning("nikita-2331", "Target found unexpectedly"); ++ result = RETERR(-EIO); ++ } ++ } ++ ++ assert("nikita-3462", ergo(result == 0, ++ old_inode->i_nlink >= 2 + !!is_dir)); ++ ++ /* We are done with all modifications to the @new_dir, release lock on ++ node. */ ++ done_lh(new_lh); ++ ++ if (fplug != NULL) { ++ /* detach @new_inode from name-space */ ++ result = fplug->detach(new_inode, new_dir); ++ if (result != 0) ++ warning("nikita-2330", "Cannot detach %lli: %i. %s", ++ (unsigned long long)get_inode_oid(new_inode), ++ result, possible_leak); ++ } ++ ++ if (new_inode != NULL) ++ reiser4_update_sd(new_inode); ++ ++ if (result == 0) { ++ old_entry->obj = old_inode; ++ ++ dplug->build_entry_key(old_dir, ++ &old_name->d_name, &old_entry->key); ++ ++ /* At this stage new name was introduced for ++ @old_inode. @old_inode, @new_dir, and @new_inode i_nlink ++ counters were updated. ++ ++ We want to remove @old_name now. If @old_inode wasn't ++ directory this is simple. ++ */ ++ result = dplug->rem_entry(old_dir, old_name, old_entry); ++ if (result != 0 && result != -ENOMEM) { ++ warning("nikita-2335", ++ "Cannot remove old name: %i", result); ++ } else { ++ result = reiser4_del_nlink(old_inode, old_dir, 0); ++ if (result != 0 && result != -ENOMEM) { ++ warning("nikita-2337", ++ "Cannot drop link on old: %i", result); ++ } ++ } ++ ++ if (result == 0 && is_dir) { ++ /* @old_inode is directory. We also have to update ++ dotdot entry. */ ++ coord_t *dotdot_coord; ++ ++ memset(dataonstack, 0, sizeof dataonstack); ++ memset(dotdot_entry, 0, sizeof dotdot_entry); ++ dotdot_entry->obj = old_dir; ++ memset(dotdot_name, 0, sizeof dotdot_name); ++ dotdot_name->d_name.name = ".."; ++ dotdot_name->d_name.len = 2; ++ /* ++ * allocate ->d_fsdata on the stack to avoid using ++ * reiser4_get_dentry_fsdata(). Locking is not needed, ++ * because dentry is private to the current thread. 
++ */ ++ dotdot_name->d_fsdata = dataonstack; ++ init_lh(dotdot_lh); ++ ++ dotdot_coord = &dataonstack->dec.entry_coord; ++ coord_clear_iplug(dotdot_coord); ++ ++ result = reiser4_find_entry(old_inode, dotdot_name, ++ dotdot_lh, ZNODE_WRITE_LOCK, ++ dotdot_entry); ++ if (result == 0) { ++ /* replace_name() decreases i_nlink on ++ * @old_dir */ ++ result = replace_name(new_dir, ++ old_inode, ++ old_dir, ++ dotdot_coord, dotdot_lh); ++ } else ++ result = RETERR(-EIO); ++ done_lh(dotdot_lh); ++ } ++ } ++ reiser4_update_dir(new_dir); ++ reiser4_update_dir(old_dir); ++ reiser4_update_sd(old_inode); ++ if (result == 0) { ++ file_plugin *fplug; ++ ++ if (new_inode != NULL) { ++ /* add safe-link for target file (in case we removed ++ * last reference to the poor fellow */ ++ fplug = inode_file_plugin(new_inode); ++ if (new_inode->i_nlink == 0) ++ result = safe_link_add(new_inode, SAFE_UNLINK); ++ } ++ } ++ kfree(old_entry); ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++} ++ ++#if 0 ++int reiser4_rename_common(struct inode *old_dir /* directory where @old ++ * is located */ , ++ struct dentry *old_name /* old name */ , ++ struct inode *new_dir /* directory where @new ++ * is located */ , ++ struct dentry *new_name /* new name */ ) ++{ ++ /* From `The Open Group Base Specifications Issue 6' ++ ++ If either the old or new argument names a symbolic link, rename() ++ shall operate on the symbolic link itself, and shall not resolve ++ the last component of the argument. If the old argument and the new ++ argument resolve to the same existing file, rename() shall return ++ successfully and perform no other action. ++ ++ [this is done by VFS: vfs_rename()] ++ ++ If the old argument points to the pathname of a file that is not a ++ directory, the new argument shall not point to the pathname of a ++ directory. 
++ ++ [checked by VFS: vfs_rename->may_delete()] ++ ++ If the link named by the new argument exists, it shall ++ be removed and old renamed to new. In this case, a link named new ++ shall remain visible to other processes throughout the renaming ++ operation and refer either to the file referred to by new or old ++ before the operation began. ++ ++ [we should assure this] ++ ++ Write access permission is required for ++ both the directory containing old and the directory containing new. ++ ++ [checked by VFS: vfs_rename->may_delete(), may_create()] ++ ++ If the old argument points to the pathname of a directory, the new ++ argument shall not point to the pathname of a file that is not a ++ directory. ++ ++ [checked by VFS: vfs_rename->may_delete()] ++ ++ If the directory named by the new argument exists, it ++ shall be removed and old renamed to new. In this case, a link named ++ new shall exist throughout the renaming operation and shall refer ++ either to the directory referred to by new or old before the ++ operation began. ++ ++ [we should assure this] ++ ++ If new names an existing directory, it shall be ++ required to be an empty directory. ++ ++ [we should check this] ++ ++ If the old argument points to a pathname of a symbolic link, the ++ symbolic link shall be renamed. If the new argument points to a ++ pathname of a symbolic link, the symbolic link shall be removed. ++ ++ The new pathname shall not contain a path prefix that names ++ old. Write access permission is required for the directory ++ containing old and the directory containing new. If the old ++ argument points to the pathname of a directory, write access ++ permission may be required for the directory named by old, and, if ++ it exists, the directory named by new. 
++ ++ [checked by VFS: vfs_rename(), vfs_rename_dir()] ++ ++ If the link named by the new argument exists and the file's link ++ count becomes 0 when it is removed and no process has the file ++ open, the space occupied by the file shall be freed and the file ++ shall no longer be accessible. If one or more processes have the ++ file open when the last link is removed, the link shall be removed ++ before rename() returns, but the removal of the file contents shall ++ be postponed until all references to the file are closed. ++ ++ [iput() handles this, but we can do this manually, a la ++ reiser4_unlink()] ++ ++ Upon successful completion, rename() shall mark for update the ++ st_ctime and st_mtime fields of the parent directory of each file. ++ ++ [N/A] ++ ++ */ ++ reiser4_context *ctx; ++ int result; ++ int is_dir; /* is @old_name directory */ ++ struct inode *old_inode; ++ struct inode *new_inode; ++ reiser4_dir_entry_desc old_entry; ++ reiser4_dir_entry_desc new_entry; ++ coord_t *new_coord; ++ reiser4_dentry_fsdata *new_fsdata; ++ lock_handle new_lh; ++ dir_plugin *dplug; ++ file_plugin *fplug; ++ ++ ctx = reiser4_init_context(old_dir->i_sb); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ assert("nikita-2318", old_dir != NULL); ++ assert("nikita-2319", new_dir != NULL); ++ assert("nikita-2320", old_name != NULL); ++ assert("nikita-2321", new_name != NULL); ++ ++ old_inode = old_name->d_inode; ++ new_inode = new_name->d_inode; ++ ++ dplug = inode_dir_plugin(old_dir); ++ fplug = NULL; ++ ++ new_fsdata = reiser4_get_dentry_fsdata(new_name); ++ if (IS_ERR(new_fsdata)) { ++ result = PTR_ERR(new_fsdata); ++ goto exit; ++ } ++ ++ new_coord = &new_fsdata->dec.entry_coord; ++ coord_clear_iplug(new_coord); ++ ++ is_dir = S_ISDIR(old_inode->i_mode); ++ ++ assert("nikita-3461", old_inode->i_nlink >= 1 + !!is_dir); ++ ++ /* if target is existing directory and it's not empty---return error. 
++ ++ This check is done specifically, because is_dir_empty() requires ++ tree traversal and have to be done before locks are taken. ++ */ ++ if (is_dir && new_inode != NULL && is_dir_empty(new_inode) != 0) ++ return RETERR(-ENOTEMPTY); ++ ++ result = can_rename(old_dir, old_inode, new_dir, new_inode); ++ if (result != 0) ++ goto exit; ++ ++ result = hashed_rename_estimate_and_grab(old_dir, old_name, ++ new_dir, new_name); ++ if (result != 0) ++ goto exit; ++ ++ init_lh(&new_lh); ++ ++ /* find entry for @new_name */ ++ result = reiser4_find_entry(new_dir, new_name, &new_lh, ++ ZNODE_WRITE_LOCK, &new_entry); ++ ++ if (IS_CBKERR(result)) { ++ done_lh(&new_lh); ++ goto exit; ++ } ++ ++ reiser4_seal_done(&new_fsdata->dec.entry_seal); ++ ++ /* add or replace name for @old_inode as @new_name */ ++ if (new_inode != NULL) { ++ /* target (@new_name) exists. */ ++ /* Not clear what to do with objects that are ++ both directories and files at the same time. */ ++ if (result == CBK_COORD_FOUND) { ++ result = replace_name(old_inode, ++ new_dir, ++ new_inode, new_coord, &new_lh); ++ if (result == 0) ++ fplug = inode_file_plugin(new_inode); ++ } else if (result == CBK_COORD_NOTFOUND) { ++ /* VFS told us that @new_name is bound to existing ++ inode, but we failed to find directory entry. */ ++ warning("nikita-2324", "Target not found"); ++ result = RETERR(-ENOENT); ++ } ++ } else { ++ /* target (@new_name) doesn't exists. */ ++ if (result == CBK_COORD_NOTFOUND) ++ result = add_name(old_inode, ++ new_dir, ++ new_name, new_coord, &new_lh, is_dir); ++ else if (result == CBK_COORD_FOUND) { ++ /* VFS told us that @new_name is "negative" dentry, ++ but we found directory entry. */ ++ warning("nikita-2331", "Target found unexpectedly"); ++ result = RETERR(-EIO); ++ } ++ } ++ ++ assert("nikita-3462", ergo(result == 0, ++ old_inode->i_nlink >= 2 + !!is_dir)); ++ ++ /* We are done with all modifications to the @new_dir, release lock on ++ node. 
*/ ++ done_lh(&new_lh); ++ ++ if (fplug != NULL) { ++ /* detach @new_inode from name-space */ ++ result = fplug->detach(new_inode, new_dir); ++ if (result != 0) ++ warning("nikita-2330", "Cannot detach %lli: %i. %s", ++ (unsigned long long)get_inode_oid(new_inode), ++ result, possible_leak); ++ } ++ ++ if (new_inode != NULL) ++ reiser4_update_sd(new_inode); ++ ++ if (result == 0) { ++ memset(&old_entry, 0, sizeof old_entry); ++ old_entry.obj = old_inode; ++ ++ dplug->build_entry_key(old_dir, ++ &old_name->d_name, &old_entry.key); ++ ++ /* At this stage new name was introduced for ++ @old_inode. @old_inode, @new_dir, and @new_inode i_nlink ++ counters were updated. ++ ++ We want to remove @old_name now. If @old_inode wasn't ++ directory this is simple. ++ */ ++ result = dplug->rem_entry(old_dir, old_name, &old_entry); ++ /*result = rem_entry_hashed(old_dir, old_name, &old_entry); */ ++ if (result != 0 && result != -ENOMEM) { ++ warning("nikita-2335", ++ "Cannot remove old name: %i", result); ++ } else { ++ result = reiser4_del_nlink(old_inode, old_dir, 0); ++ if (result != 0 && result != -ENOMEM) { ++ warning("nikita-2337", ++ "Cannot drop link on old: %i", result); ++ } ++ } ++ ++ if (result == 0 && is_dir) { ++ /* @old_inode is directory. We also have to update ++ dotdot entry. */ ++ coord_t *dotdot_coord; ++ lock_handle dotdot_lh; ++ struct dentry dotdot_name; ++ reiser4_dir_entry_desc dotdot_entry; ++ reiser4_dentry_fsdata dataonstack; ++ reiser4_dentry_fsdata *fsdata; ++ ++ memset(&dataonstack, 0, sizeof dataonstack); ++ memset(&dotdot_entry, 0, sizeof dotdot_entry); ++ dotdot_entry.obj = old_dir; ++ memset(&dotdot_name, 0, sizeof dotdot_name); ++ dotdot_name.d_name.name = ".."; ++ dotdot_name.d_name.len = 2; ++ /* ++ * allocate ->d_fsdata on the stack to avoid using ++ * reiser4_get_dentry_fsdata(). Locking is not needed, ++ * because dentry is private to the current thread. 
++ */ ++ dotdot_name.d_fsdata = &dataonstack; ++ init_lh(&dotdot_lh); ++ ++ fsdata = &dataonstack; ++ dotdot_coord = &fsdata->dec.entry_coord; ++ coord_clear_iplug(dotdot_coord); ++ ++ result = reiser4_find_entry(old_inode, ++ &dotdot_name, ++ &dotdot_lh, ++ ZNODE_WRITE_LOCK, ++ &dotdot_entry); ++ if (result == 0) { ++ /* replace_name() decreases i_nlink on ++ * @old_dir */ ++ result = replace_name(new_dir, ++ old_inode, ++ old_dir, ++ dotdot_coord, &dotdot_lh); ++ } else ++ result = RETERR(-EIO); ++ done_lh(&dotdot_lh); ++ } ++ } ++ reiser4_update_dir(new_dir); ++ reiser4_update_dir(old_dir); ++ reiser4_update_sd(old_inode); ++ if (result == 0) { ++ file_plugin *fplug; ++ ++ if (new_inode != NULL) { ++ /* add safe-link for target file (in case we removed ++ * last reference to the poor fellow */ ++ fplug = inode_file_plugin(new_inode); ++ if (new_inode->i_nlink == 0) ++ result = safe_link_add(new_inode, SAFE_UNLINK); ++ } ++ } ++ exit: ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++ return result; ++} ++#endif +diff --git a/fs/reiser4/plugin/item/Makefile b/fs/reiser4/plugin/item/Makefile +new file mode 100644 +index 0000000..1bae623 +--- /dev/null ++++ b/fs/reiser4/plugin/item/Makefile +@@ -0,0 +1,18 @@ ++obj-$(CONFIG_REISER4_FS) += item_plugins.o ++ ++item_plugins-objs := \ ++ item.o \ ++ static_stat.o \ ++ sde.o \ ++ cde.o \ ++ blackbox.o \ ++ internal.o \ ++ tail.o \ ++ ctail.o \ ++ extent.o \ ++ extent_item_ops.o \ ++ extent_file_ops.o \ ++ extent_flush_ops.o ++ ++ ++ +diff --git a/fs/reiser4/plugin/item/acl.h b/fs/reiser4/plugin/item/acl.h +new file mode 100644 +index 0000000..f26762a +--- /dev/null ++++ b/fs/reiser4/plugin/item/acl.h +@@ -0,0 +1,66 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Directory entry. 
*/ ++ ++#if !defined( __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ ) ++#define __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ ++ ++#include "../../forward.h" ++#include "../../dformat.h" ++#include "../../kassign.h" ++#include "../../key.h" ++ ++#include ++#include /* for struct dentry */ ++ ++typedef struct directory_entry_format { ++ /* key of object stat-data. It's not necessary to store whole ++ key here, because it's always key of stat-data, so minor ++ packing locality and offset can be omitted here. But this ++ relies on particular key allocation scheme for stat-data, so, ++ for extensibility sake, whole key can be stored here. ++ ++ We store key as array of bytes, because we don't want 8-byte ++ alignment of dir entries. ++ */ ++ obj_key_id id; ++ /* file name. Null terminated string. */ ++ d8 name[0]; ++} directory_entry_format; ++ ++void print_de(const char *prefix, coord_t * coord); ++int extract_key_de(const coord_t * coord, reiser4_key * key); ++int update_key_de(const coord_t * coord, const reiser4_key * key, ++ lock_handle * lh); ++char *extract_name_de(const coord_t * coord, char *buf); ++unsigned extract_file_type_de(const coord_t * coord); ++int add_entry_de(struct inode *dir, coord_t * coord, ++ lock_handle * lh, const struct dentry *name, ++ reiser4_dir_entry_desc * entry); ++int rem_entry_de(struct inode *dir, const struct qstr *name, coord_t * coord, ++ lock_handle * lh, reiser4_dir_entry_desc * entry); ++int max_name_len_de(const struct inode *dir); ++ ++int de_rem_and_shrink(struct inode *dir, coord_t * coord, int length); ++ ++char *extract_dent_name(const coord_t * coord, ++ directory_entry_format * dent, char *buf); ++ ++#if REISER4_LARGE_KEY ++#define DE_NAME_BUF_LEN (24) ++#else ++#define DE_NAME_BUF_LEN (16) ++#endif ++ ++/* __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/blackbox.c b/fs/reiser4/plugin/item/blackbox.c +new file mode 100644 +index 0000000..f13ff64 +--- /dev/null ++++ b/fs/reiser4/plugin/item/blackbox.c +@@ -0,0 +1,142 @@ ++/* Copyright 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Black box item implementation */ ++ ++#include "../../forward.h" ++#include "../../debug.h" ++#include "../../dformat.h" ++#include "../../kassign.h" ++#include "../../coord.h" ++#include "../../tree.h" ++#include "../../lock.h" ++ ++#include "blackbox.h" ++#include "item.h" ++#include "../plugin.h" ++ ++int ++store_black_box(reiser4_tree * tree, ++ const reiser4_key * key, void *data, int length) ++{ ++ int result; ++ reiser4_item_data idata; ++ coord_t coord; ++ lock_handle lh; ++ ++ memset(&idata, 0, sizeof idata); ++ ++ idata.data = data; ++ idata.user = 0; ++ idata.length = length; ++ idata.iplug = item_plugin_by_id(BLACK_BOX_ID); ++ ++ init_lh(&lh); ++ result = insert_by_key(tree, key, ++ &idata, &coord, &lh, LEAF_LEVEL, CBK_UNIQUE); ++ ++ assert("nikita-3413", ++ ergo(result == 0, ++ WITH_COORD(&coord, ++ item_length_by_coord(&coord) == length))); ++ ++ done_lh(&lh); ++ return result; ++} ++ ++int ++load_black_box(reiser4_tree * tree, ++ reiser4_key * key, void *data, int length, int exact) ++{ ++ int result; ++ coord_t coord; ++ lock_handle lh; ++ ++ init_lh(&lh); ++ result = coord_by_key(tree, key, ++ &coord, &lh, ZNODE_READ_LOCK, ++ exact ? 
FIND_EXACT : FIND_MAX_NOT_MORE_THAN, ++ LEAF_LEVEL, LEAF_LEVEL, CBK_UNIQUE, NULL); ++ ++ if (result == 0) { ++ int ilen; ++ ++ result = zload(coord.node); ++ if (result == 0) { ++ ilen = item_length_by_coord(&coord); ++ if (ilen <= length) { ++ memcpy(data, item_body_by_coord(&coord), ilen); ++ unit_key_by_coord(&coord, key); ++ } else if (exact) { ++ /* ++ * item is larger than buffer provided by the ++ * user. Only issue a warning if @exact is ++ * set. If @exact is false, we are iterating ++ * over all safe-links and here we are reaching ++ * the end of the iteration. ++ */ ++ warning("nikita-3415", ++ "Wrong black box length: %i > %i", ++ ilen, length); ++ result = RETERR(-EIO); ++ } ++ zrelse(coord.node); ++ } ++ } ++ ++ done_lh(&lh); ++ return result; ++ ++} ++ ++int ++update_black_box(reiser4_tree * tree, ++ const reiser4_key * key, void *data, int length) ++{ ++ int result; ++ coord_t coord; ++ lock_handle lh; ++ ++ init_lh(&lh); ++ result = coord_by_key(tree, key, ++ &coord, &lh, ZNODE_READ_LOCK, ++ FIND_EXACT, ++ LEAF_LEVEL, LEAF_LEVEL, CBK_UNIQUE, NULL); ++ if (result == 0) { ++ int ilen; ++ ++ result = zload(coord.node); ++ if (result == 0) { ++ ilen = item_length_by_coord(&coord); ++ if (length <= ilen) { ++ memcpy(item_body_by_coord(&coord), data, ++ length); ++ } else { ++ warning("nikita-3437", ++ "Wrong black box length: %i < %i", ++ ilen, length); ++ result = RETERR(-EIO); ++ } ++ zrelse(coord.node); ++ } ++ } ++ ++ done_lh(&lh); ++ return result; ++ ++} ++ ++int kill_black_box(reiser4_tree * tree, const reiser4_key * key) ++{ ++ return reiser4_cut_tree(tree, key, key, NULL, 1); ++} ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/blackbox.h b/fs/reiser4/plugin/item/blackbox.h +new file mode 100644 +index 0000000..f5b7af3 +--- /dev/null ++++ b/fs/reiser4/plugin/item/blackbox.h +@@ -0,0 +1,33 @@ ++/* Copyright 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* "Black box" entry to fixed-width contain user supplied data */ ++ ++#if !defined( __FS_REISER4_BLACK_BOX_H__ ) ++#define __FS_REISER4_BLACK_BOX_H__ ++ ++#include "../../forward.h" ++#include "../../dformat.h" ++#include "../../kassign.h" ++#include "../../key.h" ++ ++extern int store_black_box(reiser4_tree * tree, ++ const reiser4_key * key, void *data, int length); ++extern int load_black_box(reiser4_tree * tree, ++ reiser4_key * key, void *data, int length, int exact); ++extern int kill_black_box(reiser4_tree * tree, const reiser4_key * key); ++extern int update_black_box(reiser4_tree * tree, ++ const reiser4_key * key, void *data, int length); ++ ++/* __FS_REISER4_BLACK_BOX_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/cde.c b/fs/reiser4/plugin/item/cde.c +new file mode 100644 +index 0000000..05374ac +--- /dev/null ++++ b/fs/reiser4/plugin/item/cde.c +@@ -0,0 +1,1008 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Directory entry implementation */ ++ ++/* DESCRIPTION: ++ ++ This is "compound" directory item plugin implementation. This directory ++ item type is compound (as opposed to the "simple directory item" in ++ fs/reiser4/plugin/item/sde.[ch]), because it consists of several directory ++ entries. 
++ ++ The reason behind this decision is disk space efficiency: all directory ++ entries inside the same directory have identical fragment in their ++ keys. This, of course, depends on key assignment policy. In our default key ++ assignment policy, all directory entries have the same locality which is ++ equal to the object id of their directory. ++ ++ Composing directory item out of several directory entries for the same ++ directory allows us to store said key fragment only once. That is, this is ++ some ad hoc form of key compression (stem compression) that is implemented ++ here, because general key compression is not supposed to be implemented in ++ v4.0. ++ ++ Another decision that was made regarding all directory item plugins, is ++ that they will store entry keys unaligned. This is for that sake of disk ++ space efficiency again. ++ ++ In should be noted, that storing keys unaligned increases CPU consumption, ++ at least on some architectures. ++ ++ Internal on-disk structure of the compound directory item is the following: ++ ++ HEADER cde_item_format. Here number of entries is stored. ++ ENTRY_HEADER_0 cde_unit_header. Here part of entry key and ++ ENTRY_HEADER_1 offset of entry body are stored. ++ ENTRY_HEADER_2 (basically two last parts of key) ++ ... ++ ENTRY_HEADER_N ++ ENTRY_BODY_0 directory_entry_format. Here part of stat data key and ++ ENTRY_BODY_1 NUL-terminated name are stored. ++ ENTRY_BODY_2 (part of statadta key in the ++ sence that since all SDs have ++ zero offset, this offset is not ++ stored on disk). ++ ... ++ ENTRY_BODY_N ++ ++ When it comes to the balancing, each directory entry in compound directory ++ item is unit, that is, something that can be cut from one item and pasted ++ into another item of the same type. Handling of unit cut and paste is major ++ reason for the complexity of code below. 
++ ++*/ ++ ++#include "../../forward.h" ++#include "../../debug.h" ++#include "../../dformat.h" ++#include "../../kassign.h" ++#include "../../key.h" ++#include "../../coord.h" ++#include "sde.h" ++#include "cde.h" ++#include "item.h" ++#include "../node/node.h" ++#include "../plugin.h" ++#include "../../znode.h" ++#include "../../carry.h" ++#include "../../tree.h" ++#include "../../inode.h" ++ ++#include /* for struct inode */ ++#include /* for struct dentry */ ++#include ++ ++#if 0 ++#define CHECKME(coord) \ ++({ \ ++ const char *message; \ ++ coord_t dup; \ ++ \ ++ coord_dup_nocheck(&dup, (coord)); \ ++ dup.unit_pos = 0; \ ++ assert("nikita-2871", cde_check(&dup, &message) == 0); \ ++}) ++#else ++#define CHECKME(coord) noop ++#endif ++ ++/* return body of compound directory item at @coord */ ++static inline cde_item_format *formatted_at(const coord_t * coord) ++{ ++ assert("nikita-1282", coord != NULL); ++ return item_body_by_coord(coord); ++} ++ ++/* return entry header at @coord */ ++static inline cde_unit_header *header_at(const coord_t * ++ coord /* coord of item */ , ++ int idx /* index of unit */ ) ++{ ++ assert("nikita-1283", coord != NULL); ++ return &formatted_at(coord)->entry[idx]; ++} ++ ++/* return number of units in compound directory item at @coord */ ++static int units(const coord_t * coord /* coord of item */ ) ++{ ++ return le16_to_cpu(get_unaligned(&formatted_at(coord)->num_of_entries)); ++} ++ ++/* return offset of the body of @idx-th entry in @coord */ ++static unsigned int offset_of(const coord_t * coord /* coord of item */ , ++ int idx /* index of unit */ ) ++{ ++ if (idx < units(coord)) ++ return le16_to_cpu(get_unaligned(&header_at(coord, idx)->offset)); ++ else if (idx == units(coord)) ++ return item_length_by_coord(coord); ++ else ++ impossible("nikita-1308", "Wrong idx"); ++ return 0; ++} ++ ++/* set offset of the body of @idx-th entry in @coord */ ++static void set_offset(const coord_t * coord /* coord of item */ , ++ int idx /* index 
of unit */ , ++ unsigned int offset /* new offset */ ) ++{ ++ put_unaligned(cpu_to_le16((__u16) offset), &header_at(coord, idx)->offset); ++} ++ ++static void adj_offset(const coord_t * coord /* coord of item */ , ++ int idx /* index of unit */ , ++ int delta /* offset change */ ) ++{ ++ d16 *doffset; ++ __u16 offset; ++ ++ doffset = &header_at(coord, idx)->offset; ++ offset = le16_to_cpu(get_unaligned(doffset)); ++ offset += delta; ++ put_unaligned(cpu_to_le16((__u16) offset), doffset); ++} ++ ++/* return pointer to @offset-th byte from the beginning of @coord */ ++static char *address(const coord_t * coord /* coord of item */ , ++ int offset) ++{ ++ return ((char *)item_body_by_coord(coord)) + offset; ++} ++ ++/* return pointer to the body of @idx-th entry in @coord */ ++static directory_entry_format *entry_at(const coord_t * coord /* coord of ++ * item */ , ++ int idx /* index of unit */ ) ++{ ++ return (directory_entry_format *) address(coord, ++ (int)offset_of(coord, idx)); ++} ++ ++/* return number of unit referenced by @coord */ ++static int idx_of(const coord_t * coord /* coord of item */ ) ++{ ++ assert("nikita-1285", coord != NULL); ++ return coord->unit_pos; ++} ++ ++/* find position where entry with @entry_key would be inserted into @coord */ ++static int find(const coord_t * coord /* coord of item */ , ++ const reiser4_key * entry_key /* key to look for */ , ++ cmp_t * last /* result of last comparison */ ) ++{ ++ int entries; ++ ++ int left; ++ int right; ++ ++ cde_unit_header *header; ++ ++ assert("nikita-1295", coord != NULL); ++ assert("nikita-1296", entry_key != NULL); ++ assert("nikita-1297", last != NULL); ++ ++ entries = units(coord); ++ left = 0; ++ right = entries - 1; ++ while (right - left >= REISER4_SEQ_SEARCH_BREAK) { ++ int median; ++ ++ median = (left + right) >> 1; ++ ++ header = header_at(coord, median); ++ *last = de_id_key_cmp(&header->hash, entry_key); ++ switch (*last) { ++ case LESS_THAN: ++ left = median; ++ break; ++ case 
GREATER_THAN: ++ right = median; ++ break; ++ case EQUAL_TO:{ ++ do { ++ median--; ++ header--; ++ } while (median >= 0 && ++ de_id_key_cmp(&header->hash, ++ entry_key) == EQUAL_TO); ++ return median + 1; ++ } ++ } ++ } ++ header = header_at(coord, left); ++ for (; left < entries; ++left, ++header) { ++ prefetch(header + 1); ++ *last = de_id_key_cmp(&header->hash, entry_key); ++ if (*last != LESS_THAN) ++ break; ++ } ++ if (left < entries) ++ return left; ++ else ++ return RETERR(-ENOENT); ++ ++} ++ ++/* expand @coord as to accommodate for insertion of @no new entries starting ++ from @pos, with total bodies size @size. */ ++static int expand_item(const coord_t * coord /* coord of item */ , ++ int pos /* unit position */ , int no /* number of new ++ * units*/ , ++ int size /* total size of new units' data */ , ++ unsigned int data_size /* free space already reserved ++ * in the item for insertion */ ) ++{ ++ int entries; ++ cde_unit_header *header; ++ char *dent; ++ int i; ++ ++ assert("nikita-1310", coord != NULL); ++ assert("nikita-1311", pos >= 0); ++ assert("nikita-1312", no > 0); ++ assert("nikita-1313", data_size >= no * sizeof(directory_entry_format)); ++ assert("nikita-1343", ++ item_length_by_coord(coord) >= ++ (int)(size + data_size + no * sizeof *header)); ++ ++ entries = units(coord); ++ ++ if (pos == entries) ++ dent = address(coord, size); ++ else ++ dent = (char *)entry_at(coord, pos); ++ /* place where new header will be in */ ++ header = header_at(coord, pos); ++ /* free space for new entry headers */ ++ memmove(header + no, header, ++ (unsigned)(address(coord, size) - (char *)header)); ++ /* if adding to the end initialise first new header */ ++ if (pos == entries) { ++ set_offset(coord, pos, (unsigned)size); ++ } ++ ++ /* adjust entry pointer and size */ ++ dent = dent + no * sizeof *header; ++ size += no * sizeof *header; ++ /* free space for new entries */ ++ memmove(dent + data_size, dent, ++ (unsigned)(address(coord, size) - dent)); ++ ++ /* 
increase counter */ ++ entries += no; ++ put_unaligned(cpu_to_le16((__u16) entries), &formatted_at(coord)->num_of_entries); ++ ++ /* [ 0 ... pos ] entries were shifted by no * ( sizeof *header ) ++ bytes. */ ++ for (i = 0; i <= pos; ++i) ++ adj_offset(coord, i, no * sizeof *header); ++ /* [ pos + no ... +\infty ) entries were shifted by ( no * ++ sizeof *header + data_size ) bytes */ ++ for (i = pos + no; i < entries; ++i) ++ adj_offset(coord, i, no * sizeof *header + data_size); ++ return 0; ++} ++ ++/* insert new @entry into item */ ++static int expand(const coord_t * coord /* coord of item */ , ++ cde_entry * entry /* entry to insert */ , ++ int len /* length of @entry data */ , ++ int *pos /* position to insert */ , ++ reiser4_dir_entry_desc * dir_entry /* parameters for new ++ * entry */ ) ++{ ++ cmp_t cmp_res; ++ int datasize; ++ ++ *pos = find(coord, &dir_entry->key, &cmp_res); ++ if (*pos < 0) ++ *pos = units(coord); ++ ++ datasize = sizeof(directory_entry_format); ++ if (is_longname(entry->name->name, entry->name->len)) ++ datasize += entry->name->len + 1; ++ ++ expand_item(coord, *pos, 1, item_length_by_coord(coord) - len, ++ datasize); ++ return 0; ++} ++ ++/* paste body of @entry into item */ ++static int paste_entry(const coord_t * coord /* coord of item */ , ++ cde_entry * entry /* new entry */ , ++ int pos /* position to insert */ , ++ reiser4_dir_entry_desc * dir_entry /* parameters for ++ * new entry */ ) ++{ ++ cde_unit_header *header; ++ directory_entry_format *dent; ++ const char *name; ++ int len; ++ ++ header = header_at(coord, pos); ++ dent = entry_at(coord, pos); ++ ++ build_de_id_by_key(&dir_entry->key, &header->hash); ++ build_inode_key_id(entry->obj, &dent->id); ++ /* AUDIT unsafe strcpy() operation! 
It should be replaced with ++ much less CPU hungry ++ memcpy( ( char * ) dent -> name, entry -> name -> name , entry -> name -> len ); ++ ++ Also a more major thing is that there should be a way to figure out ++ amount of space in dent -> name and be able to check that we are ++ not going to overwrite more than we supposed to */ ++ name = entry->name->name; ++ len = entry->name->len; ++ if (is_longname(name, len)) { ++ strcpy((unsigned char *)dent->name, name); ++ put_unaligned(0, &dent->name[len]); ++ } ++ return 0; ++} ++ ++/* estimate how much space is necessary in item to insert/paste set of entries ++ described in @data. */ ++int estimate_cde(const coord_t * coord /* coord of item */ , ++ const reiser4_item_data * data /* parameters for new item */ ) ++{ ++ cde_entry_data *e; ++ int result; ++ int i; ++ ++ e = (cde_entry_data *) data->data; ++ ++ assert("nikita-1288", e != NULL); ++ assert("nikita-1289", e->num_of_entries >= 0); ++ ++ if (coord == NULL) ++ /* insert */ ++ result = sizeof(cde_item_format); ++ else ++ /* paste */ ++ result = 0; ++ ++ result += e->num_of_entries * ++ (sizeof(cde_unit_header) + sizeof(directory_entry_format)); ++ for (i = 0; i < e->num_of_entries; ++i) { ++ const char *name; ++ int len; ++ ++ name = e->entry[i].name->name; ++ len = e->entry[i].name->len; ++ assert("nikita-2054", strlen(name) == len); ++ if (is_longname(name, len)) ++ result += len + 1; ++ } ++ ((reiser4_item_data *) data)->length = result; ++ return result; ++} ++ ++/* ->nr_units() method for this item plugin. */ ++pos_in_node_t nr_units_cde(const coord_t * coord /* coord of item */ ) ++{ ++ return units(coord); ++} ++ ++/* ->unit_key() method for this item plugin. 
*/ ++reiser4_key *unit_key_cde(const coord_t * coord /* coord of item */ , ++ reiser4_key * key /* resulting key */ ) ++{ ++ assert("nikita-1452", coord != NULL); ++ assert("nikita-1345", idx_of(coord) < units(coord)); ++ assert("nikita-1346", key != NULL); ++ ++ item_key_by_coord(coord, key); ++ extract_key_from_de_id(extract_dir_id_from_key(key), ++ &header_at(coord, idx_of(coord))->hash, key); ++ return key; ++} ++ ++/* mergeable_cde(): implementation of ->mergeable() item method. ++ ++ Two directory items are mergeable iff they are from the same ++ directory. That simple. ++ ++*/ ++int mergeable_cde(const coord_t * p1 /* coord of first item */ , ++ const coord_t * p2 /* coord of second item */ ) ++{ ++ reiser4_key k1; ++ reiser4_key k2; ++ ++ assert("nikita-1339", p1 != NULL); ++ assert("nikita-1340", p2 != NULL); ++ ++ return ++ (item_plugin_by_coord(p1) == item_plugin_by_coord(p2)) && ++ (extract_dir_id_from_key(item_key_by_coord(p1, &k1)) == ++ extract_dir_id_from_key(item_key_by_coord(p2, &k2))); ++ ++} ++ ++/* ->max_key_inside() method for this item plugin. */ ++reiser4_key *max_key_inside_cde(const coord_t * coord /* coord of item */ , ++ reiser4_key * result /* resulting key */ ) ++{ ++ assert("nikita-1342", coord != NULL); ++ ++ item_key_by_coord(coord, result); ++ set_key_ordering(result, get_key_ordering(reiser4_max_key())); ++ set_key_fulloid(result, get_key_fulloid(reiser4_max_key())); ++ set_key_offset(result, get_key_offset(reiser4_max_key())); ++ return result; ++} ++ ++/* @data contains data which are to be put into tree */ ++int can_contain_key_cde(const coord_t * coord /* coord of item */ , ++ const reiser4_key * key /* key to check */ , ++ const reiser4_item_data * data /* parameters of new ++ * item/unit being ++ * created */ ) ++{ ++ reiser4_key item_key; ++ ++ /* FIXME-VS: do not rely on anything but iplug field of @data. 
Only ++ data->iplug is initialized */ ++ assert("vs-457", data && data->iplug); ++/* assert( "vs-553", data -> user == 0 );*/ ++ item_key_by_coord(coord, &item_key); ++ ++ return (item_plugin_by_coord(coord) == data->iplug) && ++ (extract_dir_id_from_key(&item_key) == ++ extract_dir_id_from_key(key)); ++} ++ ++#if REISER4_DEBUG ++/* cde_check ->check() method for compressed directory items ++ ++ used for debugging, every item should have here the most complete ++ possible check of the consistency of the item that the inventor can ++ construct ++*/ ++int reiser4_check_cde(const coord_t * coord /* coord of item to check */, ++ const char **error /* where to store error message */) ++{ ++ int i; ++ int result; ++ char *item_start; ++ char *item_end; ++ reiser4_key key; ++ ++ coord_t c; ++ ++ assert("nikita-1357", coord != NULL); ++ assert("nikita-1358", error != NULL); ++ ++ if (!ergo(coord->item_pos != 0, ++ is_dot_key(item_key_by_coord(coord, &key)))) { ++ *error = "CDE doesn't start with dot"; ++ return -1; ++ } ++ item_start = item_body_by_coord(coord); ++ item_end = item_start + item_length_by_coord(coord); ++ ++ coord_dup(&c, coord); ++ result = 0; ++ for (i = 0; i < units(coord); ++i) { ++ directory_entry_format *entry; ++ ++ if ((char *)(header_at(coord, i) + 1) > ++ item_end - units(coord) * sizeof *entry) { ++ *error = "CDE header is out of bounds"; ++ result = -1; ++ break; ++ } ++ entry = entry_at(coord, i); ++ if ((char *)entry < item_start + sizeof(cde_item_format)) { ++ *error = "CDE header is too low"; ++ result = -1; ++ break; ++ } ++ if ((char *)(entry + 1) > item_end) { ++ *error = "CDE header is too high"; ++ result = -1; ++ break; ++ } ++ } ++ ++ return result; ++} ++#endif ++ ++/* ->init() method for this item plugin. 
*/ ++int init_cde(coord_t * coord /* coord of item */ , ++ coord_t * from UNUSED_ARG, reiser4_item_data * data /* structure used for insertion */ ++ UNUSED_ARG) ++{ ++ put_unaligned(cpu_to_le16(0), &formatted_at(coord)->num_of_entries); ++ return 0; ++} ++ ++/* ->lookup() method for this item plugin. */ ++lookup_result lookup_cde(const reiser4_key * key /* key to search for */ , ++ lookup_bias bias /* search bias */ , ++ coord_t * coord /* coord of item to lookup in */ ) ++{ ++ cmp_t last_comp; ++ int pos; ++ ++ reiser4_key utmost_key; ++ ++ assert("nikita-1293", coord != NULL); ++ assert("nikita-1294", key != NULL); ++ ++ CHECKME(coord); ++ ++ if (keygt(item_key_by_coord(coord, &utmost_key), key)) { ++ coord->unit_pos = 0; ++ coord->between = BEFORE_UNIT; ++ return CBK_COORD_NOTFOUND; ++ } ++ pos = find(coord, key, &last_comp); ++ if (pos >= 0) { ++ coord->unit_pos = (int)pos; ++ switch (last_comp) { ++ case EQUAL_TO: ++ coord->between = AT_UNIT; ++ return CBK_COORD_FOUND; ++ case GREATER_THAN: ++ coord->between = BEFORE_UNIT; ++ return RETERR(-ENOENT); ++ case LESS_THAN: ++ default: ++ impossible("nikita-1298", "Broken find"); ++ return RETERR(-EIO); ++ } ++ } else { ++ coord->unit_pos = units(coord) - 1; ++ coord->between = AFTER_UNIT; ++ return (bias == ++ FIND_MAX_NOT_MORE_THAN) ? CBK_COORD_FOUND : ++ CBK_COORD_NOTFOUND; ++ } ++} ++ ++/* ->paste() method for this item plugin. 
*/ ++int paste_cde(coord_t * coord /* coord of item */ , ++ reiser4_item_data * data /* parameters of new unit being ++ * inserted */ , ++ carry_plugin_info * info UNUSED_ARG /* todo carry queue */ ) ++{ ++ cde_entry_data *e; ++ int result; ++ int i; ++ ++ CHECKME(coord); ++ e = (cde_entry_data *) data->data; ++ ++ result = 0; ++ for (i = 0; i < e->num_of_entries; ++i) { ++ int pos; ++ int phantom_size; ++ ++ phantom_size = data->length; ++ if (units(coord) == 0) ++ phantom_size -= sizeof(cde_item_format); ++ ++ result = ++ expand(coord, e->entry + i, phantom_size, &pos, data->arg); ++ if (result != 0) ++ break; ++ result = paste_entry(coord, e->entry + i, pos, data->arg); ++ if (result != 0) ++ break; ++ } ++ CHECKME(coord); ++ return result; ++} ++ ++/* amount of space occupied by all entries starting from @idx both headers and ++ bodies. */ ++static unsigned int part_size(const coord_t * coord /* coord of item */ , ++ int idx /* index of unit */ ) ++{ ++ assert("nikita-1299", coord != NULL); ++ assert("nikita-1300", idx < (int)units(coord)); ++ ++ return sizeof(cde_item_format) + ++ (idx + 1) * sizeof(cde_unit_header) + offset_of(coord, ++ idx + 1) - ++ offset_of(coord, 0); ++} ++ ++/* how many but not more than @want units of @source can be merged with ++ item in @target node. If pend == append - we try to append last item ++ of @target by first units of @source. If pend == prepend - we try to ++ "prepend" first item in @target by last units of @source. @target ++ node has @free_space bytes of free space. 
Total size of those units ++ are returned via @size */ ++int can_shift_cde(unsigned free_space /* free space in item */ , ++ coord_t * coord /* coord of source item */ , ++ znode * target /* target node */ , ++ shift_direction pend /* shift direction */ , ++ unsigned *size /* resulting number of shifted bytes */ , ++ unsigned want /* maximal number of bytes to shift */ ) ++{ ++ int shift; ++ ++ CHECKME(coord); ++ if (want == 0) { ++ *size = 0; ++ return 0; ++ } ++ ++ /* pend == SHIFT_LEFT <==> shifting to the left */ ++ if (pend == SHIFT_LEFT) { ++ for (shift = min((int)want - 1, units(coord)); shift >= 0; ++ --shift) { ++ *size = part_size(coord, shift); ++ if (target != NULL) ++ *size -= sizeof(cde_item_format); ++ if (*size <= free_space) ++ break; ++ } ++ shift = shift + 1; ++ } else { ++ int total_size; ++ ++ assert("nikita-1301", pend == SHIFT_RIGHT); ++ ++ total_size = item_length_by_coord(coord); ++ for (shift = units(coord) - want - 1; shift < units(coord) - 1; ++ ++shift) { ++ *size = total_size - part_size(coord, shift); ++ if (target == NULL) ++ *size += sizeof(cde_item_format); ++ if (*size <= free_space) ++ break; ++ } ++ shift = units(coord) - shift - 1; ++ } ++ if (shift == 0) ++ *size = 0; ++ CHECKME(coord); ++ return shift; ++} ++ ++/* ->copy_units() method for this item plugin. 
*/ ++void copy_units_cde(coord_t * target /* coord of target item */ , ++ coord_t * source /* coord of source item */ , ++ unsigned from /* starting unit */ , ++ unsigned count /* how many units to copy */ , ++ shift_direction where_is_free_space /* shift direction */ , ++ unsigned free_space /* free space in item */ ) ++{ ++ char *header_from; ++ char *header_to; ++ ++ char *entry_from; ++ char *entry_to; ++ ++ int pos_in_target; ++ int data_size; ++ int data_delta; ++ int i; ++ ++ assert("nikita-1303", target != NULL); ++ assert("nikita-1304", source != NULL); ++ assert("nikita-1305", (int)from < units(source)); ++ assert("nikita-1307", (int)(from + count) <= units(source)); ++ ++ if (where_is_free_space == SHIFT_LEFT) { ++ assert("nikita-1453", from == 0); ++ pos_in_target = units(target); ++ } else { ++ assert("nikita-1309", (int)(from + count) == units(source)); ++ pos_in_target = 0; ++ memmove(item_body_by_coord(target), ++ (char *)item_body_by_coord(target) + free_space, ++ item_length_by_coord(target) - free_space); ++ } ++ ++ CHECKME(target); ++ CHECKME(source); ++ ++ /* expand @target */ ++ data_size = ++ offset_of(source, (int)(from + count)) - offset_of(source, ++ (int)from); ++ ++ if (units(target) == 0) ++ free_space -= sizeof(cde_item_format); ++ ++ expand_item(target, pos_in_target, (int)count, ++ (int)(item_length_by_coord(target) - free_space), ++ (unsigned)data_size); ++ ++ /* copy first @count units of @source into @target */ ++ data_delta = ++ offset_of(target, pos_in_target) - offset_of(source, (int)from); ++ ++ /* copy entries */ ++ entry_from = (char *)entry_at(source, (int)from); ++ entry_to = (char *)entry_at(source, (int)(from + count)); ++ memmove(entry_at(target, pos_in_target), entry_from, ++ (unsigned)(entry_to - entry_from)); ++ ++ /* copy headers */ ++ header_from = (char *)header_at(source, (int)from); ++ header_to = (char *)header_at(source, (int)(from + count)); ++ memmove(header_at(target, pos_in_target), header_from, ++ 
(unsigned)(header_to - header_from)); ++ ++ /* update offsets */ ++ for (i = pos_in_target; i < (int)(pos_in_target + count); ++i) ++ adj_offset(target, i, data_delta); ++ CHECKME(target); ++ CHECKME(source); ++} ++ ++/* ->cut_units() method for this item plugin. */ ++int cut_units_cde(coord_t * coord /* coord of item */ , ++ pos_in_node_t from /* start unit pos */ , ++ pos_in_node_t to /* stop unit pos */ , ++ struct carry_cut_data *cdata UNUSED_ARG, ++ reiser4_key * smallest_removed, reiser4_key * new_first) ++{ ++ char *header_from; ++ char *header_to; ++ ++ char *entry_from; ++ char *entry_to; ++ ++ int size; ++ int entry_delta; ++ int header_delta; ++ int i; ++ ++ unsigned count; ++ ++ CHECKME(coord); ++ ++ count = to - from + 1; ++ ++ assert("nikita-1454", coord != NULL); ++ assert("nikita-1455", (int)(from + count) <= units(coord)); ++ ++ if (smallest_removed) ++ unit_key_by_coord(coord, smallest_removed); ++ ++ if (new_first) { ++ coord_t next; ++ ++ /* not everything is cut from item head */ ++ assert("vs-1527", from == 0); ++ assert("vs-1528", to < units(coord) - 1); ++ ++ coord_dup(&next, coord); ++ next.unit_pos++; ++ unit_key_by_coord(&next, new_first); ++ } ++ ++ size = item_length_by_coord(coord); ++ if (count == (unsigned)units(coord)) { ++ return size; ++ } ++ ++ header_from = (char *)header_at(coord, (int)from); ++ header_to = (char *)header_at(coord, (int)(from + count)); ++ ++ entry_from = (char *)entry_at(coord, (int)from); ++ entry_to = (char *)entry_at(coord, (int)(from + count)); ++ ++ /* move headers */ ++ memmove(header_from, header_to, ++ (unsigned)(address(coord, size) - header_to)); ++ ++ header_delta = header_to - header_from; ++ ++ entry_from -= header_delta; ++ entry_to -= header_delta; ++ size -= header_delta; ++ ++ /* copy entries */ ++ memmove(entry_from, entry_to, ++ (unsigned)(address(coord, size) - entry_to)); ++ ++ entry_delta = entry_to - entry_from; ++ size -= entry_delta; ++ ++ /* update offsets */ ++ ++ for (i = 0; i < 
(int)from; ++i) ++ adj_offset(coord, i, -header_delta); ++ ++ for (i = from; i < units(coord) - (int)count; ++i) ++ adj_offset(coord, i, -header_delta - entry_delta); ++ ++ put_unaligned(cpu_to_le16((__u16) units(coord) - count), ++ &formatted_at(coord)->num_of_entries); ++ ++ if (from == 0) { ++ /* entries from head was removed - move remaining to right */ ++ memmove((char *)item_body_by_coord(coord) + ++ header_delta + entry_delta, item_body_by_coord(coord), ++ (unsigned)size); ++ if (REISER4_DEBUG) ++ memset(item_body_by_coord(coord), 0, ++ (unsigned)header_delta + entry_delta); ++ } else { ++ /* freed space is already at the end of item */ ++ if (REISER4_DEBUG) ++ memset((char *)item_body_by_coord(coord) + size, 0, ++ (unsigned)header_delta + entry_delta); ++ } ++ ++ return header_delta + entry_delta; ++} ++ ++int kill_units_cde(coord_t * coord /* coord of item */ , ++ pos_in_node_t from /* start unit pos */ , ++ pos_in_node_t to /* stop unit pos */ , ++ struct carry_kill_data *kdata UNUSED_ARG, ++ reiser4_key * smallest_removed, reiser4_key * new_first) ++{ ++ return cut_units_cde(coord, from, to, NULL, smallest_removed, new_first); ++} ++ ++/* ->s.dir.extract_key() method for this item plugin. 
*/ ++int extract_key_cde(const coord_t * coord /* coord of item */ , ++ reiser4_key * key /* resulting key */ ) ++{ ++ directory_entry_format *dent; ++ ++ assert("nikita-1155", coord != NULL); ++ assert("nikita-1156", key != NULL); ++ ++ dent = entry_at(coord, idx_of(coord)); ++ return extract_key_from_id(&dent->id, key); ++} ++ ++int ++update_key_cde(const coord_t * coord, const reiser4_key * key, ++ lock_handle * lh UNUSED_ARG) ++{ ++ directory_entry_format *dent; ++ obj_key_id obj_id; ++ int result; ++ ++ assert("nikita-2344", coord != NULL); ++ assert("nikita-2345", key != NULL); ++ ++ dent = entry_at(coord, idx_of(coord)); ++ result = build_obj_key_id(key, &obj_id); ++ if (result == 0) { ++ dent->id = obj_id; ++ znode_make_dirty(coord->node); ++ } ++ return 0; ++} ++ ++/* ->s.dir.extract_name() method for this item plugin. */ ++char *extract_name_cde(const coord_t * coord /* coord of item */ , char *buf) ++{ ++ directory_entry_format *dent; ++ ++ assert("nikita-1157", coord != NULL); ++ ++ dent = entry_at(coord, idx_of(coord)); ++ return extract_dent_name(coord, dent, buf); ++} ++ ++static int cde_bytes(int pasting, const reiser4_item_data * data) ++{ ++ int result; ++ ++ result = data->length; ++ if (!pasting) ++ result -= sizeof(cde_item_format); ++ return result; ++} ++ ++/* ->s.dir.add_entry() method for this item plugin */ ++int add_entry_cde(struct inode *dir /* directory object */ , ++ coord_t * coord /* coord of item */ , ++ lock_handle * lh /* lock handle for insertion */ , ++ const struct dentry *name /* name to insert */ , ++ reiser4_dir_entry_desc * dir_entry /* parameters of new ++ * directory entry */ ) ++{ ++ reiser4_item_data data; ++ cde_entry entry; ++ cde_entry_data edata; ++ int result; ++ ++ assert("nikita-1656", coord->node == lh->node); ++ assert("nikita-1657", znode_is_write_locked(coord->node)); ++ ++ edata.num_of_entries = 1; ++ edata.entry = &entry; ++ ++ entry.dir = dir; ++ entry.obj = dir_entry->obj; ++ entry.name = &name->d_name; 
++ ++ data.data = (char *)&edata; ++ data.user = 0; /* &edata is not user space */ ++ data.iplug = item_plugin_by_id(COMPOUND_DIR_ID); ++ data.arg = dir_entry; ++ assert("nikita-1302", data.iplug != NULL); ++ ++ result = is_dot_key(&dir_entry->key); ++ data.length = estimate_cde(result ? coord : NULL, &data); ++ ++ /* NOTE-NIKITA quota plugin? */ ++ if (DQUOT_ALLOC_SPACE_NODIRTY(dir, cde_bytes(result, &data))) ++ return RETERR(-EDQUOT); ++ ++ if (result) ++ result = insert_by_coord(coord, &data, &dir_entry->key, lh, 0); ++ else ++ result = reiser4_resize_item(coord, &data, &dir_entry->key, ++ lh, 0); ++ return result; ++} ++ ++/* ->s.dir.rem_entry() */ ++int rem_entry_cde(struct inode *dir /* directory of item */ , ++ const struct qstr *name, coord_t * coord /* coord of item */ , ++ lock_handle * lh UNUSED_ARG /* lock handle for ++ * removal */ , ++ reiser4_dir_entry_desc * entry UNUSED_ARG /* parameters of ++ * directory entry ++ * being removed */ ) ++{ ++ coord_t shadow; ++ int result; ++ int length; ++ ON_DEBUG(char buf[DE_NAME_BUF_LEN]); ++ ++ assert("nikita-2870", strlen(name->name) == name->len); ++ assert("nikita-2869", ++ !strcmp(name->name, extract_name_cde(coord, buf))); ++ ++ length = sizeof(directory_entry_format) + sizeof(cde_unit_header); ++ if (is_longname(name->name, name->len)) ++ length += name->len + 1; ++ ++ if (inode_get_bytes(dir) < length) { ++ warning("nikita-2628", "Dir is broke: %llu: %llu", ++ (unsigned long long)get_inode_oid(dir), ++ inode_get_bytes(dir)); ++ ++ return RETERR(-EIO); ++ } ++ ++ /* cut_node() is supposed to take pointers to _different_ ++ coords, because it will modify them without respect to ++ possible aliasing. To work around this, create temporary copy ++ of @coord. ++ */ ++ coord_dup(&shadow, coord); ++ result = ++ kill_node_content(coord, &shadow, NULL, NULL, NULL, NULL, NULL, 0); ++ if (result == 0) { ++ /* NOTE-NIKITA quota plugin? 
*/ ++ DQUOT_FREE_SPACE_NODIRTY(dir, length); ++ } ++ return result; ++} ++ ++/* ->s.dir.max_name_len() method for this item plugin */ ++int max_name_len_cde(const struct inode *dir /* directory */ ) ++{ ++ return ++ reiser4_tree_by_inode(dir)->nplug->max_item_size() - ++ sizeof(directory_entry_format) - sizeof(cde_item_format) - ++ sizeof(cde_unit_header) - 2; ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/cde.h b/fs/reiser4/plugin/item/cde.h +new file mode 100644 +index 0000000..73a30d5 +--- /dev/null ++++ b/fs/reiser4/plugin/item/cde.h +@@ -0,0 +1,87 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Compound directory item. See cde.c for description. */ ++ ++#if !defined( __FS_REISER4_PLUGIN_COMPRESSED_DE_H__ ) ++#define __FS_REISER4_PLUGIN_COMPRESSED_DE_H__ ++ ++#include "../../forward.h" ++#include "../../kassign.h" ++#include "../../dformat.h" ++ ++#include /* for struct inode */ ++#include /* for struct dentry, etc */ ++ ++typedef struct cde_unit_header { ++ de_id hash; ++ d16 offset; ++} cde_unit_header; ++ ++typedef struct cde_item_format { ++ d16 num_of_entries; ++ cde_unit_header entry[0]; ++} cde_item_format; ++ ++typedef struct cde_entry { ++ const struct inode *dir; ++ const struct inode *obj; ++ const struct qstr *name; ++} cde_entry; ++ ++typedef struct cde_entry_data { ++ int num_of_entries; ++ cde_entry *entry; ++} cde_entry_data; ++ ++/* plugin->item.b.* */ ++reiser4_key *max_key_inside_cde(const coord_t * coord, reiser4_key * result); ++int can_contain_key_cde(const coord_t * coord, const reiser4_key * key, ++ const reiser4_item_data *); ++int mergeable_cde(const coord_t * p1, const coord_t * p2); ++pos_in_node_t nr_units_cde(const coord_t * coord); ++reiser4_key *unit_key_cde(const coord_t * coord, reiser4_key * key); ++int 
estimate_cde(const coord_t * coord, const reiser4_item_data * data); ++void print_cde(const char *prefix, coord_t * coord); ++int init_cde(coord_t * coord, coord_t * from, reiser4_item_data * data); ++lookup_result lookup_cde(const reiser4_key * key, lookup_bias bias, ++ coord_t * coord); ++int paste_cde(coord_t * coord, reiser4_item_data * data, ++ carry_plugin_info * info UNUSED_ARG); ++int can_shift_cde(unsigned free_space, coord_t * coord, znode * target, ++ shift_direction pend, unsigned *size, unsigned want); ++void copy_units_cde(coord_t * target, coord_t * source, unsigned from, ++ unsigned count, shift_direction where_is_free_space, ++ unsigned free_space); ++int cut_units_cde(coord_t * coord, pos_in_node_t from, pos_in_node_t to, ++ struct carry_cut_data *, reiser4_key * smallest_removed, ++ reiser4_key * new_first); ++int kill_units_cde(coord_t * coord, pos_in_node_t from, pos_in_node_t to, ++ struct carry_kill_data *, reiser4_key * smallest_removed, ++ reiser4_key * new_first); ++void print_cde(const char *prefix, coord_t * coord); ++int reiser4_check_cde(const coord_t * coord, const char **error); ++ ++/* plugin->u.item.s.dir.* */ ++int extract_key_cde(const coord_t * coord, reiser4_key * key); ++int update_key_cde(const coord_t * coord, const reiser4_key * key, ++ lock_handle * lh); ++char *extract_name_cde(const coord_t * coord, char *buf); ++int add_entry_cde(struct inode *dir, coord_t * coord, ++ lock_handle * lh, const struct dentry *name, ++ reiser4_dir_entry_desc * entry); ++int rem_entry_cde(struct inode *dir, const struct qstr *name, coord_t * coord, ++ lock_handle * lh, reiser4_dir_entry_desc * entry); ++int max_name_len_cde(const struct inode *dir); ++ ++/* __FS_REISER4_PLUGIN_COMPRESSED_DE_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/ctail.c b/fs/reiser4/plugin/item/ctail.c +new file mode 100644 +index 0000000..9cb8eca +--- /dev/null ++++ b/fs/reiser4/plugin/item/ctail.c +@@ -0,0 +1,1570 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* ctails (aka "clustered tails") are items for cryptcompress objects */ ++ ++/* DESCRIPTION: ++ ++Each cryptcompress object is stored on disk as a set of clusters sliced ++into ctails. ++ ++Internal on-disk structure: ++ ++ HEADER (1) Here stored disk cluster shift ++ BODY ++*/ ++ ++#include "../../forward.h" ++#include "../../debug.h" ++#include "../../dformat.h" ++#include "../../kassign.h" ++#include "../../key.h" ++#include "../../coord.h" ++#include "item.h" ++#include "../node/node.h" ++#include "../plugin.h" ++#include "../object.h" ++#include "../../znode.h" ++#include "../../carry.h" ++#include "../../tree.h" ++#include "../../inode.h" ++#include "../../super.h" ++#include "../../context.h" ++#include "../../page_cache.h" ++#include "../cluster.h" ++#include "../../flush.h" ++#include "../../tree_walk.h" ++ ++#include ++#include ++#include ++ ++/* return body of ctail item at @coord */ ++static ctail_item_format *ctail_formatted_at(const coord_t * coord) ++{ ++ assert("edward-60", coord != NULL); ++ return item_body_by_coord(coord); ++} ++ ++static int cluster_shift_by_coord(const coord_t * coord) ++{ ++ return get_unaligned(&ctail_formatted_at(coord)->cluster_shift); ++} ++ ++static inline void dclust_set_extension_shift(hint_t * hint) ++{ ++ assert("edward-1270", ++ item_id_by_coord(&hint->ext_coord.coord) == CTAIL_ID); ++ hint->ext_coord.extension.ctail.shift = ++ cluster_shift_by_coord(&hint->ext_coord.coord); ++} ++ ++static loff_t off_by_coord(const coord_t * coord) ++{ ++ reiser4_key key; ++ return 
get_key_offset(item_key_by_coord(coord, &key)); ++} ++ ++int coord_is_unprepped_ctail(const coord_t * coord) ++{ ++ assert("edward-1233", coord != NULL); ++ assert("edward-1234", item_id_by_coord(coord) == CTAIL_ID); ++ assert("edward-1235", ++ ergo((int)cluster_shift_by_coord(coord) == (int)UCTAIL_SHIFT, ++ nr_units_ctail(coord) == (pos_in_node_t) UCTAIL_NR_UNITS)); ++ ++ return (int)cluster_shift_by_coord(coord) == (int)UCTAIL_SHIFT; ++} ++ ++static cloff_t clust_by_coord(const coord_t * coord, struct inode *inode) ++{ ++ int shift; ++ ++ if (inode != NULL) { ++ shift = inode_cluster_shift(inode); ++ assert("edward-1236", ++ ergo(!coord_is_unprepped_ctail(coord), ++ shift == cluster_shift_by_coord(coord))); ++ } else { ++ assert("edward-1237", !coord_is_unprepped_ctail(coord)); ++ shift = cluster_shift_by_coord(coord); ++ } ++ return off_by_coord(coord) >> shift; ++} ++ ++static int disk_cluster_size(const coord_t * coord) ++{ ++ assert("edward-1156", ++ item_plugin_by_coord(coord) == item_plugin_by_id(CTAIL_ID)); ++ /* calculation of disk cluster size ++ is meaninless if ctail is unprepped */ ++ assert("edward-1238", !coord_is_unprepped_ctail(coord)); ++ ++ return 1 << cluster_shift_by_coord(coord); ++} ++ ++/* true if the key is of first disk cluster item */ ++static int is_disk_cluster_key(const reiser4_key * key, const coord_t * coord) ++{ ++ assert("edward-1239", item_id_by_coord(coord) == CTAIL_ID); ++ ++ return coord_is_unprepped_ctail(coord) || ++ ((get_key_offset(key) & ++ ((loff_t) disk_cluster_size(coord) - 1)) == 0); ++} ++ ++static char *first_unit(coord_t * coord) ++{ ++ /* FIXME: warning: pointer of type `void *' used in arithmetic */ ++ return (char *)item_body_by_coord(coord) + sizeof(ctail_item_format); ++} ++ ++/* plugin->u.item.b.max_key_inside : ++ tail_max_key_inside */ ++ ++/* plugin->u.item.b.can_contain_key */ ++int ++can_contain_key_ctail(const coord_t * coord, const reiser4_key * key, ++ const reiser4_item_data * data) ++{ ++ 
reiser4_key item_key; ++ ++ if (item_plugin_by_coord(coord) != data->iplug) ++ return 0; ++ ++ item_key_by_coord(coord, &item_key); ++ if (get_key_locality(key) != get_key_locality(&item_key) || ++ get_key_objectid(key) != get_key_objectid(&item_key)) ++ return 0; ++ if (get_key_offset(&item_key) + nr_units_ctail(coord) != ++ get_key_offset(key)) ++ return 0; ++ if (is_disk_cluster_key(key, coord)) ++ return 0; ++ return 1; ++} ++ ++/* plugin->u.item.b.mergeable ++ c-tails of different clusters are not mergeable */ ++int mergeable_ctail(const coord_t * p1, const coord_t * p2) ++{ ++ reiser4_key key1, key2; ++ ++ assert("edward-62", item_id_by_coord(p1) == CTAIL_ID); ++ assert("edward-61", plugin_of_group(item_plugin_by_coord(p1), ++ UNIX_FILE_METADATA_ITEM_TYPE)); ++ ++ if (item_id_by_coord(p2) != CTAIL_ID) { ++ /* second item is of another type */ ++ return 0; ++ } ++ ++ item_key_by_coord(p1, &key1); ++ item_key_by_coord(p2, &key2); ++ if (get_key_locality(&key1) != get_key_locality(&key2) || ++ get_key_objectid(&key1) != get_key_objectid(&key2) || ++ get_key_type(&key1) != get_key_type(&key2)) { ++ /* items of different objects */ ++ return 0; ++ } ++ if (get_key_offset(&key1) + nr_units_ctail(p1) != get_key_offset(&key2)) ++ /* not adjacent items */ ++ return 0; ++ if (is_disk_cluster_key(&key2, p2)) ++ return 0; ++ return 1; ++} ++ ++/* plugin->u.item.b.nr_units */ ++pos_in_node_t nr_units_ctail(const coord_t * coord) ++{ ++ return (item_length_by_coord(coord) - ++ sizeof(ctail_formatted_at(coord)->cluster_shift)); ++} ++ ++/* plugin->u.item.b.estimate: ++ estimate how much space is needed to insert/paste @data->length bytes ++ into ctail at @coord */ ++int estimate_ctail(const coord_t * coord /* coord of item */ , ++ const reiser4_item_data * ++ data /* parameters for new item */ ) ++{ ++ if (coord == NULL) ++ /* insert */ ++ return (sizeof(ctail_item_format) + data->length); ++ else ++ /* paste */ ++ return data->length; ++} ++ ++/* ->init() method for this 
item plugin. */ ++int init_ctail(coord_t * to /* coord of item */ , ++ coord_t * from /* old_item */ , ++ reiser4_item_data * data /* structure used for insertion */ ) ++{ ++ int cluster_shift; /* cpu value to convert */ ++ ++ if (data) { ++ assert("edward-463", data->length > sizeof(ctail_item_format)); ++ cluster_shift = *((int *)(data->arg)); ++ data->length -= sizeof(ctail_item_format); ++ } else { ++ assert("edward-464", from != NULL); ++ assert("edward-855", ctail_ok(from)); ++ cluster_shift = (int)(cluster_shift_by_coord(from)); ++ } ++ put_unaligned((d8)cluster_shift, &ctail_formatted_at(to)->cluster_shift); ++ assert("edward-856", ctail_ok(to)); ++ return 0; ++} ++ ++/* plugin->u.item.b.lookup: ++ NULL: We are looking for item keys only */ ++ ++#if REISER4_DEBUG ++int ctail_ok(const coord_t * coord) ++{ ++ return coord_is_unprepped_ctail(coord) || ++ cluster_shift_ok(cluster_shift_by_coord(coord)); ++} ++ ++/* plugin->u.item.b.check */ ++int check_ctail(const coord_t * coord, const char **error) ++{ ++ if (!ctail_ok(coord)) { ++ if (error) ++ *error = "bad cluster shift in ctail"; ++ return 1; ++ } ++ return 0; ++} ++#endif ++ ++/* plugin->u.item.b.paste */ ++int ++paste_ctail(coord_t * coord, reiser4_item_data * data, ++ carry_plugin_info * info UNUSED_ARG) ++{ ++ unsigned old_nr_units; ++ ++ assert("edward-268", data->data != NULL); ++ /* copy only from kernel space */ ++ assert("edward-66", data->user == 0); ++ ++ old_nr_units = ++ item_length_by_coord(coord) - sizeof(ctail_item_format) - ++ data->length; ++ ++ /* ctail items never get pasted in the middle */ ++ ++ if (coord->unit_pos == 0 && coord->between == AT_UNIT) { ++ ++ /* paste at the beginning when create new item */ ++ assert("edward-450", ++ item_length_by_coord(coord) == ++ data->length + sizeof(ctail_item_format)); ++ assert("edward-451", old_nr_units == 0); ++ } else if (coord->unit_pos == old_nr_units - 1 ++ && coord->between == AFTER_UNIT) { ++ ++ /* paste at the end */ ++ 
coord->unit_pos++; ++ } else ++ impossible("edward-453", "bad paste position"); ++ ++ memcpy(first_unit(coord) + coord->unit_pos, data->data, data->length); ++ ++ assert("edward-857", ctail_ok(coord)); ++ ++ return 0; ++} ++ ++/* plugin->u.item.b.fast_paste */ ++ ++/* plugin->u.item.b.can_shift ++ number of units is returned via return value, number of bytes via @size. For ++ ctail items they coincide */ ++int ++can_shift_ctail(unsigned free_space, coord_t * source, ++ znode * target, shift_direction direction UNUSED_ARG, ++ unsigned *size /* number of bytes */ , unsigned want) ++{ ++ /* make sure that that we do not want to shift more than we have */ ++ assert("edward-68", want > 0 && want <= nr_units_ctail(source)); ++ ++ *size = min(want, free_space); ++ ++ if (!target) { ++ /* new item will be created */ ++ if (*size <= sizeof(ctail_item_format)) { ++ *size = 0; ++ return 0; ++ } ++ return *size - sizeof(ctail_item_format); ++ } ++ return *size; ++} ++ ++/* plugin->u.item.b.copy_units ++ cooperates with ->can_shift() */ ++void ++copy_units_ctail(coord_t * target, coord_t * source, ++ unsigned from, unsigned count /* units */ , ++ shift_direction where_is_free_space, ++ unsigned free_space /* bytes */ ) ++{ ++ /* make sure that item @target is expanded already */ ++ assert("edward-69", (unsigned)item_length_by_coord(target) >= count); ++ assert("edward-70", free_space == count || free_space == count + 1); ++ ++ assert("edward-858", ctail_ok(source)); ++ ++ if (where_is_free_space == SHIFT_LEFT) { ++ /* append item @target with @count first bytes of @source: ++ this restriction came from ordinary tails */ ++ assert("edward-71", from == 0); ++ assert("edward-860", ctail_ok(target)); ++ ++ memcpy(first_unit(target) + nr_units_ctail(target) - count, ++ first_unit(source), count); ++ } else { ++ /* target item is moved to right already */ ++ reiser4_key key; ++ ++ assert("edward-72", nr_units_ctail(source) == from + count); ++ ++ if (free_space == count) { ++ 
init_ctail(target, source, NULL); ++ } else { ++ /* new item has been created */ ++ assert("edward-862", ctail_ok(target)); ++ } ++ memcpy(first_unit(target), first_unit(source) + from, count); ++ ++ assert("edward-863", ctail_ok(target)); ++ ++ /* new units are inserted before first unit in an item, ++ therefore, we have to update item key */ ++ item_key_by_coord(source, &key); ++ set_key_offset(&key, get_key_offset(&key) + from); ++ ++ node_plugin_by_node(target->node)->update_item_key(target, &key, ++ NULL /*info */); ++ } ++} ++ ++/* plugin->u.item.b.create_hook */ ++int create_hook_ctail(const coord_t * coord, void *arg) ++{ ++ assert("edward-864", znode_is_loaded(coord->node)); ++ ++ znode_set_convertible(coord->node); ++ return 0; ++} ++ ++/* plugin->u.item.b.kill_hook */ ++int ++kill_hook_ctail(const coord_t * coord, pos_in_node_t from, pos_in_node_t count, ++ carry_kill_data * kdata) ++{ ++ struct inode *inode; ++ ++ assert("edward-1157", item_id_by_coord(coord) == CTAIL_ID); ++ assert("edward-291", znode_is_write_locked(coord->node)); ++ ++ inode = kdata->inode; ++ if (inode) { ++ reiser4_key key; ++ item_key_by_coord(coord, &key); ++ ++ if (from == 0 && is_disk_cluster_key(&key, coord)) { ++ /* disk cluster is killed */ ++ cloff_t start = ++ off_to_clust(get_key_offset(&key), inode); ++ truncate_page_cluster_cryptcompress(inode, start, ++ kdata->params.truncate); ++ inode_sub_bytes(inode, inode_cluster_size(inode)); ++ } ++ } ++ return 0; ++} ++ ++/* for shift_hook_ctail(), ++ return true if the first disk cluster item has dirty child ++*/ ++static int ctail_convertible(const coord_t * coord) ++{ ++ int result; ++ reiser4_key key; ++ jnode *child = NULL; ++ ++ assert("edward-477", coord != NULL); ++ assert("edward-478", item_id_by_coord(coord) == CTAIL_ID); ++ ++ if (coord_is_unprepped_ctail(coord)) ++ /* unprepped ctail should be converted */ ++ return 1; ++ ++ item_key_by_coord(coord, &key); ++ child = jlookup(current_tree, ++ get_key_objectid(&key), 
++ off_to_pg(off_by_coord(coord))); ++ if (!child) ++ return 0; ++ result = JF_ISSET(child, JNODE_DIRTY); ++ jput(child); ++ return result; ++} ++ ++/* FIXME-EDWARD */ ++/* plugin->u.item.b.shift_hook */ ++int shift_hook_ctail(const coord_t * item /* coord of item */ , ++ unsigned from UNUSED_ARG /* start unit */ , ++ unsigned count UNUSED_ARG /* stop unit */ , ++ znode * old_node /* old parent */ ) ++{ ++ assert("edward-479", item != NULL); ++ assert("edward-480", item->node != old_node); ++ ++ if (!znode_convertible(old_node) || znode_convertible(item->node)) ++ return 0; ++ if (ctail_convertible(item)) ++ znode_set_convertible(item->node); ++ return 0; ++} ++ ++static int ++cut_or_kill_ctail_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to, ++ int cut, void *p, reiser4_key * smallest_removed, ++ reiser4_key * new_first) ++{ ++ pos_in_node_t count; /* number of units to cut */ ++ char *item; ++ ++ count = to - from + 1; ++ item = item_body_by_coord(coord); ++ ++ assert("edward-74", ergo(from != 0, to == coord_last_unit_pos(coord))); ++ ++ if (smallest_removed) { ++ /* store smallest key removed */ ++ item_key_by_coord(coord, smallest_removed); ++ set_key_offset(smallest_removed, ++ get_key_offset(smallest_removed) + from); ++ } ++ ++ if (new_first) { ++ assert("vs-1531", from == 0); ++ ++ item_key_by_coord(coord, new_first); ++ set_key_offset(new_first, ++ get_key_offset(new_first) + from + count); ++ } ++ ++ if (!cut) ++ kill_hook_ctail(coord, from, 0, (struct carry_kill_data *)p); ++ ++ if (from == 0) { ++ if (count != nr_units_ctail(coord)) { ++ /* part of item is removed, so move free space at the beginning ++ of the item and update item key */ ++ reiser4_key key; ++ memcpy(item + to + 1, item, sizeof(ctail_item_format)); ++ item_key_by_coord(coord, &key); ++ set_key_offset(&key, get_key_offset(&key) + count); ++ node_plugin_by_node(coord->node)->update_item_key(coord, ++ &key, ++ NULL); ++ } else { ++ /* cut_units should not be called to cut 
evrything */ ++ assert("vs-1532", ergo(cut, 0)); ++ /* whole item is cut, so more then amount of space occupied ++ by units got freed */ ++ count += sizeof(ctail_item_format); ++ } ++ if (REISER4_DEBUG) ++ memset(item, 0, count); ++ } else if (REISER4_DEBUG) ++ memset(item + sizeof(ctail_item_format) + from, 0, count); ++ return count; ++} ++ ++/* plugin->u.item.b.cut_units */ ++int ++cut_units_ctail(coord_t * item, pos_in_node_t from, pos_in_node_t to, ++ carry_cut_data * cdata, reiser4_key * smallest_removed, ++ reiser4_key * new_first) ++{ ++ return cut_or_kill_ctail_units(item, from, to, 1, NULL, ++ smallest_removed, new_first); ++} ++ ++/* plugin->u.item.b.kill_units */ ++int ++kill_units_ctail(coord_t * item, pos_in_node_t from, pos_in_node_t to, ++ struct carry_kill_data *kdata, reiser4_key * smallest_removed, ++ reiser4_key * new_first) ++{ ++ return cut_or_kill_ctail_units(item, from, to, 0, kdata, ++ smallest_removed, new_first); ++} ++ ++/* plugin->u.item.s.file.read */ ++int read_ctail(struct file *file UNUSED_ARG, flow_t * f, hint_t * hint) ++{ ++ uf_coord_t *uf_coord; ++ coord_t *coord; ++ ++ uf_coord = &hint->ext_coord; ++ coord = &uf_coord->coord; ++ assert("edward-127", f->user == 0); ++ assert("edward-129", coord && coord->node); ++ assert("edward-130", coord_is_existing_unit(coord)); ++ assert("edward-132", znode_is_loaded(coord->node)); ++ ++ /* start read only from the beginning of ctail */ ++ assert("edward-133", coord->unit_pos == 0); ++ /* read only whole ctails */ ++ assert("edward-135", nr_units_ctail(coord) <= f->length); ++ ++ assert("edward-136", reiser4_schedulable()); ++ assert("edward-886", ctail_ok(coord)); ++ ++ if (f->data) ++ memcpy(f->data, (char *)first_unit(coord), ++ (size_t) nr_units_ctail(coord)); ++ ++ dclust_set_extension_shift(hint); ++ mark_page_accessed(znode_page(coord->node)); ++ move_flow_forward(f, nr_units_ctail(coord)); ++ ++ return 0; ++} ++ ++/* Reads a disk cluster consists of ctail items, ++ attaches a 
transform stream with plain text */ ++int ctail_read_disk_cluster(reiser4_cluster_t * clust, struct inode *inode, ++ znode_lock_mode mode) ++{ ++ int result; ++ assert("edward-1450", mode == ZNODE_READ_LOCK || ZNODE_WRITE_LOCK); ++ assert("edward-671", clust->hint != NULL); ++ assert("edward-140", clust->dstat == INVAL_DISK_CLUSTER); ++ assert("edward-672", cryptcompress_inode_ok(inode)); ++ ++ /* set input stream */ ++ result = grab_tfm_stream(inode, &clust->tc, INPUT_STREAM); ++ if (result) ++ return result; ++ ++ result = find_disk_cluster(clust, inode, 1 /* read items */, mode); ++ assert("edward-1340", !result); ++ if (result) ++ return result; ++ if (mode == ZNODE_READ_LOCK) ++ /* write still need the lock to insert unprepped ++ items, etc... */ ++ put_hint_cluster(clust, inode, ZNODE_READ_LOCK); ++ ++ if (clust->dstat == FAKE_DISK_CLUSTER || ++ clust->dstat == UNPR_DISK_CLUSTER) { ++ tfm_cluster_set_uptodate(&clust->tc); ++ return 0; ++ } ++ result = grab_coa(&clust->tc, inode_compression_plugin(inode)); ++ if (result) ++ return result; ++ result = reiser4_inflate_cluster(clust, inode); ++ if (result) ++ return result; ++ tfm_cluster_set_uptodate(&clust->tc); ++ return 0; ++} ++ ++/* read one locked page */ ++int do_readpage_ctail(struct inode * inode, reiser4_cluster_t * clust, ++ struct page *page, znode_lock_mode mode) ++{ ++ int ret; ++ unsigned cloff; ++ char *data; ++ size_t pgcnt; ++ tfm_cluster_t *tc = &clust->tc; ++ ++ assert("edward-212", PageLocked(page)); ++ ++ if (PageUptodate(page)) ++ goto exit; ++ ++ if (!tfm_cluster_is_uptodate(&clust->tc)) { ++ clust->index = pg_to_clust(page->index, inode); ++ unlock_page(page); ++ ret = ctail_read_disk_cluster(clust, inode, mode); ++ lock_page(page); ++ if (ret) ++ return ret; ++ } ++ if (PageUptodate(page)) ++ /* races with another read/write */ ++ goto exit; ++ ++ /* bytes in the page */ ++ pgcnt = cnt_to_pgcnt(i_size_read(inode), page->index); ++ ++ if (pgcnt == 0) { ++ assert("edward-1290", 0); ++ 
return RETERR(-EINVAL); ++ } ++ assert("edward-119", tfm_cluster_is_uptodate(tc)); ++ ++ switch (clust->dstat) { ++ case UNPR_DISK_CLUSTER: ++ assert("edward-1285", 0); ++#if REISER4_DEBUG ++ warning("edward-1168", ++ "page %lu is not uptodate and disk cluster %lu (inode %llu) is unprepped\n", ++ page->index, clust->index, ++ (unsigned long long)get_inode_oid(inode)); ++#endif ++ case FAKE_DISK_CLUSTER: ++ /* fill the page by zeroes */ ++ data = kmap_atomic(page, KM_USER0); ++ ++ memset(data, 0, PAGE_CACHE_SIZE); ++ flush_dcache_page(page); ++ kunmap_atomic(data, KM_USER0); ++ SetPageUptodate(page); ++ break; ++ case PREP_DISK_CLUSTER: ++ /* fill the page by transformed data */ ++ assert("edward-1058", !PageUptodate(page)); ++ assert("edward-120", tc->len <= inode_cluster_size(inode)); ++ ++ /* start page offset in the cluster */ ++ cloff = pg_to_off_to_cloff(page->index, inode); ++ ++ data = kmap(page); ++ memcpy(data, tfm_stream_data(tc, OUTPUT_STREAM) + cloff, pgcnt); ++ memset(data + pgcnt, 0, (size_t) PAGE_CACHE_SIZE - pgcnt); ++ flush_dcache_page(page); ++ kunmap(page); ++ SetPageUptodate(page); ++ break; ++ default: ++ impossible("edward-1169", "bad disk cluster state"); ++ } ++ exit: ++ return 0; ++} ++ ++/* plugin->u.item.s.file.readpage */ ++int readpage_ctail(void *vp, struct page *page) ++{ ++ int result; ++ hint_t *hint; ++ reiser4_cluster_t *clust = vp; ++ ++ assert("edward-114", clust != NULL); ++ assert("edward-115", PageLocked(page)); ++ assert("edward-116", !PageUptodate(page)); ++ assert("edward-117", !jprivate(page) && !PagePrivate(page)); ++ assert("edward-118", page->mapping && page->mapping->host); ++ assert("edward-867", !tfm_cluster_is_uptodate(&clust->tc)); ++ ++ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get()); ++ if (hint == NULL) { ++ unlock_page(page); ++ return RETERR(-ENOMEM); ++ } ++ clust->hint = hint; ++ result = load_file_hint(clust->file, hint); ++ if (result) { ++ kfree(hint); ++ unlock_page(page); ++ return result; ++ 
} ++ assert("vs-25", hint->ext_coord.lh == &hint->lh); ++ result = do_readpage_ctail(page->mapping->host, clust, page, ++ ZNODE_READ_LOCK); ++ ++ assert("edward-213", PageLocked(page)); ++ assert("edward-1163", ergo(!result, PageUptodate(page))); ++ assert("edward-868", ++ ergo(!result, tfm_cluster_is_uptodate(&clust->tc))); ++ ++ unlock_page(page); ++ done_lh(&hint->lh); ++ hint->ext_coord.valid = 0; ++ save_file_hint(clust->file, hint); ++ kfree(hint); ++ tfm_cluster_clr_uptodate(&clust->tc); ++ ++ return result; ++} ++ ++/* Helper function for ->readpages() */ ++static int ++ctail_read_page_cluster(reiser4_cluster_t * clust, struct inode *inode) ++{ ++ int i; ++ int result; ++ assert("edward-779", clust != NULL); ++ assert("edward-1059", clust->win == NULL); ++ assert("edward-780", inode != NULL); ++ ++ result = prepare_page_cluster(inode, clust, 0 /* do not capture */ ); ++ if (result) ++ return result; ++ result = ctail_read_disk_cluster(clust, inode, ZNODE_READ_LOCK); ++ if (result) ++ goto out; ++ /* at this point stream with valid plain text is attached */ ++ assert("edward-781", tfm_cluster_is_uptodate(&clust->tc)); ++ ++ for (i = 0; i < clust->nr_pages; i++) { ++ struct page *page = clust->pages[i]; ++ lock_page(page); ++ result = do_readpage_ctail(inode, clust, page, ZNODE_READ_LOCK); ++ unlock_page(page); ++ if (result) ++ break; ++ } ++ tfm_cluster_clr_uptodate(&clust->tc); ++ out: ++ reiser4_release_cluster_pages(clust); ++ return result; ++} ++ ++/* filler for read_cache_pages() */ ++static int ctail_readpages_filler(void * data, struct page * page) ++{ ++ int ret = 0; ++ reiser4_cluster_t * clust = data; ++ struct inode * inode = clust->file->f_dentry->d_inode; ++ ++ if (PageUptodate(page)) { ++ unlock_page(page); ++ return 0; ++ } ++ unlock_page(page); ++ move_cluster_forward(clust, inode, page->index); ++ ret = ctail_read_page_cluster(clust, inode); ++ if (ret) ++ return ret; ++ assert("edward-869", !tfm_cluster_is_uptodate(&clust->tc)); ++ ++ 
lock_page(page); ++ ret = do_readpage_ctail(inode, clust, page, ZNODE_READ_LOCK); ++ assert("edward-1061", ergo(!ret, PageUptodate(page))); ++ unlock_page(page); ++ ++ return ret; ++} ++ ++/* We populate a bit more then upper readahead suggests: ++ with each nominated page we read the whole page cluster ++ this page belongs to. */ ++int readpages_ctail(struct file *file, struct address_space *mapping, ++ struct list_head *pages) ++{ ++ int ret = 0; ++ hint_t *hint; ++ reiser4_cluster_t clust; ++ struct inode *inode = mapping->host; ++ ++ assert("edward-1521", inode == file->f_dentry->d_inode); ++ ++ cluster_init_read(&clust, NULL); ++ clust.file = file; ++ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get()); ++ if (hint == NULL) { ++ warning("vs-28", "failed to allocate hint"); ++ ret = RETERR(-ENOMEM); ++ goto exit1; ++ } ++ clust.hint = hint; ++ ret = load_file_hint(clust.file, hint); ++ if (ret) { ++ warning("edward-1522", "failed to load hint"); ++ goto exit2; ++ } ++ assert("vs-26", hint->ext_coord.lh == &hint->lh); ++ ret = alloc_cluster_pgset(&clust, cluster_nrpages(inode)); ++ if (ret) { ++ warning("edward-1523", "failed to alloc pgset"); ++ goto exit3; ++ } ++ ret = read_cache_pages(mapping, pages, ctail_readpages_filler, &clust); ++ ++ assert("edward-870", !tfm_cluster_is_uptodate(&clust.tc)); ++ exit3: ++ done_lh(&hint->lh); ++ save_file_hint(file, hint); ++ hint->ext_coord.valid = 0; ++ exit2: ++ kfree(hint); ++ exit1: ++ put_cluster_handle(&clust); ++ return ret; ++} ++ ++/* ++ plugin->u.item.s.file.append_key ++ key of the first item of the next disk cluster ++*/ ++reiser4_key *append_key_ctail(const coord_t * coord, reiser4_key * key) ++{ ++ assert("edward-1241", item_id_by_coord(coord) == CTAIL_ID); ++ assert("edward-1242", cluster_shift_ok(cluster_shift_by_coord(coord))); ++ ++ item_key_by_coord(coord, key); ++ set_key_offset(key, ++ ((__u64) (clust_by_coord(coord, NULL)) + ++ 1) << cluster_shift_by_coord(coord)); ++ return key; ++} ++ 
++static int ++insert_unprepped_ctail(reiser4_cluster_t * clust, struct inode *inode) ++{ ++ int result; ++ char buf[UCTAIL_NR_UNITS]; ++ reiser4_item_data data; ++ reiser4_key key; ++ int shift = (int)UCTAIL_SHIFT; ++ ++ memset(buf, 0, (size_t) UCTAIL_NR_UNITS); ++ result = key_by_inode_cryptcompress(inode, ++ clust_to_off(clust->index, inode), ++ &key); ++ if (result) ++ return result; ++ data.user = 0; ++ data.iplug = item_plugin_by_id(CTAIL_ID); ++ data.arg = &shift; ++ data.length = sizeof(ctail_item_format) + (size_t) UCTAIL_NR_UNITS; ++ data.data = buf; ++ ++ result = insert_by_coord(&clust->hint->ext_coord.coord, ++ &data, &key, clust->hint->ext_coord.lh, 0); ++ return result; ++} ++ ++static int ++insert_cryptcompress_flow(coord_t * coord, lock_handle * lh, flow_t * f, ++ struct inode *inode) ++{ ++ int result; ++ carry_pool *pool; ++ carry_level *lowest_level; ++ reiser4_item_data *data; ++ carry_op *op; ++ int cluster_shift = inode_cluster_shift(inode); ++ ++ pool = ++ init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) + ++ sizeof(*data)); ++ if (IS_ERR(pool)) ++ return PTR_ERR(pool); ++ lowest_level = (carry_level *) (pool + 1); ++ init_carry_level(lowest_level, pool); ++ data = (reiser4_item_data *) (lowest_level + 3); ++ ++ assert("edward-466", coord->between == AFTER_ITEM ++ || coord->between == AFTER_UNIT || coord->between == BEFORE_ITEM ++ || coord->between == EMPTY_NODE ++ || coord->between == BEFORE_UNIT); ++ ++ if (coord->between == AFTER_UNIT) { ++ coord->unit_pos = 0; ++ coord->between = AFTER_ITEM; ++ } ++ op = reiser4_post_carry(lowest_level, COP_INSERT_FLOW, coord->node, ++ 0 /* operate directly on coord -> node */); ++ if (IS_ERR(op) || (op == NULL)) { ++ done_carry_pool(pool); ++ return RETERR(op ? 
PTR_ERR(op) : -EIO); ++ } ++ data->user = 0; ++ data->iplug = item_plugin_by_id(CTAIL_ID); ++ data->arg = &cluster_shift; ++ ++ data->length = 0; ++ data->data = NULL; ++ ++ op->u.insert_flow.flags = COPI_DONT_SHIFT_LEFT | COPI_DONT_SHIFT_RIGHT; ++ op->u.insert_flow.insert_point = coord; ++ op->u.insert_flow.flow = f; ++ op->u.insert_flow.data = data; ++ op->u.insert_flow.new_nodes = 0; ++ ++ lowest_level->track_type = CARRY_TRACK_CHANGE; ++ lowest_level->tracked = lh; ++ ++ result = reiser4_carry(lowest_level, NULL); ++ done_carry_pool(pool); ++ ++ return result; ++} ++ ++/* Implementation of CRC_APPEND_ITEM mode of ctail conversion */ ++static int insert_cryptcompress_flow_in_place(coord_t * coord, ++ lock_handle * lh, flow_t * f, ++ struct inode *inode) ++{ ++ int ret; ++ coord_t pos; ++ lock_handle lock; ++ ++ assert("edward-674", f->length <= inode_scaled_cluster_size(inode)); ++ assert("edward-484", coord->between == AT_UNIT ++ || coord->between == AFTER_ITEM); ++ assert("edward-485", item_id_by_coord(coord) == CTAIL_ID); ++ ++ coord_dup(&pos, coord); ++ pos.unit_pos = 0; ++ pos.between = AFTER_ITEM; ++ ++ init_lh(&lock); ++ copy_lh(&lock, lh); ++ ++ ret = insert_cryptcompress_flow(&pos, &lock, f, inode); ++ done_lh(&lock); ++ assert("edward-1347", znode_is_write_locked(lh->node)); ++ assert("edward-1228", !ret); ++ return ret; ++} ++ ++/* Implementation of CRC_OVERWRITE_ITEM mode of ctail conversion */ ++static int overwrite_ctail(coord_t * coord, flow_t * f) ++{ ++ unsigned count; ++ ++ assert("edward-269", f->user == 0); ++ assert("edward-270", f->data != NULL); ++ assert("edward-271", f->length > 0); ++ assert("edward-272", coord_is_existing_unit(coord)); ++ assert("edward-273", coord->unit_pos == 0); ++ assert("edward-274", znode_is_write_locked(coord->node)); ++ assert("edward-275", reiser4_schedulable()); ++ assert("edward-467", item_id_by_coord(coord) == CTAIL_ID); ++ assert("edward-1243", ctail_ok(coord)); ++ ++ count = nr_units_ctail(coord); ++ ++ 
if (count > f->length) ++ count = f->length; ++ memcpy(first_unit(coord), f->data, count); ++ move_flow_forward(f, count); ++ coord->unit_pos += count; ++ return 0; ++} ++ ++/* Implementation of CRC_CUT_ITEM mode of ctail conversion: ++ cut ctail (part or whole) starting from next unit position */ ++static int cut_ctail(coord_t * coord) ++{ ++ coord_t stop; ++ ++ assert("edward-435", coord->between == AT_UNIT && ++ coord->item_pos < coord_num_items(coord) && ++ coord->unit_pos <= coord_num_units(coord)); ++ ++ if (coord->unit_pos == coord_num_units(coord)) ++ /* nothing to cut */ ++ return 0; ++ coord_dup(&stop, coord); ++ stop.unit_pos = coord_last_unit_pos(coord); ++ ++ return cut_node_content(coord, &stop, NULL, NULL, NULL); ++} ++ ++int ++ctail_insert_unprepped_cluster(reiser4_cluster_t * clust, struct inode *inode) ++{ ++ int result; ++ assert("edward-1244", inode != NULL); ++ assert("edward-1245", clust->hint != NULL); ++ assert("edward-1246", clust->dstat == FAKE_DISK_CLUSTER); ++ assert("edward-1247", clust->reserved == 1); ++ ++ result = get_disk_cluster_locked(clust, inode, ZNODE_WRITE_LOCK); ++ if (cbk_errored(result)) ++ return result; ++ assert("edward-1249", result == CBK_COORD_NOTFOUND); ++ assert("edward-1250", znode_is_write_locked(clust->hint->lh.node)); ++ ++ assert("edward-1295", ++ clust->hint->ext_coord.lh->node == ++ clust->hint->ext_coord.coord.node); ++ ++ coord_set_between_clusters(&clust->hint->ext_coord.coord); ++ ++ result = insert_unprepped_ctail(clust, inode); ++ all_grabbed2free(); ++ ++ assert("edward-1251", !result); ++ assert("edward-1252", cryptcompress_inode_ok(inode)); ++ assert("edward-1253", znode_is_write_locked(clust->hint->lh.node)); ++ assert("edward-1254", ++ reiser4_clustered_blocks(reiser4_get_current_sb())); ++ assert("edward-1255", ++ znode_convertible(clust->hint->ext_coord.coord.node)); ++ ++ return result; ++} ++ ++static int do_convert_ctail(flush_pos_t * pos, cryptcompress_write_mode_t mode) ++{ ++ int result = 
0; ++ convert_item_info_t *info; ++ ++ assert("edward-468", pos != NULL); ++ assert("edward-469", pos->sq != NULL); ++ assert("edward-845", item_convert_data(pos) != NULL); ++ ++ info = item_convert_data(pos); ++ assert("edward-679", info->flow.data != NULL); ++ ++ switch (mode) { ++ case CRC_APPEND_ITEM: ++ assert("edward-1229", info->flow.length != 0); ++ assert("edward-1256", ++ cluster_shift_ok(cluster_shift_by_coord(&pos->coord))); ++ result = ++ insert_cryptcompress_flow_in_place(&pos->coord, ++ &pos->lock, ++ &info->flow, ++ info->inode); ++ break; ++ case CRC_OVERWRITE_ITEM: ++ assert("edward-1230", info->flow.length != 0); ++ overwrite_ctail(&pos->coord, &info->flow); ++ if (info->flow.length != 0) ++ break; ++ case CRC_CUT_ITEM: ++ assert("edward-1231", info->flow.length == 0); ++ result = cut_ctail(&pos->coord); ++ break; ++ default: ++ result = RETERR(-EIO); ++ impossible("edward-244", "bad convert mode"); ++ } ++ return result; ++} ++ ++/* plugin->u.item.f.scan */ ++int scan_ctail(flush_scan * scan) ++{ ++ int result = 0; ++ struct page *page; ++ struct inode *inode; ++ jnode *node = scan->node; ++ ++ assert("edward-227", scan->node != NULL); ++ assert("edward-228", jnode_is_cluster_page(scan->node)); ++ assert("edward-639", znode_is_write_locked(scan->parent_lock.node)); ++ ++ page = jnode_page(node); ++ inode = page->mapping->host; ++ ++ if (!reiser4_scanning_left(scan)) ++ return result; ++ if (!ZF_ISSET(scan->parent_lock.node, JNODE_DIRTY)) ++ znode_make_dirty(scan->parent_lock.node); ++ ++ if (!znode_convertible(scan->parent_lock.node)) { ++ if (JF_ISSET(scan->node, JNODE_DIRTY)) ++ znode_set_convertible(scan->parent_lock.node); ++ else { ++ warning("edward-681", ++ "cluster page is already processed"); ++ return -EAGAIN; ++ } ++ } ++ return result; ++} ++ ++/* If true, this function attaches children */ ++static int should_attach_convert_idata(flush_pos_t * pos) ++{ ++ int result; ++ assert("edward-431", pos != NULL); ++ assert("edward-432", 
pos->child == NULL); ++ assert("edward-619", znode_is_write_locked(pos->coord.node)); ++ assert("edward-470", ++ item_plugin_by_coord(&pos->coord) == ++ item_plugin_by_id(CTAIL_ID)); ++ ++ /* check for leftmost child */ ++ utmost_child_ctail(&pos->coord, LEFT_SIDE, &pos->child); ++ ++ if (!pos->child) ++ return 0; ++ spin_lock_jnode(pos->child); ++ result = (JF_ISSET(pos->child, JNODE_DIRTY) && ++ pos->child->atom == ZJNODE(pos->coord.node)->atom); ++ spin_unlock_jnode(pos->child); ++ if (!result && pos->child) { ++ /* existing child isn't to attach, clear up this one */ ++ jput(pos->child); ++ pos->child = NULL; ++ } ++ return result; ++} ++ ++/* plugin->init_convert_data() */ ++static int ++init_convert_data_ctail(convert_item_info_t * idata, struct inode *inode) ++{ ++ assert("edward-813", idata != NULL); ++ assert("edward-814", inode != NULL); ++ ++ idata->inode = inode; ++ idata->d_cur = DC_FIRST_ITEM; ++ idata->d_next = DC_INVALID_STATE; ++ ++ return 0; ++} ++ ++static int alloc_item_convert_data(convert_info_t * sq) ++{ ++ assert("edward-816", sq != NULL); ++ assert("edward-817", sq->itm == NULL); ++ ++ sq->itm = kmalloc(sizeof(*sq->itm), reiser4_ctx_gfp_mask_get()); ++ if (sq->itm == NULL) ++ return RETERR(-ENOMEM); ++ return 0; ++} ++ ++static void free_item_convert_data(convert_info_t * sq) ++{ ++ assert("edward-818", sq != NULL); ++ assert("edward-819", sq->itm != NULL); ++ assert("edward-820", sq->iplug != NULL); ++ ++ kfree(sq->itm); ++ sq->itm = NULL; ++ return; ++} ++ ++static int alloc_convert_data(flush_pos_t * pos) ++{ ++ assert("edward-821", pos != NULL); ++ assert("edward-822", pos->sq == NULL); ++ ++ pos->sq = kmalloc(sizeof(*pos->sq), reiser4_ctx_gfp_mask_get()); ++ if (!pos->sq) ++ return RETERR(-ENOMEM); ++ memset(pos->sq, 0, sizeof(*pos->sq)); ++ cluster_init_write(&pos->sq->clust, NULL); ++ return 0; ++} ++ ++void free_convert_data(flush_pos_t * pos) ++{ ++ convert_info_t *sq; ++ ++ assert("edward-823", pos != NULL); ++ 
assert("edward-824", pos->sq != NULL); ++ ++ sq = pos->sq; ++ if (sq->itm) ++ free_item_convert_data(sq); ++ put_cluster_handle(&sq->clust); ++ kfree(pos->sq); ++ pos->sq = NULL; ++ return; ++} ++ ++static int init_item_convert_data(flush_pos_t * pos, struct inode *inode) ++{ ++ convert_info_t *sq; ++ ++ assert("edward-825", pos != NULL); ++ assert("edward-826", pos->sq != NULL); ++ assert("edward-827", item_convert_data(pos) != NULL); ++ assert("edward-828", inode != NULL); ++ ++ sq = pos->sq; ++ ++ memset(sq->itm, 0, sizeof(*sq->itm)); ++ ++ /* iplug->init_convert_data() */ ++ return init_convert_data_ctail(sq->itm, inode); ++} ++ ++/* create and attach disk cluster info used by 'convert' phase of the flush ++ squalloc() */ ++static int attach_convert_idata(flush_pos_t * pos, struct inode *inode) ++{ ++ int ret = 0; ++ convert_item_info_t *info; ++ reiser4_cluster_t *clust; ++ file_plugin *fplug = inode_file_plugin(inode); ++ compression_plugin *cplug = inode_compression_plugin(inode); ++ ++ assert("edward-248", pos != NULL); ++ assert("edward-249", pos->child != NULL); ++ assert("edward-251", inode != NULL); ++ assert("edward-682", cryptcompress_inode_ok(inode)); ++ assert("edward-252", ++ fplug == file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)); ++ assert("edward-473", ++ item_plugin_by_coord(&pos->coord) == ++ item_plugin_by_id(CTAIL_ID)); ++ ++ if (!pos->sq) { ++ ret = alloc_convert_data(pos); ++ if (ret) ++ return ret; ++ } ++ clust = &pos->sq->clust; ++ ret = grab_coa(&clust->tc, cplug); ++ if (ret) ++ goto err; ++ ret = set_cluster_by_page(clust, ++ jnode_page(pos->child), ++ MAX_CLUSTER_NRPAGES); ++ if (ret) ++ goto err; ++ ++ assert("edward-829", pos->sq != NULL); ++ assert("edward-250", item_convert_data(pos) == NULL); ++ ++ pos->sq->iplug = item_plugin_by_id(CTAIL_ID); ++ ++ ret = alloc_item_convert_data(pos->sq); ++ if (ret) ++ goto err; ++ ret = init_item_convert_data(pos, inode); ++ if (ret) ++ goto err; ++ info = item_convert_data(pos); ++ ++ ret 
= flush_cluster_pages(clust, pos->child, inode); ++ if (ret) ++ goto err; ++ ++ reiser4_deflate_cluster(clust, inode); ++ inc_item_convert_count(pos); ++ ++ /* make flow by transformed stream */ ++ fplug->flow_by_inode(info->inode, ++ (const char __user *)tfm_stream_data(&clust->tc, OUTPUT_STREAM), ++ 0 /* kernel space */ , ++ clust->tc.len, ++ clust_to_off(clust->index, inode), ++ WRITE_OP, &info->flow); ++ jput(pos->child); ++ ++ assert("edward-683", cryptcompress_inode_ok(inode)); ++ return 0; ++ err: ++ jput(pos->child); ++ free_convert_data(pos); ++ return ret; ++} ++ ++/* clear up disk cluster info */ ++static void detach_convert_idata(convert_info_t * sq) ++{ ++ convert_item_info_t *info; ++ ++ assert("edward-253", sq != NULL); ++ assert("edward-840", sq->itm != NULL); ++ ++ info = sq->itm; ++ assert("edward-255", info->inode != NULL); ++ assert("edward-1212", info->flow.length == 0); ++ ++ free_item_convert_data(sq); ++ return; ++} ++ ++/* plugin->u.item.f.utmost_child */ ++ ++/* This function sets leftmost child for a first cluster item, ++ if the child exists, and NULL in other cases. ++ NOTE-EDWARD: Do not call this for RIGHT_SIDE */ ++ ++int utmost_child_ctail(const coord_t * coord, sideof side, jnode ** child) ++{ ++ reiser4_key key; ++ ++ item_key_by_coord(coord, &key); ++ ++ assert("edward-257", coord != NULL); ++ assert("edward-258", child != NULL); ++ assert("edward-259", side == LEFT_SIDE); ++ assert("edward-260", ++ item_plugin_by_coord(coord) == item_plugin_by_id(CTAIL_ID)); ++ ++ if (!is_disk_cluster_key(&key, coord)) ++ *child = NULL; ++ else ++ *child = jlookup(current_tree, ++ get_key_objectid(item_key_by_coord ++ (coord, &key)), ++ off_to_pg(get_key_offset(&key))); ++ return 0; ++} ++ ++/* Returns true if @p2 is the next item to @p1 ++ in the _same_ disk cluster. ++ Disk cluster is a set of items. 
If ->clustered() != NULL, ++ with each item the whole disk cluster should be read/modified ++*/ ++static int clustered_ctail(const coord_t * p1, const coord_t * p2) ++{ ++ return mergeable_ctail(p1, p2); ++} ++ ++/* Go rightward and check for next disk cluster item, set ++ d_next to DC_CHAINED_ITEM, if the last one exists. ++ If the current position is last item, go to right neighbor. ++ Skip empty nodes. Note, that right neighbors may be not in ++ the slum because of races. If so, make it dirty and ++ convertible. ++*/ ++static int next_item_dc_stat(flush_pos_t * pos) ++{ ++ int ret = 0; ++ int stop = 0; ++ znode *cur; ++ coord_t coord; ++ lock_handle lh; ++ lock_handle right_lock; ++ ++ assert("edward-1232", !node_is_empty(pos->coord.node)); ++ assert("edward-1014", ++ pos->coord.item_pos < coord_num_items(&pos->coord)); ++ assert("edward-1015", chaining_data_present(pos)); ++ assert("edward-1017", ++ item_convert_data(pos)->d_next == DC_INVALID_STATE); ++ ++ item_convert_data(pos)->d_next = DC_AFTER_CLUSTER; ++ ++ if (item_convert_data(pos)->d_cur == DC_AFTER_CLUSTER) ++ return ret; ++ if (pos->coord.item_pos < coord_num_items(&pos->coord) - 1) ++ return ret; ++ ++ /* check next slum item */ ++ init_lh(&right_lock); ++ cur = pos->coord.node; ++ ++ while (!stop) { ++ init_lh(&lh); ++ ret = reiser4_get_right_neighbor(&lh, ++ cur, ++ ZNODE_WRITE_LOCK, ++ GN_CAN_USE_UPPER_LEVELS); ++ if (ret) ++ break; ++ ret = zload(lh.node); ++ if (ret) { ++ done_lh(&lh); ++ break; ++ } ++ coord_init_before_first_item(&coord, lh.node); ++ ++ if (node_is_empty(lh.node)) { ++ znode_make_dirty(lh.node); ++ znode_set_convertible(lh.node); ++ stop = 0; ++ } else if (clustered_ctail(&pos->coord, &coord)) { ++ ++ item_convert_data(pos)->d_next = DC_CHAINED_ITEM; ++ ++ if (!ZF_ISSET(lh.node, JNODE_DIRTY)) { ++ /* ++ warning("edward-1024", ++ "next slum item mergeable, " ++ "but znode %p isn't dirty\n", ++ lh.node); ++ */ ++ znode_make_dirty(lh.node); ++ } ++ if 
(!znode_convertible(lh.node)) { ++ /* ++ warning("edward-1272", ++ "next slum item mergeable, " ++ "but znode %p isn't convertible\n", ++ lh.node); ++ */ ++ znode_set_convertible(lh.node); ++ } ++ stop = 1; ++ } else ++ stop = 1; ++ zrelse(lh.node); ++ done_lh(&right_lock); ++ copy_lh(&right_lock, &lh); ++ done_lh(&lh); ++ cur = right_lock.node; ++ } ++ done_lh(&right_lock); ++ ++ if (ret == -E_NO_NEIGHBOR) ++ ret = 0; ++ return ret; ++} ++ ++static int ++assign_convert_mode(convert_item_info_t * idata, ++ cryptcompress_write_mode_t * mode) ++{ ++ int result = 0; ++ ++ assert("edward-1025", idata != NULL); ++ ++ if (idata->flow.length) { ++ /* append or overwrite */ ++ switch (idata->d_cur) { ++ case DC_FIRST_ITEM: ++ case DC_CHAINED_ITEM: ++ *mode = CRC_OVERWRITE_ITEM; ++ break; ++ case DC_AFTER_CLUSTER: ++ *mode = CRC_APPEND_ITEM; ++ break; ++ default: ++ impossible("edward-1018", "wrong current item state"); ++ } ++ } else { ++ /* cut or invalidate */ ++ switch (idata->d_cur) { ++ case DC_FIRST_ITEM: ++ case DC_CHAINED_ITEM: ++ *mode = CRC_CUT_ITEM; ++ break; ++ case DC_AFTER_CLUSTER: ++ result = 1; ++ break; ++ default: ++ impossible("edward-1019", "wrong current item state"); ++ } ++ } ++ return result; ++} ++ ++/* plugin->u.item.f.convert */ ++/* write ctail in guessed mode */ ++int convert_ctail(flush_pos_t * pos) ++{ ++ int result; ++ int nr_items; ++ cryptcompress_write_mode_t mode = CRC_OVERWRITE_ITEM; ++ ++ assert("edward-1020", pos != NULL); ++ assert("edward-1213", coord_num_items(&pos->coord) != 0); ++ assert("edward-1257", item_id_by_coord(&pos->coord) == CTAIL_ID); ++ assert("edward-1258", ctail_ok(&pos->coord)); ++ assert("edward-261", pos->coord.node != NULL); ++ ++ nr_items = coord_num_items(&pos->coord); ++ if (!chaining_data_present(pos)) { ++ if (should_attach_convert_idata(pos)) { ++ /* attach convert item info */ ++ struct inode *inode; ++ ++ assert("edward-264", pos->child != NULL); ++ assert("edward-265", jnode_page(pos->child) != NULL); 
++ assert("edward-266", ++ jnode_page(pos->child)->mapping != NULL); ++ ++ inode = jnode_page(pos->child)->mapping->host; ++ ++ assert("edward-267", inode != NULL); ++ ++ /* attach item convert info by child and put the last one */ ++ result = attach_convert_idata(pos, inode); ++ pos->child = NULL; ++ if (result == -E_REPEAT) { ++ /* jnode became clean, or there is no dirty ++ pages (nothing to update in disk cluster) */ ++ warning("edward-1021", ++ "convert_ctail: nothing to attach"); ++ return 0; ++ } ++ if (result != 0) ++ return result; ++ } else ++ /* unconvertible */ ++ return 0; ++ } else { ++ /* use old convert info */ ++ ++ convert_item_info_t *idata; ++ ++ idata = item_convert_data(pos); ++ ++ result = assign_convert_mode(idata, &mode); ++ if (result) { ++ /* disk cluster is over, ++ nothing to update anymore */ ++ detach_convert_idata(pos->sq); ++ return 0; ++ } ++ } ++ ++ assert("edward-433", chaining_data_present(pos)); ++ assert("edward-1022", ++ pos->coord.item_pos < coord_num_items(&pos->coord)); ++ ++ result = next_item_dc_stat(pos); ++ if (result) { ++ detach_convert_idata(pos->sq); ++ return result; ++ } ++ result = do_convert_ctail(pos, mode); ++ if (result) { ++ detach_convert_idata(pos->sq); ++ return result; ++ } ++ switch (mode) { ++ case CRC_CUT_ITEM: ++ assert("edward-1214", item_convert_data(pos)->flow.length == 0); ++ assert("edward-1215", ++ coord_num_items(&pos->coord) == nr_items || ++ coord_num_items(&pos->coord) == nr_items - 1); ++ if (item_convert_data(pos)->d_next == DC_CHAINED_ITEM) ++ break; ++ if (coord_num_items(&pos->coord) != nr_items) { ++ /* the item was killed, no more chained items */ ++ detach_convert_idata(pos->sq); ++ if (!node_is_empty(pos->coord.node)) ++ /* make sure the next item will be scanned */ ++ coord_init_before_item(&pos->coord); ++ break; ++ } ++ case CRC_APPEND_ITEM: ++ assert("edward-434", item_convert_data(pos)->flow.length == 0); ++ detach_convert_idata(pos->sq); ++ break; ++ case CRC_OVERWRITE_ITEM: 
++ if (coord_is_unprepped_ctail(&pos->coord)) { ++ /* convert unpprepped ctail to prepped one */ ++ int shift; ++ shift = ++ inode_cluster_shift(item_convert_data(pos)->inode); ++ assert("edward-1259", cluster_shift_ok(shift)); ++ put_unaligned((d8)shift, ++ &ctail_formatted_at(&pos->coord)-> ++ cluster_shift); ++ } ++ break; ++ } ++ return result; ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/ctail.h b/fs/reiser4/plugin/item/ctail.h +new file mode 100644 +index 0000000..ead4418 +--- /dev/null ++++ b/fs/reiser4/plugin/item/ctail.h +@@ -0,0 +1,97 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#if !defined( __FS_REISER4_CTAIL_H__ ) ++#define __FS_REISER4_CTAIL_H__ ++ ++/* Disk format of ctail item */ ++typedef struct ctail_item_format { ++ /* packed shift; size of (prepped) disk cluster ++ is calculated as (1 << cluster_shift) */ ++ d8 cluster_shift; ++ /* ctail body */ ++ d8 body[0]; ++} __attribute__ ((packed)) ctail_item_format; ++ ++/* Unprepped disk cluster is represented by a single ctail item ++ with the following "magic" attributes: */ ++/* "magic" cluster_shift */ ++#define UCTAIL_SHIFT 0xff ++/* How many units unprepped ctail item has */ ++#define UCTAIL_NR_UNITS 1 ++ ++/* The following is a set of various item states in a disk cluster. ++ Disk cluster is a set of items whose keys belong to the interval ++ [dc_key , dc_key + disk_cluster_size - 1] */ ++typedef enum { ++ DC_INVALID_STATE = 0, ++ DC_FIRST_ITEM = 1, ++ DC_CHAINED_ITEM = 2, ++ DC_AFTER_CLUSTER = 3 ++} dc_item_stat; ++ ++/* ctail-specific extension. 
++ In particular this describes parameters of disk cluster an item belongs to */ ++typedef struct { ++ int shift; /* this contains cluster_shift extracted from ++ ctail_item_format (above), or UCTAIL_SHIFT ++ (the last one is the "magic" of unprepped disk clusters)*/ ++ int dsize; /* size of a prepped disk cluster */ ++ int ncount; /* count of nodes occupied by a disk cluster */ ++} ctail_coord_extension_t; ++ ++struct cut_list; ++ ++/* plugin->item.b.* */ ++int can_contain_key_ctail(const coord_t *, const reiser4_key *, ++ const reiser4_item_data *); ++int mergeable_ctail(const coord_t * p1, const coord_t * p2); ++pos_in_node_t nr_units_ctail(const coord_t * coord); ++int estimate_ctail(const coord_t * coord, const reiser4_item_data * data); ++void print_ctail(const char *prefix, coord_t * coord); ++lookup_result lookup_ctail(const reiser4_key *, lookup_bias, coord_t *); ++ ++int paste_ctail(coord_t * coord, reiser4_item_data * data, ++ carry_plugin_info * info UNUSED_ARG); ++int init_ctail(coord_t *, coord_t *, reiser4_item_data *); ++int can_shift_ctail(unsigned free_space, coord_t * coord, ++ znode * target, shift_direction pend, unsigned *size, ++ unsigned want); ++void copy_units_ctail(coord_t * target, coord_t * source, unsigned from, ++ unsigned count, shift_direction where_is_free_space, ++ unsigned free_space); ++int cut_units_ctail(coord_t * coord, pos_in_node_t from, pos_in_node_t to, ++ carry_cut_data *, reiser4_key * smallest_removed, ++ reiser4_key * new_first); ++int kill_units_ctail(coord_t * coord, pos_in_node_t from, pos_in_node_t to, ++ carry_kill_data *, reiser4_key * smallest_removed, ++ reiser4_key * new_first); ++int ctail_ok(const coord_t * coord); ++int check_ctail(const coord_t * coord, const char **error); ++ ++/* plugin->u.item.s.* */ ++int read_ctail(struct file *, flow_t *, hint_t *); ++int readpage_ctail(void *, struct page *); ++int readpages_ctail(struct file *, struct address_space *, struct list_head *); ++reiser4_key 
*append_key_ctail(const coord_t *, reiser4_key *); ++int create_hook_ctail(const coord_t * coord, void *arg); ++int kill_hook_ctail(const coord_t *, pos_in_node_t, pos_in_node_t, ++ carry_kill_data *); ++int shift_hook_ctail(const coord_t *, unsigned, unsigned, znode *); ++ ++/* plugin->u.item.f */ ++int utmost_child_ctail(const coord_t *, sideof, jnode **); ++int scan_ctail(flush_scan *); ++int convert_ctail(flush_pos_t *); ++size_t inode_scaled_cluster_size(struct inode *); ++ ++#endif /* __FS_REISER4_CTAIL_H__ */ ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/extent.c b/fs/reiser4/plugin/item/extent.c +new file mode 100644 +index 0000000..e35a4d5 +--- /dev/null ++++ b/fs/reiser4/plugin/item/extent.c +@@ -0,0 +1,197 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "item.h" ++#include "../../key.h" ++#include "../../super.h" ++#include "../../carry.h" ++#include "../../inode.h" ++#include "../../page_cache.h" ++#include "../../flush.h" ++#include "../object.h" ++ ++/* prepare structure reiser4_item_data. 
It is used to put one extent unit into tree */ ++/* Audited by: green(2002.06.13) */ ++reiser4_item_data *init_new_extent(reiser4_item_data * data, void *ext_unit, ++ int nr_extents) ++{ ++ data->data = ext_unit; ++ /* data->data is kernel space */ ++ data->user = 0; ++ data->length = sizeof(reiser4_extent) * nr_extents; ++ data->arg = NULL; ++ data->iplug = item_plugin_by_id(EXTENT_POINTER_ID); ++ return data; ++} ++ ++/* how many bytes are addressed by @nr first extents of the extent item */ ++reiser4_block_nr reiser4_extent_size(const coord_t * coord, pos_in_node_t nr) ++{ ++ pos_in_node_t i; ++ reiser4_block_nr blocks; ++ reiser4_extent *ext; ++ ++ ext = item_body_by_coord(coord); ++ assert("vs-263", nr <= nr_units_extent(coord)); ++ ++ blocks = 0; ++ for (i = 0; i < nr; i++, ext++) { ++ blocks += extent_get_width(ext); ++ } ++ ++ return blocks * current_blocksize; ++} ++ ++extent_state state_of_extent(reiser4_extent * ext) ++{ ++ switch ((int)extent_get_start(ext)) { ++ case 0: ++ return HOLE_EXTENT; ++ case 1: ++ return UNALLOCATED_EXTENT; ++ default: ++ break; ++ } ++ return ALLOCATED_EXTENT; ++} ++ ++int extent_is_unallocated(const coord_t * item) ++{ ++ assert("jmacd-5133", item_is_extent(item)); ++ ++ return state_of_extent(extent_by_coord(item)) == UNALLOCATED_EXTENT; ++} ++ ++/* set extent's start and width */ ++void reiser4_set_extent(reiser4_extent * ext, reiser4_block_nr start, ++ reiser4_block_nr width) ++{ ++ extent_set_start(ext, start); ++ extent_set_width(ext, width); ++} ++ ++/** ++ * reiser4_replace_extent - replace extent and paste 1 or 2 after it ++ * @un_extent: coordinate of extent to be overwritten ++ * @lh: need better comment ++ * @key: need better comment ++ * @exts_to_add: data prepared for insertion into tree ++ * @replace: need better comment ++ * @flags: need better comment ++ * @return_insert_position: need better comment ++ * ++ * Overwrites one extent, pastes 1 or 2 more ones after overwritten one. 
If ++ * @return_inserted_position is 1 - @un_extent and @lh are returned set to ++ * first of newly inserted units, if it is 0 - @un_extent and @lh are returned ++ * set to extent which was overwritten. ++ */ ++int reiser4_replace_extent(struct replace_handle *h, ++ int return_inserted_position) ++{ ++ int result; ++ znode *orig_znode; ++ /*ON_DEBUG(reiser4_extent orig_ext);*/ /* this is for debugging */ ++ ++ assert("vs-990", coord_is_existing_unit(h->coord)); ++ assert("vs-1375", znode_is_write_locked(h->coord->node)); ++ assert("vs-1426", extent_get_width(&h->overwrite) != 0); ++ assert("vs-1427", extent_get_width(&h->new_extents[0]) != 0); ++ assert("vs-1427", ergo(h->nr_new_extents == 2, ++ extent_get_width(&h->new_extents[1]) != 0)); ++ ++ /* compose structure for paste */ ++ init_new_extent(&h->item, &h->new_extents[0], h->nr_new_extents); ++ ++ coord_dup(&h->coord_after, h->coord); ++ init_lh(&h->lh_after); ++ copy_lh(&h->lh_after, h->lh); ++ reiser4_tap_init(&h->watch, &h->coord_after, &h->lh_after, ZNODE_WRITE_LOCK); ++ reiser4_tap_monitor(&h->watch); ++ ++ ON_DEBUG(h->orig_ext = *extent_by_coord(h->coord)); ++ orig_znode = h->coord->node; ++ ++#if REISER4_DEBUG ++ /* make sure that key is set properly */ ++ unit_key_by_coord(h->coord, &h->tmp); ++ set_key_offset(&h->tmp, ++ get_key_offset(&h->tmp) + ++ extent_get_width(&h->overwrite) * current_blocksize); ++ assert("vs-1080", keyeq(&h->tmp, &h->paste_key)); ++#endif ++ ++ /* set insert point after unit to be replaced */ ++ h->coord->between = AFTER_UNIT; ++ ++ result = insert_into_item(h->coord, return_inserted_position ? h->lh : NULL, ++ &h->paste_key, &h->item, h->flags); ++ if (!result) { ++ /* now we have to replace the unit after which new units were ++ inserted. 
Its position is tracked by @watch */ ++ reiser4_extent *ext; ++ znode *node; ++ ++ node = h->coord_after.node; ++ if (node != orig_znode) { ++ coord_clear_iplug(&h->coord_after); ++ result = zload(node); ++ } ++ ++ if (likely(!result)) { ++ ext = extent_by_coord(&h->coord_after); ++ ++ assert("vs-987", znode_is_loaded(node)); ++ assert("vs-988", !memcmp(ext, &h->orig_ext, sizeof(*ext))); ++ ++ /* overwrite extent unit */ ++ memcpy(ext, &h->overwrite, sizeof(reiser4_extent)); ++ znode_make_dirty(node); ++ ++ if (node != orig_znode) ++ zrelse(node); ++ ++ if (return_inserted_position == 0) { ++ /* coord and lh are to be set to overwritten ++ extent */ ++ assert("vs-1662", ++ WITH_DATA(node, !memcmp(&h->overwrite, ++ extent_by_coord( ++ &h->coord_after), ++ sizeof(reiser4_extent)))); ++ ++ *h->coord = h->coord_after; ++ done_lh(h->lh); ++ copy_lh(h->lh, &h->lh_after); ++ } else { ++ /* h->coord and h->lh are to be set to first of ++ inserted units */ ++ assert("vs-1663", ++ WITH_DATA(h->coord->node, ++ !memcmp(&h->new_extents[0], ++ extent_by_coord(h->coord), ++ sizeof(reiser4_extent)))); ++ assert("vs-1664", h->lh->node == h->coord->node); ++ } ++ } ++ } ++ reiser4_tap_done(&h->watch); ++ ++ return result; ++} ++ ++lock_handle *znode_lh(znode *node) ++{ ++ assert("vs-1371", znode_is_write_locked(node)); ++ assert("vs-1372", znode_is_wlocked_once(node)); ++ return list_entry(node->lock.owners.next, lock_handle, owners_link); ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * scroll-step: 1 ++ * End: ++ */ +diff --git a/fs/reiser4/plugin/item/extent.h b/fs/reiser4/plugin/item/extent.h +new file mode 100644 +index 0000000..d817d1b +--- /dev/null ++++ b/fs/reiser4/plugin/item/extent.h +@@ -0,0 +1,231 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#ifndef __REISER4_EXTENT_H__ ++#define __REISER4_EXTENT_H__ ++ ++/* on disk 
extent */ ++typedef struct { ++ reiser4_dblock_nr start; ++ reiser4_dblock_nr width; ++} reiser4_extent; ++ ++typedef struct extent_stat { ++ int unallocated_units; ++ int unallocated_blocks; ++ int allocated_units; ++ int allocated_blocks; ++ int hole_units; ++ int hole_blocks; ++} extent_stat; ++ ++/* extents in an extent item can be either holes, or unallocated or allocated ++ extents */ ++typedef enum { ++ HOLE_EXTENT, ++ UNALLOCATED_EXTENT, ++ ALLOCATED_EXTENT ++} extent_state; ++ ++#define HOLE_EXTENT_START 0 ++#define UNALLOCATED_EXTENT_START 1 ++#define UNALLOCATED_EXTENT_START2 2 ++ ++typedef struct { ++ reiser4_block_nr pos_in_unit; ++ reiser4_block_nr width; /* width of current unit */ ++ pos_in_node_t nr_units; /* number of units */ ++ int ext_offset; /* offset from the beginning of zdata() */ ++ unsigned long expected_page; ++#if REISER4_DEBUG ++ reiser4_extent extent; ++#endif ++} extent_coord_extension_t; ++ ++/* macros to set/get fields of on-disk extent */ ++static inline reiser4_block_nr extent_get_start(const reiser4_extent * ext) ++{ ++ return le64_to_cpu(ext->start); ++} ++ ++static inline reiser4_block_nr extent_get_width(const reiser4_extent * ext) ++{ ++ return le64_to_cpu(ext->width); ++} ++ ++extern __u64 reiser4_current_block_count(void); ++ ++static inline void ++extent_set_start(reiser4_extent * ext, reiser4_block_nr start) ++{ ++ cassert(sizeof(ext->start) == 8); ++ assert("nikita-2510", ++ ergo(start > 1, start < reiser4_current_block_count())); ++ put_unaligned(cpu_to_le64(start), &ext->start); ++} ++ ++static inline void ++extent_set_width(reiser4_extent * ext, reiser4_block_nr width) ++{ ++ cassert(sizeof(ext->width) == 8); ++ assert("", width > 0); ++ put_unaligned(cpu_to_le64(width), &ext->width); ++ assert("nikita-2511", ++ ergo(extent_get_start(ext) > 1, ++ extent_get_start(ext) + width <= ++ reiser4_current_block_count())); ++} ++ ++#define extent_item(coord) \ ++({ \ ++ assert("nikita-3143", item_is_extent(coord)); \ ++ 
((reiser4_extent *)item_body_by_coord (coord)); \ ++}) ++ ++#define extent_by_coord(coord) \ ++({ \ ++ assert("nikita-3144", item_is_extent(coord)); \ ++ (extent_item (coord) + (coord)->unit_pos); \ ++}) ++ ++#define width_by_coord(coord) \ ++({ \ ++ assert("nikita-3145", item_is_extent(coord)); \ ++ extent_get_width (extent_by_coord(coord)); \ ++}) ++ ++struct carry_cut_data; ++struct carry_kill_data; ++ ++/* plugin->u.item.b.* */ ++reiser4_key *max_key_inside_extent(const coord_t *, reiser4_key *); ++int can_contain_key_extent(const coord_t * coord, const reiser4_key * key, ++ const reiser4_item_data *); ++int mergeable_extent(const coord_t * p1, const coord_t * p2); ++pos_in_node_t nr_units_extent(const coord_t *); ++lookup_result lookup_extent(const reiser4_key *, lookup_bias, coord_t *); ++void init_coord_extent(coord_t *); ++int init_extent(coord_t *, reiser4_item_data *); ++int paste_extent(coord_t *, reiser4_item_data *, carry_plugin_info *); ++int can_shift_extent(unsigned free_space, ++ coord_t * source, znode * target, shift_direction, ++ unsigned *size, unsigned want); ++void copy_units_extent(coord_t * target, coord_t * source, unsigned from, ++ unsigned count, shift_direction where_is_free_space, ++ unsigned free_space); ++int kill_hook_extent(const coord_t *, pos_in_node_t from, pos_in_node_t count, ++ struct carry_kill_data *); ++int create_hook_extent(const coord_t * coord, void *arg); ++int cut_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to, ++ struct carry_cut_data *, reiser4_key * smallest_removed, ++ reiser4_key * new_first); ++int kill_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to, ++ struct carry_kill_data *, reiser4_key * smallest_removed, ++ reiser4_key * new_first); ++reiser4_key *unit_key_extent(const coord_t *, reiser4_key *); ++reiser4_key *max_unit_key_extent(const coord_t *, reiser4_key *); ++void print_extent(const char *, coord_t *); ++int utmost_child_extent(const coord_t * coord, sideof 
side, jnode ** child); ++int utmost_child_real_block_extent(const coord_t * coord, sideof side, ++ reiser4_block_nr * block); ++void item_stat_extent(const coord_t * coord, void *vp); ++int reiser4_check_extent(const coord_t * coord, const char **error); ++ ++/* plugin->u.item.s.file.* */ ++ssize_t reiser4_write_extent(struct file *, const char __user *, ++ size_t, loff_t *); ++int reiser4_read_extent(struct file *, flow_t *, hint_t *); ++int reiser4_readpage_extent(void *, struct page *); ++int reiser4_do_readpage_extent(reiser4_extent*, reiser4_block_nr, struct page*); ++reiser4_key *append_key_extent(const coord_t *, reiser4_key *); ++void init_coord_extension_extent(uf_coord_t *, loff_t offset); ++int get_block_address_extent(const coord_t *, sector_t block, ++ sector_t * result); ++ ++/* these are used in flush.c ++ FIXME-VS: should they be somewhere in item_plugin? */ ++int allocate_extent_item_in_place(coord_t *, lock_handle *, flush_pos_t * pos); ++int allocate_and_copy_extent(znode * left, coord_t * right, flush_pos_t * pos, ++ reiser4_key * stop_key); ++ ++int extent_is_unallocated(const coord_t * item); /* True if this extent is unallocated (i.e., not a hole, not allocated). */ ++__u64 extent_unit_index(const coord_t * item); /* Block offset of this unit. */ ++__u64 extent_unit_width(const coord_t * item); /* Number of blocks in this unit. */ ++ ++/* plugin->u.item.f. 
*/ ++int reiser4_scan_extent(flush_scan * scan); ++extern int key_by_offset_extent(struct inode *, loff_t, reiser4_key *); ++ ++reiser4_item_data *init_new_extent(reiser4_item_data * data, void *ext_unit, ++ int nr_extents); ++reiser4_block_nr reiser4_extent_size(const coord_t * coord, pos_in_node_t nr); ++extent_state state_of_extent(reiser4_extent * ext); ++void reiser4_set_extent(reiser4_extent *, reiser4_block_nr start, ++ reiser4_block_nr width); ++int reiser4_update_extent(struct inode *, jnode *, loff_t pos, ++ int *plugged_hole); ++ ++#include "../../coord.h" ++#include "../../lock.h" ++#include "../../tap.h" ++ ++struct replace_handle { ++ /* these are to be set before calling reiser4_replace_extent */ ++ coord_t *coord; ++ lock_handle *lh; ++ reiser4_key key; ++ reiser4_key *pkey; ++ reiser4_extent overwrite; ++ reiser4_extent new_extents[2]; ++ int nr_new_extents; ++ unsigned flags; ++ ++ /* these are used by reiser4_replace_extent */ ++ reiser4_item_data item; ++ coord_t coord_after; ++ lock_handle lh_after; ++ tap_t watch; ++ reiser4_key paste_key; ++#if REISER4_DEBUG ++ reiser4_extent orig_ext; ++ reiser4_key tmp; ++#endif ++}; ++ ++/* this structure is kmalloced before calling make_extent to avoid excessive ++ stack consumption on plug_hole->reiser4_replace_extent */ ++struct make_extent_handle { ++ uf_coord_t *uf_coord; ++ reiser4_block_nr blocknr; ++ int created; ++ struct inode *inode; ++ union { ++ struct { ++ } append; ++ struct replace_handle replace; ++ } u; ++}; ++ ++int reiser4_replace_extent(struct replace_handle *, ++ int return_inserted_position); ++lock_handle *znode_lh(znode *); ++ ++/* the reiser4 repacker support */ ++struct repacker_cursor; ++extern int process_extent_backward_for_repacking(tap_t *, ++ struct repacker_cursor *); ++extern int mark_extent_for_repacking(tap_t *, int); ++ ++#define coord_by_uf_coord(uf_coord) (&((uf_coord)->coord)) ++#define ext_coord_by_uf_coord(uf_coord) (&((uf_coord)->extension.extent)) ++ ++/* 
__REISER4_EXTENT_H__ */ ++#endif ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/extent_file_ops.c b/fs/reiser4/plugin/item/extent_file_ops.c +new file mode 100644 +index 0000000..cf337c4 +--- /dev/null ++++ b/fs/reiser4/plugin/item/extent_file_ops.c +@@ -0,0 +1,1435 @@ ++/* COPYRIGHT 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "item.h" ++#include "../../inode.h" ++#include "../../page_cache.h" ++#include "../object.h" ++ ++#include ++#include ++#include "../../../../mm/filemap.h" ++ ++static inline reiser4_extent *ext_by_offset(const znode *node, int offset) ++{ ++ reiser4_extent *ext; ++ ++ ext = (reiser4_extent *) (zdata(node) + offset); ++ return ext; ++} ++ ++/** ++ * check_uf_coord - verify coord extension ++ * @uf_coord: ++ * @key: ++ * ++ * Makes sure that all fields of @uf_coord are set properly. If @key is ++ * specified - check whether @uf_coord is set correspondingly. 
++ */ ++static void check_uf_coord(const uf_coord_t *uf_coord, const reiser4_key *key) ++{ ++#if REISER4_DEBUG ++ const coord_t *coord; ++ const extent_coord_extension_t *ext_coord; ++ reiser4_extent *ext; ++ ++ coord = &uf_coord->coord; ++ ext_coord = &uf_coord->extension.extent; ++ ext = ext_by_offset(coord->node, uf_coord->extension.extent.ext_offset); ++ ++ assert("", ++ WITH_DATA(coord->node, ++ (uf_coord->valid == 1 && ++ coord_is_iplug_set(coord) && ++ item_is_extent(coord) && ++ ext_coord->nr_units == nr_units_extent(coord) && ++ ext == extent_by_coord(coord) && ++ ext_coord->width == extent_get_width(ext) && ++ coord->unit_pos < ext_coord->nr_units && ++ ext_coord->pos_in_unit < ext_coord->width && ++ memcmp(ext, &ext_coord->extent, ++ sizeof(reiser4_extent)) == 0))); ++ if (key) { ++ reiser4_key coord_key; ++ ++ unit_key_by_coord(&uf_coord->coord, &coord_key); ++ set_key_offset(&coord_key, ++ get_key_offset(&coord_key) + ++ (uf_coord->extension.extent. ++ pos_in_unit << PAGE_CACHE_SHIFT)); ++ assert("", keyeq(key, &coord_key)); ++ } ++#endif ++} ++ ++static inline reiser4_extent *ext_by_ext_coord(const uf_coord_t *uf_coord) ++{ ++ check_uf_coord(uf_coord, NULL); ++ ++ return ext_by_offset(uf_coord->coord.node, ++ uf_coord->extension.extent.ext_offset); ++} ++ ++#if REISER4_DEBUG ++ ++/** ++ * offset_is_in_unit ++ * ++ * ++ * ++ */ ++/* return 1 if offset @off is inside of extent unit pointed to by @coord. 
Set ++ pos_in_unit inside of unit correspondingly */ ++static int offset_is_in_unit(const coord_t *coord, loff_t off) ++{ ++ reiser4_key unit_key; ++ __u64 unit_off; ++ reiser4_extent *ext; ++ ++ ext = extent_by_coord(coord); ++ ++ unit_key_extent(coord, &unit_key); ++ unit_off = get_key_offset(&unit_key); ++ if (off < unit_off) ++ return 0; ++ if (off >= (unit_off + (current_blocksize * extent_get_width(ext)))) ++ return 0; ++ return 1; ++} ++ ++static int ++coord_matches_key_extent(const coord_t * coord, const reiser4_key * key) ++{ ++ reiser4_key item_key; ++ ++ assert("vs-771", coord_is_existing_unit(coord)); ++ assert("vs-1258", keylt(key, append_key_extent(coord, &item_key))); ++ assert("vs-1259", keyge(key, item_key_by_coord(coord, &item_key))); ++ ++ return offset_is_in_unit(coord, get_key_offset(key)); ++} ++ ++#endif ++ ++/** ++ * can_append - ++ * @key: ++ * @coord: ++ * ++ * Returns 1 if @key is equal to an append key of item @coord is set to ++ */ ++static int can_append(const reiser4_key *key, const coord_t *coord) ++{ ++ reiser4_key append_key; ++ ++ return keyeq(key, append_key_extent(coord, &append_key)); ++} ++ ++/** ++ * append_hole ++ * @coord: ++ * @lh: ++ * @key: ++ * ++ */ ++static int append_hole(coord_t *coord, lock_handle *lh, ++ const reiser4_key *key) ++{ ++ reiser4_key append_key; ++ reiser4_block_nr hole_width; ++ reiser4_extent *ext, new_ext; ++ reiser4_item_data idata; ++ ++ /* last item of file may have to be appended with hole */ ++ assert("vs-708", znode_get_level(coord->node) == TWIG_LEVEL); ++ assert("vs-714", item_id_by_coord(coord) == EXTENT_POINTER_ID); ++ ++ /* key of first byte which is not addressed by this extent */ ++ append_key_extent(coord, &append_key); ++ ++ assert("", keyle(&append_key, key)); ++ ++ /* ++ * extent item has to be appended with hole. 
Calculate length of that ++ * hole ++ */ ++ hole_width = ((get_key_offset(key) - get_key_offset(&append_key) + ++ current_blocksize - 1) >> current_blocksize_bits); ++ assert("vs-954", hole_width > 0); ++ ++ /* set coord after last unit */ ++ coord_init_after_item_end(coord); ++ ++ /* get last extent in the item */ ++ ext = extent_by_coord(coord); ++ if (state_of_extent(ext) == HOLE_EXTENT) { ++ /* ++ * last extent of a file is hole extent. Widen that extent by ++ * @hole_width blocks. Note that we do not worry about ++ * overflowing - extent width is 64 bits ++ */ ++ reiser4_set_extent(ext, HOLE_EXTENT_START, ++ extent_get_width(ext) + hole_width); ++ znode_make_dirty(coord->node); ++ return 0; ++ } ++ ++ /* append last item of the file with hole extent unit */ ++ assert("vs-713", (state_of_extent(ext) == ALLOCATED_EXTENT || ++ state_of_extent(ext) == UNALLOCATED_EXTENT)); ++ ++ reiser4_set_extent(&new_ext, HOLE_EXTENT_START, hole_width); ++ init_new_extent(&idata, &new_ext, 1); ++ return insert_into_item(coord, lh, &append_key, &idata, 0); ++} ++ ++/** ++ * check_jnodes ++ * @twig: longterm locked twig node ++ * @key: ++ * ++ */ ++static void check_jnodes(znode *twig, const reiser4_key *key, int count) ++{ ++#if REISER4_DEBUG ++ coord_t c; ++ reiser4_key node_key, jnode_key; ++ ++ jnode_key = *key; ++ ++ assert("", twig != NULL); ++ assert("", znode_get_level(twig) == TWIG_LEVEL); ++ assert("", znode_is_write_locked(twig)); ++ ++ zload(twig); ++ /* get the smallest key in twig node */ ++ coord_init_first_unit(&c, twig); ++ unit_key_by_coord(&c, &node_key); ++ assert("", keyle(&node_key, &jnode_key)); ++ ++ coord_init_last_unit(&c, twig); ++ unit_key_by_coord(&c, &node_key); ++ if (item_plugin_by_coord(&c)->s.file.append_key) ++ item_plugin_by_coord(&c)->s.file.append_key(&c, &node_key); ++ set_key_offset(&jnode_key, ++ get_key_offset(&jnode_key) + (loff_t)count * PAGE_CACHE_SIZE - 1); ++ assert("", keylt(&jnode_key, &node_key)); ++ zrelse(twig); ++#endif ++} ++ 
++/** ++ * append_last_extent - append last file item ++ * @uf_coord: coord to start insertion from ++ * @jnodes: array of jnodes ++ * @count: number of jnodes in the array ++ * ++ * There is already at least one extent item of file @inode in the tree. Append ++ * the last of them with unallocated extent unit of width @count. Assign ++ * fake block numbers to jnodes corresponding to the inserted extent. ++ */ ++static int append_last_extent(uf_coord_t *uf_coord, const reiser4_key *key, ++ jnode **jnodes, int count) ++{ ++ int result; ++ reiser4_extent new_ext; ++ reiser4_item_data idata; ++ coord_t *coord; ++ extent_coord_extension_t *ext_coord; ++ reiser4_extent *ext; ++ reiser4_block_nr block; ++ jnode *node; ++ int i; ++ ++ coord = &uf_coord->coord; ++ ext_coord = &uf_coord->extension.extent; ++ ext = ext_by_ext_coord(uf_coord); ++ ++ /* check correctness of position in the item */ ++ assert("vs-228", coord->unit_pos == coord_last_unit_pos(coord)); ++ assert("vs-1311", coord->between == AFTER_UNIT); ++ assert("vs-1302", ext_coord->pos_in_unit == ext_coord->width - 1); ++ ++ if (!can_append(key, coord)) { ++ /* hole extent has to be inserted */ ++ result = append_hole(coord, uf_coord->lh, key); ++ uf_coord->valid = 0; ++ return result; ++ } ++ ++ if (count == 0) ++ return 0; ++ ++ assert("", get_key_offset(key) == (loff_t)index_jnode(jnodes[0]) * PAGE_CACHE_SIZE); ++ ++ result = DQUOT_ALLOC_BLOCK_NODIRTY(mapping_jnode(jnodes[0])->host, ++ count); ++ BUG_ON(result != 0); ++ ++ switch (state_of_extent(ext)) { ++ case UNALLOCATED_EXTENT: ++ /* ++ * last extent unit of the file is unallocated one. 
Increase ++ * its width by @count ++ */ ++ reiser4_set_extent(ext, UNALLOCATED_EXTENT_START, ++ extent_get_width(ext) + count); ++ znode_make_dirty(coord->node); ++ ++ /* update coord extension */ ++ ext_coord->width += count; ++ ON_DEBUG(extent_set_width ++ (&uf_coord->extension.extent.extent, ++ ext_coord->width)); ++ break; ++ ++ case HOLE_EXTENT: ++ case ALLOCATED_EXTENT: ++ /* ++ * last extent unit of the file is either hole or allocated ++ * one. Append one unallocated extent of width @count ++ */ ++ reiser4_set_extent(&new_ext, UNALLOCATED_EXTENT_START, count); ++ init_new_extent(&idata, &new_ext, 1); ++ result = insert_into_item(coord, uf_coord->lh, key, &idata, 0); ++ uf_coord->valid = 0; ++ if (result) ++ return result; ++ break; ++ ++ default: ++ return RETERR(-EIO); ++ } ++ ++ /* ++ * make sure that we hold long term locked twig node containing all ++ * jnodes we are about to capture ++ */ ++ check_jnodes(uf_coord->lh->node, key, count); ++ ++ /* ++ * assign fake block numbers to all jnodes. 
FIXME: make sure whether ++ * twig node containing inserted extent item is locked ++ */ ++ block = fake_blocknr_unformatted(count); ++ for (i = 0; i < count; i ++, block ++) { ++ node = jnodes[i]; ++ spin_lock_jnode(node); ++ JF_SET(node, JNODE_CREATED); ++ jnode_set_block(node, &block); ++ result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0); ++ BUG_ON(result != 0); ++ jnode_make_dirty_locked(node); ++ spin_unlock_jnode(node); ++ } ++ return count; ++} ++ ++/** ++ * insert_first_hole - inser hole extent into tree ++ * @coord: ++ * @lh: ++ * @key: ++ * ++ * ++ */ ++static int insert_first_hole(coord_t *coord, lock_handle *lh, ++ const reiser4_key *key) ++{ ++ reiser4_extent new_ext; ++ reiser4_item_data idata; ++ reiser4_key item_key; ++ reiser4_block_nr hole_width; ++ ++ /* @coord must be set for inserting of new item */ ++ assert("vs-711", coord_is_between_items(coord)); ++ ++ item_key = *key; ++ set_key_offset(&item_key, 0ull); ++ ++ hole_width = ((get_key_offset(key) + current_blocksize - 1) >> ++ current_blocksize_bits); ++ assert("vs-710", hole_width > 0); ++ ++ /* compose body of hole extent and insert item into tree */ ++ reiser4_set_extent(&new_ext, HOLE_EXTENT_START, hole_width); ++ init_new_extent(&idata, &new_ext, 1); ++ return insert_extent_by_coord(coord, &idata, &item_key, lh); ++} ++ ++ ++/** ++ * insert_first_extent - insert first file item ++ * @inode: inode of file ++ * @uf_coord: coord to start insertion from ++ * @jnodes: array of jnodes ++ * @count: number of jnodes in the array ++ * @inode: ++ * ++ * There are no items of file @inode in the tree yet. Insert unallocated extent ++ * of width @count into tree or hole extent if writing not to the ++ * beginning. Assign fake block numbers to jnodes corresponding to the inserted ++ * unallocated extent. Returns number of jnodes or error code. 
++ */ ++static int insert_first_extent(uf_coord_t *uf_coord, const reiser4_key *key, ++ jnode **jnodes, int count, ++ struct inode *inode) ++{ ++ int result; ++ int i; ++ reiser4_extent new_ext; ++ reiser4_item_data idata; ++ reiser4_block_nr block; ++ unix_file_info_t *uf_info; ++ jnode *node; ++ ++ /* first extent insertion starts at leaf level */ ++ assert("vs-719", znode_get_level(uf_coord->coord.node) == LEAF_LEVEL); ++ assert("vs-711", coord_is_between_items(&uf_coord->coord)); ++ ++ if (get_key_offset(key) != 0) { ++ result = insert_first_hole(&uf_coord->coord, uf_coord->lh, key); ++ uf_coord->valid = 0; ++ uf_info = unix_file_inode_data(inode); ++ ++ /* ++ * first item insertion is only possible when writing to empty ++ * file or performing tail conversion ++ */ ++ assert("", (uf_info->container == UF_CONTAINER_EMPTY || ++ (reiser4_inode_get_flag(inode, ++ REISER4_PART_MIXED) && ++ reiser4_inode_get_flag(inode, ++ REISER4_PART_IN_CONV)))); ++ /* if file was empty - update its state */ ++ if (result == 0 && uf_info->container == UF_CONTAINER_EMPTY) ++ uf_info->container = UF_CONTAINER_EXTENTS; ++ return result; ++ } ++ ++ if (count == 0) ++ return 0; ++ ++ result = DQUOT_ALLOC_BLOCK_NODIRTY(mapping_jnode(jnodes[0])->host, count); ++ BUG_ON(result != 0); ++ ++ /* ++ * prepare for tree modification: compose body of item and item data ++ * structure needed for insertion ++ */ ++ reiser4_set_extent(&new_ext, UNALLOCATED_EXTENT_START, count); ++ init_new_extent(&idata, &new_ext, 1); ++ ++ /* insert extent item into the tree */ ++ result = insert_extent_by_coord(&uf_coord->coord, &idata, key, ++ uf_coord->lh); ++ if (result) ++ return result; ++ ++ /* ++ * make sure that we hold long term locked twig node containing all ++ * jnodes we are about to capture ++ */ ++ check_jnodes(uf_coord->lh->node, key, count); ++ /* ++ * assign fake block numbers to all jnodes, capture and mark them dirty ++ */ ++ block = fake_blocknr_unformatted(count); ++ for (i = 0; i < count; i 
++, block ++) { ++ node = jnodes[i]; ++ spin_lock_jnode(node); ++ JF_SET(node, JNODE_CREATED); ++ jnode_set_block(node, &block); ++ result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0); ++ BUG_ON(result != 0); ++ jnode_make_dirty_locked(node); ++ spin_unlock_jnode(node); ++ } ++ ++ /* ++ * invalidate coordinate, research must be performed to continue ++ * because write will continue on twig level ++ */ ++ uf_coord->valid = 0; ++ return count; ++} ++ ++/** ++ * plug_hole - replace hole extent with unallocated and holes ++ * @uf_coord: ++ * @key: ++ * @node: ++ * @h: structure containing coordinate, lock handle, key, etc ++ * ++ * Creates an unallocated extent of width 1 within a hole. In worst case two ++ * additional extents can be created. ++ */ ++static int plug_hole(uf_coord_t *uf_coord, const reiser4_key *key, int *how) ++{ ++ struct replace_handle rh; ++ reiser4_extent *ext; ++ reiser4_block_nr width, pos_in_unit; ++ coord_t *coord; ++ extent_coord_extension_t *ext_coord; ++ int return_inserted_position; ++ ++ check_uf_coord(uf_coord, key); ++ ++ rh.coord = coord_by_uf_coord(uf_coord); ++ rh.lh = uf_coord->lh; ++ rh.flags = 0; ++ ++ coord = coord_by_uf_coord(uf_coord); ++ ext_coord = ext_coord_by_uf_coord(uf_coord); ++ ext = ext_by_ext_coord(uf_coord); ++ ++ width = ext_coord->width; ++ pos_in_unit = ext_coord->pos_in_unit; ++ ++ *how = 0; ++ if (width == 1) { ++ reiser4_set_extent(ext, UNALLOCATED_EXTENT_START, 1); ++ znode_make_dirty(coord->node); ++ /* update uf_coord */ ++ ON_DEBUG(ext_coord->extent = *ext); ++ *how = 1; ++ return 0; ++ } else if (pos_in_unit == 0) { ++ /* we deal with first element of extent */ ++ if (coord->unit_pos) { ++ /* there is an extent to the left */ ++ if (state_of_extent(ext - 1) == UNALLOCATED_EXTENT) { ++ /* ++ * left neighboring unit is an unallocated ++ * extent. 
Increase its width and decrease ++ * width of hole ++ */ ++ extent_set_width(ext - 1, ++ extent_get_width(ext - 1) + 1); ++ extent_set_width(ext, width - 1); ++ znode_make_dirty(coord->node); ++ ++ /* update coord extension */ ++ coord->unit_pos--; ++ ext_coord->width = extent_get_width(ext - 1); ++ ext_coord->pos_in_unit = ext_coord->width - 1; ++ ext_coord->ext_offset -= sizeof(reiser4_extent); ++ ON_DEBUG(ext_coord->extent = ++ *extent_by_coord(coord)); ++ *how = 2; ++ return 0; ++ } ++ } ++ /* extent for replace */ ++ reiser4_set_extent(&rh.overwrite, UNALLOCATED_EXTENT_START, 1); ++ /* extent to be inserted */ ++ reiser4_set_extent(&rh.new_extents[0], HOLE_EXTENT_START, ++ width - 1); ++ rh.nr_new_extents = 1; ++ ++ /* have reiser4_replace_extent to return with @coord and ++ @uf_coord->lh set to unit which was replaced */ ++ return_inserted_position = 0; ++ *how = 3; ++ } else if (pos_in_unit == width - 1) { ++ /* we deal with last element of extent */ ++ if (coord->unit_pos < nr_units_extent(coord) - 1) { ++ /* there is an extent unit to the right */ ++ if (state_of_extent(ext + 1) == UNALLOCATED_EXTENT) { ++ /* ++ * right neighboring unit is an unallocated ++ * extent. 
Increase its width and decrease ++ * width of hole ++ */ ++ extent_set_width(ext + 1, ++ extent_get_width(ext + 1) + 1); ++ extent_set_width(ext, width - 1); ++ znode_make_dirty(coord->node); ++ ++ /* update coord extension */ ++ coord->unit_pos++; ++ ext_coord->width = extent_get_width(ext + 1); ++ ext_coord->pos_in_unit = 0; ++ ext_coord->ext_offset += sizeof(reiser4_extent); ++ ON_DEBUG(ext_coord->extent = ++ *extent_by_coord(coord)); ++ *how = 4; ++ return 0; ++ } ++ } ++ /* extent for replace */ ++ reiser4_set_extent(&rh.overwrite, HOLE_EXTENT_START, width - 1); ++ /* extent to be inserted */ ++ reiser4_set_extent(&rh.new_extents[0], UNALLOCATED_EXTENT_START, ++ 1); ++ rh.nr_new_extents = 1; ++ ++ /* have reiser4_replace_extent to return with @coord and ++ @uf_coord->lh set to unit which was inserted */ ++ return_inserted_position = 1; ++ *how = 5; ++ } else { ++ /* extent for replace */ ++ reiser4_set_extent(&rh.overwrite, HOLE_EXTENT_START, ++ pos_in_unit); ++ /* extents to be inserted */ ++ reiser4_set_extent(&rh.new_extents[0], UNALLOCATED_EXTENT_START, ++ 1); ++ reiser4_set_extent(&rh.new_extents[1], HOLE_EXTENT_START, ++ width - pos_in_unit - 1); ++ rh.nr_new_extents = 2; ++ ++ /* have reiser4_replace_extent to return with @coord and ++ @uf_coord->lh set to first of units which were inserted */ ++ return_inserted_position = 1; ++ *how = 6; ++ } ++ unit_key_by_coord(coord, &rh.paste_key); ++ set_key_offset(&rh.paste_key, get_key_offset(&rh.paste_key) + ++ extent_get_width(&rh.overwrite) * current_blocksize); ++ ++ uf_coord->valid = 0; ++ return reiser4_replace_extent(&rh, return_inserted_position); ++} ++ ++/** ++ * overwrite_one_block - ++ * @uf_coord: ++ * @key: ++ * @node: ++ * ++ * If @node corresponds to hole extent - create unallocated extent for it and ++ * assign fake block number. 
If @node corresponds to allocated extent - assign ++ * block number of jnode ++ */ ++static int overwrite_one_block(uf_coord_t *uf_coord, const reiser4_key *key, ++ jnode *node, int *hole_plugged) ++{ ++ int result; ++ extent_coord_extension_t *ext_coord; ++ reiser4_extent *ext; ++ reiser4_block_nr block; ++ int how; ++ ++ assert("vs-1312", uf_coord->coord.between == AT_UNIT); ++ ++ result = 0; ++ ext_coord = ext_coord_by_uf_coord(uf_coord); ++ ext = ext_by_ext_coord(uf_coord); ++ assert("", state_of_extent(ext) != UNALLOCATED_EXTENT); ++ ++ switch (state_of_extent(ext)) { ++ case ALLOCATED_EXTENT: ++ block = extent_get_start(ext) + ext_coord->pos_in_unit; ++ break; ++ ++ case HOLE_EXTENT: ++ result = DQUOT_ALLOC_BLOCK_NODIRTY(mapping_jnode(node)->host, 1); ++ BUG_ON(result != 0); ++ result = plug_hole(uf_coord, key, &how); ++ if (result) ++ return result; ++ block = fake_blocknr_unformatted(1); ++ if (hole_plugged) ++ *hole_plugged = 1; ++ JF_SET(node, JNODE_CREATED); ++ break; ++ ++ default: ++ return RETERR(-EIO); ++ } ++ ++ jnode_set_block(node, &block); ++ return 0; ++} ++ ++/** ++ * move_coord - move coordinate forward ++ * @uf_coord: ++ * ++ * Move coordinate one data block pointer forward. Return 1 if coord is set to ++ * the last one already or is invalid. ++ */ ++static int move_coord(uf_coord_t *uf_coord) ++{ ++ extent_coord_extension_t *ext_coord; ++ ++ if (uf_coord->valid == 0) ++ return 1; ++ ext_coord = &uf_coord->extension.extent; ++ ext_coord->pos_in_unit ++; ++ if (ext_coord->pos_in_unit < ext_coord->width) ++ /* coordinate moved within the unit */ ++ return 0; ++ ++ /* end of unit is reached. 
Try to move to next unit */ ++ ext_coord->pos_in_unit = 0; ++ uf_coord->coord.unit_pos ++; ++ if (uf_coord->coord.unit_pos < ext_coord->nr_units) { ++ /* coordinate moved to next unit */ ++ ext_coord->ext_offset += sizeof(reiser4_extent); ++ ext_coord->width = ++ extent_get_width(ext_by_offset ++ (uf_coord->coord.node, ++ ext_coord->ext_offset)); ++ ON_DEBUG(ext_coord->extent = ++ *ext_by_offset(uf_coord->coord.node, ++ ext_coord->ext_offset)); ++ return 0; ++ } ++ /* end of item is reached */ ++ uf_coord->valid = 0; ++ return 1; ++} ++ ++/** ++ * overwrite_extent - ++ * @inode: ++ * ++ * Returns number of handled jnodes. ++ */ ++static int overwrite_extent(uf_coord_t *uf_coord, const reiser4_key *key, ++ jnode **jnodes, int count, int *plugged_hole) ++{ ++ int result; ++ reiser4_key k; ++ int i; ++ jnode *node; ++ ++ k = *key; ++ for (i = 0; i < count; i ++) { ++ node = jnodes[i]; ++ if (*jnode_get_block(node) == 0) { ++ result = overwrite_one_block(uf_coord, &k, node, plugged_hole); ++ if (result) ++ return result; ++ } ++ /* ++ * make sure that we hold long term locked twig node containing ++ * all jnodes we are about to capture ++ */ ++ check_jnodes(uf_coord->lh->node, &k, 1); ++ /* ++ * assign fake block numbers to all jnodes, capture and mark ++ * them dirty ++ */ ++ spin_lock_jnode(node); ++ result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0); ++ BUG_ON(result != 0); ++ jnode_make_dirty_locked(node); ++ spin_unlock_jnode(node); ++ ++ if (uf_coord->valid == 0) ++ return i + 1; ++ ++ check_uf_coord(uf_coord, &k); ++ ++ if (move_coord(uf_coord)) { ++ /* ++ * failed to move to the next node pointer. Either end ++ * of file or end of twig node is reached. In the later ++ * case we might go to the right neighbor. 
++ */ ++ uf_coord->valid = 0; ++ return i + 1; ++ } ++ set_key_offset(&k, get_key_offset(&k) + PAGE_CACHE_SIZE); ++ } ++ ++ return count; ++} ++ ++/** ++ * reiser4_update_extent ++ * @file: ++ * @jnodes: ++ * @count: ++ * @off: ++ * ++ */ ++int reiser4_update_extent(struct inode *inode, jnode *node, loff_t pos, ++ int *plugged_hole) ++{ ++ int result; ++ znode *loaded; ++ uf_coord_t uf_coord; ++ coord_t *coord; ++ lock_handle lh; ++ reiser4_key key; ++ ++ assert("", reiser4_lock_counters()->d_refs == 0); ++ ++ key_by_inode_and_offset_common(inode, pos, &key); ++ ++ init_uf_coord(&uf_coord, &lh); ++ coord = &uf_coord.coord; ++ result = find_file_item_nohint(coord, &lh, &key, ++ ZNODE_WRITE_LOCK, inode); ++ if (IS_CBKERR(result)) { ++ assert("", reiser4_lock_counters()->d_refs == 0); ++ return result; ++ } ++ ++ result = zload(coord->node); ++ BUG_ON(result != 0); ++ loaded = coord->node; ++ ++ if (coord->between == AFTER_UNIT) { ++ /* ++ * append existing extent item with unallocated extent of width ++ * nr_jnodes ++ */ ++ init_coord_extension_extent(&uf_coord, ++ get_key_offset(&key)); ++ result = append_last_extent(&uf_coord, &key, ++ &node, 1); ++ } else if (coord->between == AT_UNIT) { ++ /* ++ * overwrite ++ * not optimal yet. Will be optimized if new write will show ++ * performance win. ++ */ ++ init_coord_extension_extent(&uf_coord, ++ get_key_offset(&key)); ++ result = overwrite_extent(&uf_coord, &key, ++ &node, 1, plugged_hole); ++ } else { ++ /* ++ * there are no items of this file in the tree yet. Create ++ * first item of the file inserting one unallocated extent of ++ * width nr_jnodes ++ */ ++ result = insert_first_extent(&uf_coord, &key, &node, 1, inode); ++ } ++ assert("", result == 1 || result < 0); ++ zrelse(loaded); ++ done_lh(&lh); ++ assert("", reiser4_lock_counters()->d_refs == 0); ++ return (result == 1) ? 
0 : result; ++} ++ ++/** ++ * update_extents ++ * @file: ++ * @jnodes: ++ * @count: ++ * @off: ++ * ++ */ ++static int update_extents(struct file *file, jnode **jnodes, int count, loff_t pos) ++{ ++ struct inode *inode; ++ struct hint hint; ++ reiser4_key key; ++ int result; ++ znode *loaded; ++ ++ result = load_file_hint(file, &hint); ++ BUG_ON(result != 0); ++ ++ inode = file->f_dentry->d_inode; ++ if (count != 0) ++ /* ++ * count == 0 is special case: expanding truncate ++ */ ++ pos = (loff_t)index_jnode(jnodes[0]) << PAGE_CACHE_SHIFT; ++ key_by_inode_and_offset_common(inode, pos, &key); ++ ++ assert("", reiser4_lock_counters()->d_refs == 0); ++ ++ do { ++ result = find_file_item(&hint, &key, ZNODE_WRITE_LOCK, inode); ++ if (IS_CBKERR(result)) { ++ assert("", reiser4_lock_counters()->d_refs == 0); ++ return result; ++ } ++ ++ result = zload(hint.ext_coord.coord.node); ++ BUG_ON(result != 0); ++ loaded = hint.ext_coord.coord.node; ++ ++ if (hint.ext_coord.coord.between == AFTER_UNIT) { ++ /* ++ * append existing extent item with unallocated extent ++ * of width nr_jnodes ++ */ ++ if (hint.ext_coord.valid == 0) ++ /* NOTE: get statistics on this */ ++ init_coord_extension_extent(&hint.ext_coord, ++ get_key_offset(&key)); ++ result = append_last_extent(&hint.ext_coord, &key, ++ jnodes, count); ++ } else if (hint.ext_coord.coord.between == AT_UNIT) { ++ /* ++ * overwrite ++ * not optimal yet. Will be optimized if new write will ++ * show performance win. ++ */ ++ if (hint.ext_coord.valid == 0) ++ /* NOTE: get statistics on this */ ++ init_coord_extension_extent(&hint.ext_coord, ++ get_key_offset(&key)); ++ result = overwrite_extent(&hint.ext_coord, &key, ++ jnodes, count, NULL); ++ } else { ++ /* ++ * there are no items of this file in the tree ++ * yet. 
Create first item of the file inserting one ++ * unallocated extent of * width nr_jnodes ++ */ ++ result = insert_first_extent(&hint.ext_coord, &key, ++ jnodes, count, inode); ++ } ++ zrelse(loaded); ++ if (result < 0) { ++ done_lh(hint.ext_coord.lh); ++ break; ++ } ++ ++ jnodes += result; ++ count -= result; ++ set_key_offset(&key, get_key_offset(&key) + result * PAGE_CACHE_SIZE); ++ ++ /* seal and unlock znode */ ++ if (hint.ext_coord.valid) ++ reiser4_set_hint(&hint, &key, ZNODE_WRITE_LOCK); ++ else ++ reiser4_unset_hint(&hint); ++ ++ } while (count > 0); ++ ++ save_file_hint(file, &hint); ++ assert("", reiser4_lock_counters()->d_refs == 0); ++ return result; ++} ++ ++/** ++ * write_extent_reserve_space - reserve space for extent write operation ++ * @inode: ++ * ++ * Estimates and reserves space which may be required for writing ++ * WRITE_GRANULARITY pages of file. ++ */ ++static int write_extent_reserve_space(struct inode *inode) ++{ ++ __u64 count; ++ reiser4_tree *tree; ++ ++ /* ++ * to write WRITE_GRANULARITY pages to a file by extents we have to ++ * reserve disk space for: ++ ++ * 1. find_file_item may have to insert empty node to the tree (empty ++ * leaf node between two extent items). This requires 1 block and ++ * number of blocks which are necessary to perform insertion of an ++ * internal item into twig level. ++ ++ * 2. for each of written pages there might be needed 1 block and ++ * number of blocks which might be necessary to perform insertion of or ++ * paste to an extent item. ++ ++ * 3. 
stat data update ++ */ ++ tree = reiser4_tree_by_inode(inode); ++ count = estimate_one_insert_item(tree) + ++ WRITE_GRANULARITY * (1 + estimate_one_insert_into_item(tree)) + ++ estimate_one_insert_item(tree); ++ grab_space_enable(); ++ return reiser4_grab_space(count, 0 /* flags */); ++} ++ ++/** ++ * reiser4_write_extent - write method of extent item plugin ++ * @file: file to write to ++ * @buf: address of user-space buffer ++ * @write_amount: number of bytes to write ++ * @off: position in file to write to ++ * ++ */ ++ssize_t reiser4_write_extent(struct file *file, const char __user *buf, ++ size_t count, loff_t *pos) ++{ ++ int have_to_update_extent; ++ int nr_pages; ++ struct page *page; ++ jnode *jnodes[WRITE_GRANULARITY + 1]; ++ struct inode *inode; ++ unsigned long index; ++ unsigned long end; ++ int i; ++ int to_page, page_off; ++ size_t left, written; ++ int result; ++ ++ inode = file->f_dentry->d_inode; ++ if (write_extent_reserve_space(inode)) ++ return RETERR(-ENOSPC); ++ ++ if (count == 0) { ++ /* truncate case */ ++ update_extents(file, jnodes, 0, *pos); ++ return 0; ++ } ++ ++ BUG_ON(get_current_context()->trans->atom != NULL); ++ ++ index = *pos >> PAGE_CACHE_SHIFT; ++ /* calculate number of pages which are to be written */ ++ end = ((*pos + count - 1) >> PAGE_CACHE_SHIFT); ++ nr_pages = end - index + 1; ++ assert("", nr_pages <= WRITE_GRANULARITY + 1); ++ ++ /* get pages and jnodes */ ++ for (i = 0; i < nr_pages; i ++) { ++ page = find_or_create_page(inode->i_mapping, index + i, ++ reiser4_ctx_gfp_mask_get()); ++ if (page == NULL) { ++ while(i --) { ++ unlock_page(jnode_page(jnodes[i])); ++ page_cache_release(jnode_page(jnodes[i])); ++ } ++ return RETERR(-ENOMEM); ++ } ++ ++ jnodes[i] = jnode_of_page(page); ++ if (IS_ERR(jnodes[i])) { ++ unlock_page(page); ++ page_cache_release(page); ++ while (i --) { ++ jput(jnodes[i]); ++ page_cache_release(jnode_page(jnodes[i])); ++ } ++ return RETERR(-ENOMEM); ++ } ++ /* prevent jnode and page from 
disconnecting */ ++ JF_SET(jnodes[i], JNODE_WRITE_PREPARED); ++ unlock_page(page); ++ } ++ ++ BUG_ON(get_current_context()->trans->atom != NULL); ++ ++ have_to_update_extent = 0; ++ ++ left = count; ++ page_off = (*pos & (PAGE_CACHE_SIZE - 1)); ++ for (i = 0; i < nr_pages; i ++) { ++ to_page = PAGE_CACHE_SIZE - page_off; ++ if (to_page > left) ++ to_page = left; ++ page = jnode_page(jnodes[i]); ++ if (page_offset(page) < inode->i_size && ++ !PageUptodate(page) && to_page != PAGE_CACHE_SIZE) { ++ /* ++ * the above is not optimal for partial write to last ++ * page of file when file size is not at boundary of ++ * page ++ */ ++ lock_page(page); ++ if (!PageUptodate(page)) { ++ result = readpage_unix_file(NULL, page); ++ BUG_ON(result != 0); ++ /* wait for read completion */ ++ lock_page(page); ++ BUG_ON(!PageUptodate(page)); ++ } else ++ result = 0; ++ unlock_page(page); ++ } ++ ++ BUG_ON(get_current_context()->trans->atom != NULL); ++ fault_in_pages_readable(buf, to_page); ++ BUG_ON(get_current_context()->trans->atom != NULL); ++ ++ lock_page(page); ++ if (!PageUptodate(page) && to_page != PAGE_CACHE_SIZE) { ++ void *kaddr; ++ ++ kaddr = kmap_atomic(page, KM_USER0); ++ memset(kaddr, 0, page_off); ++ memset(kaddr + page_off + to_page, 0, ++ PAGE_CACHE_SIZE - (page_off + to_page)); ++ flush_dcache_page(page); ++ kunmap_atomic(kaddr, KM_USER0); ++ } ++ ++ written = filemap_copy_from_user(page, page_off, buf, to_page); ++ flush_dcache_page(page); ++ reiser4_set_page_dirty_internal(page); ++ unlock_page(page); ++ mark_page_accessed(page); ++ SetPageUptodate(page); ++ page_cache_release(page); ++ ++ if (jnodes[i]->blocknr == 0) ++ have_to_update_extent ++; ++ ++ page_off = 0; ++ buf += to_page; ++ left -= to_page; ++ BUG_ON(get_current_context()->trans->atom != NULL); ++ } ++ ++ if (have_to_update_extent) { ++ update_extents(file, jnodes, nr_pages, *pos); ++ } else { ++ for (i = 0; i < nr_pages; i ++) { ++ spin_lock_jnode(jnodes[i]); ++ result = 
reiser4_try_capture(jnodes[i], ++ ZNODE_WRITE_LOCK, 0); ++ BUG_ON(result != 0); ++ jnode_make_dirty_locked(jnodes[i]); ++ spin_unlock_jnode(jnodes[i]); ++ } ++ } ++ ++ for (i = 0; i < nr_pages; i ++) { ++ JF_CLR(jnodes[i], JNODE_WRITE_PREPARED); ++ jput(jnodes[i]); ++ } ++ ++ /* the only error handled so far is EFAULT on copy_from_user */ ++ return (count - left) ? (count - left) : -EFAULT; ++} ++ ++static inline void zero_page(struct page *page) ++{ ++ char *kaddr = kmap_atomic(page, KM_USER0); ++ ++ memset(kaddr, 0, PAGE_CACHE_SIZE); ++ flush_dcache_page(page); ++ kunmap_atomic(kaddr, KM_USER0); ++ SetPageUptodate(page); ++ unlock_page(page); ++} ++ ++int reiser4_do_readpage_extent(reiser4_extent * ext, reiser4_block_nr pos, ++ struct page *page) ++{ ++ jnode *j; ++ struct address_space *mapping; ++ unsigned long index; ++ oid_t oid; ++ reiser4_block_nr block; ++ ++ mapping = page->mapping; ++ oid = get_inode_oid(mapping->host); ++ index = page->index; ++ ++ switch (state_of_extent(ext)) { ++ case HOLE_EXTENT: ++ /* ++ * it is possible to have hole page with jnode, if page was ++ * eflushed previously. 
++ */ ++ j = jfind(mapping, index); ++ if (j == NULL) { ++ zero_page(page); ++ return 0; ++ } ++ spin_lock_jnode(j); ++ if (!jnode_page(j)) { ++ jnode_attach_page(j, page); ++ } else { ++ BUG_ON(jnode_page(j) != page); ++ assert("vs-1504", jnode_page(j) == page); ++ } ++ block = *jnode_get_io_block(j); ++ spin_unlock_jnode(j); ++ if (block == 0) { ++ zero_page(page); ++ jput(j); ++ return 0; ++ } ++ break; ++ ++ case ALLOCATED_EXTENT: ++ j = jnode_of_page(page); ++ if (IS_ERR(j)) ++ return PTR_ERR(j); ++ if (*jnode_get_block(j) == 0) { ++ reiser4_block_nr blocknr; ++ ++ blocknr = extent_get_start(ext) + pos; ++ jnode_set_block(j, &blocknr); ++ } else ++ assert("vs-1403", ++ j->blocknr == extent_get_start(ext) + pos); ++ break; ++ ++ case UNALLOCATED_EXTENT: ++ j = jfind(mapping, index); ++ assert("nikita-2688", j); ++ assert("vs-1426", jnode_page(j) == NULL); ++ ++ spin_lock_jnode(j); ++ jnode_attach_page(j, page); ++ spin_unlock_jnode(j); ++ break; ++ ++ default: ++ warning("vs-957", "wrong extent\n"); ++ return RETERR(-EIO); ++ } ++ ++ BUG_ON(j == 0); ++ reiser4_page_io(page, j, READ, reiser4_ctx_gfp_mask_get()); ++ jput(j); ++ return 0; ++} ++ ++/* Implements plugin->u.item.s.file.read operation for extent items. 
*/ ++int reiser4_read_extent(struct file *file, flow_t *flow, hint_t *hint) ++{ ++ int result; ++ struct page *page; ++ unsigned long cur_page, next_page; ++ unsigned long page_off, count; ++ struct address_space *mapping; ++ loff_t file_off; ++ uf_coord_t *uf_coord; ++ coord_t *coord; ++ extent_coord_extension_t *ext_coord; ++ unsigned long nr_pages; ++ char *kaddr; ++ ++ assert("vs-1353", current_blocksize == PAGE_CACHE_SIZE); ++ assert("vs-572", flow->user == 1); ++ assert("vs-1351", flow->length > 0); ++ ++ uf_coord = &hint->ext_coord; ++ ++ check_uf_coord(uf_coord, NULL); ++ assert("vs-33", uf_coord->lh == &hint->lh); ++ ++ coord = &uf_coord->coord; ++ assert("vs-1119", znode_is_rlocked(coord->node)); ++ assert("vs-1120", znode_is_loaded(coord->node)); ++ assert("vs-1256", coord_matches_key_extent(coord, &flow->key)); ++ ++ mapping = file->f_dentry->d_inode->i_mapping; ++ ext_coord = &uf_coord->extension.extent; ++ ++ /* offset in a file to start read from */ ++ file_off = get_key_offset(&flow->key); ++ /* offset within the page to start read from */ ++ page_off = (unsigned long)(file_off & (PAGE_CACHE_SIZE - 1)); ++ /* bytes which can be read from the page which contains file_off */ ++ count = PAGE_CACHE_SIZE - page_off; ++ ++ /* index of page containing offset read is to start from */ ++ cur_page = (unsigned long)(file_off >> PAGE_CACHE_SHIFT); ++ next_page = cur_page; ++ /* number of pages flow spans over */ ++ nr_pages = ++ ((file_off + flow->length + PAGE_CACHE_SIZE - ++ 1) >> PAGE_CACHE_SHIFT) - cur_page; ++ ++ /* we start having twig node read locked. However, we do not want to ++ keep that lock all the time readahead works. So, set a sel and ++ release twig node. 
*/ ++ reiser4_set_hint(hint, &flow->key, ZNODE_READ_LOCK); ++ /* &hint->lh is done-ed */ ++ ++ do { ++ reiser4_txn_restart_current(); ++ page = read_mapping_page(mapping, cur_page, file); ++ if (IS_ERR(page)) ++ return PTR_ERR(page); ++ lock_page(page); ++ if (!PageUptodate(page)) { ++ unlock_page(page); ++ page_cache_release(page); ++ warning("jmacd-97178", "extent_read: page is not up to date"); ++ return RETERR(-EIO); ++ } ++ mark_page_accessed(page); ++ unlock_page(page); ++ ++ /* If users can be writing to this page using arbitrary virtual ++ addresses, take care about potential aliasing before reading ++ the page on the kernel side. ++ */ ++ if (mapping_writably_mapped(mapping)) ++ flush_dcache_page(page); ++ ++ assert("nikita-3034", reiser4_schedulable()); ++ ++ /* number of bytes which are to be read from the page */ ++ if (count > flow->length) ++ count = flow->length; ++ ++ result = fault_in_pages_writeable(flow->data, count); ++ if (result) { ++ page_cache_release(page); ++ return RETERR(-EFAULT); ++ } ++ ++ kaddr = kmap_atomic(page, KM_USER0); ++ result = __copy_to_user_inatomic(flow->data, ++ kaddr + page_off, count); ++ kunmap_atomic(kaddr, KM_USER0); ++ if (result != 0) { ++ kaddr = kmap(page); ++ result = __copy_to_user(flow->data, kaddr + page_off, count); ++ kunmap(page); ++ if (unlikely(result)) ++ return RETERR(-EFAULT); ++ } ++ ++ page_cache_release(page); ++ ++ /* increase key (flow->key), update user area pointer (flow->data) */ ++ move_flow_forward(flow, count); ++ ++ page_off = 0; ++ cur_page ++; ++ count = PAGE_CACHE_SIZE; ++ nr_pages--; ++ } while (flow->length); ++ ++ return 0; ++} ++ ++/* ++ plugin->s.file.readpage ++ reiser4_read->unix_file_read->page_cache_readahead->reiser4_readpage->unix_file_readpage->extent_readpage ++ or ++ filemap_nopage->reiser4_readpage->readpage_unix_file->->readpage_extent ++ ++ At the beginning: coord->node is read locked, zloaded, page is ++ locked, coord is set to existing unit inside of extent item (it 
is not necessary that coord matches to page->index) ++*/ ++int reiser4_readpage_extent(void *vp, struct page *page) ++{ ++ uf_coord_t *uf_coord = vp; ++ ON_DEBUG(coord_t * coord = &uf_coord->coord); ++ ON_DEBUG(reiser4_key key); ++ ++ assert("vs-1040", PageLocked(page)); ++ assert("vs-1050", !PageUptodate(page)); ++ assert("vs-1039", page->mapping && page->mapping->host); ++ ++ assert("vs-1044", znode_is_loaded(coord->node)); ++ assert("vs-758", item_is_extent(coord)); ++ assert("vs-1046", coord_is_existing_unit(coord)); ++ assert("vs-1045", znode_is_rlocked(coord->node)); ++ assert("vs-1047", ++ page->mapping->host->i_ino == ++ get_key_objectid(item_key_by_coord(coord, &key))); ++ check_uf_coord(uf_coord, NULL); ++ ++ return reiser4_do_readpage_extent( ++ ext_by_ext_coord(uf_coord), ++ uf_coord->extension.extent.pos_in_unit, page); ++} ++ ++/** ++ * get_block_address_extent ++ * @coord: ++ * @block: ++ * @result: ++ * ++ * ++ */ ++int get_block_address_extent(const coord_t *coord, sector_t block, ++ sector_t *result) ++{ ++ reiser4_extent *ext; ++ ++ if (!coord_is_existing_unit(coord)) ++ return RETERR(-EINVAL); ++ ++ ext = extent_by_coord(coord); ++ ++ if (state_of_extent(ext) != ALLOCATED_EXTENT) ++ /* FIXME: bad things may happen if it is unallocated extent */ ++ *result = 0; ++ else { ++ reiser4_key key; ++ ++ unit_key_by_coord(coord, &key); ++ assert("vs-1645", ++ block >= get_key_offset(&key) >> current_blocksize_bits); ++ assert("vs-1646", ++ block < ++ (get_key_offset(&key) >> current_blocksize_bits) + ++ extent_get_width(ext)); ++ *result = ++ extent_get_start(ext) + (block - ++ (get_key_offset(&key) >> ++ current_blocksize_bits)); ++ } ++ return 0; ++} ++ ++/* ++ plugin->u.item.s.file.append_key ++ key of first byte which is the next to last byte by addressed by this extent ++*/ ++reiser4_key *append_key_extent(const coord_t * coord, reiser4_key * key) ++{ ++ item_key_by_coord(coord, key); ++ set_key_offset(key, ++ get_key_offset(key) + 
reiser4_extent_size(coord, ++ nr_units_extent ++ (coord))); ++ ++ assert("vs-610", get_key_offset(key) ++ && (get_key_offset(key) & (current_blocksize - 1)) == 0); ++ return key; ++} ++ ++/* plugin->u.item.s.file.init_coord_extension */ ++void init_coord_extension_extent(uf_coord_t * uf_coord, loff_t lookuped) ++{ ++ coord_t *coord; ++ extent_coord_extension_t *ext_coord; ++ reiser4_key key; ++ loff_t offset; ++ ++ assert("vs-1295", uf_coord->valid == 0); ++ ++ coord = &uf_coord->coord; ++ assert("vs-1288", coord_is_iplug_set(coord)); ++ assert("vs-1327", znode_is_loaded(coord->node)); ++ ++ if (coord->between != AFTER_UNIT && coord->between != AT_UNIT) ++ return; ++ ++ ext_coord = &uf_coord->extension.extent; ++ ext_coord->nr_units = nr_units_extent(coord); ++ ext_coord->ext_offset = ++ (char *)extent_by_coord(coord) - zdata(coord->node); ++ ext_coord->width = extent_get_width(extent_by_coord(coord)); ++ ON_DEBUG(ext_coord->extent = *extent_by_coord(coord)); ++ uf_coord->valid = 1; ++ ++ /* pos_in_unit is the only uninitialized field in extended coord */ ++ if (coord->between == AFTER_UNIT) { ++ assert("vs-1330", ++ coord->unit_pos == nr_units_extent(coord) - 1); ++ ++ ext_coord->pos_in_unit = ext_coord->width - 1; ++ } else { ++ /* AT_UNIT */ ++ unit_key_by_coord(coord, &key); ++ offset = get_key_offset(&key); ++ ++ assert("vs-1328", offset <= lookuped); ++ assert("vs-1329", ++ lookuped < ++ offset + ext_coord->width * current_blocksize); ++ ext_coord->pos_in_unit = ++ ((lookuped - offset) >> current_blocksize_bits); ++ } ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * scroll-step: 1 ++ * End: ++ */ +diff --git a/fs/reiser4/plugin/item/extent_flush_ops.c b/fs/reiser4/plugin/item/extent_flush_ops.c +new file mode 100644 +index 0000000..02dda3e +--- /dev/null ++++ b/fs/reiser4/plugin/item/extent_flush_ops.c +@@ -0,0 +1,1028 @@ ++/* Copyright 2001, 2002, 2003 by 
Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "item.h" ++#include "../../tree.h" ++#include "../../jnode.h" ++#include "../../super.h" ++#include "../../flush.h" ++#include "../../carry.h" ++#include "../object.h" ++ ++#include ++ ++static reiser4_block_nr extent_unit_start(const coord_t * item); ++ ++/* Return either first or last extent (depending on @side) of the item ++ @coord is set to. Set @pos_in_unit either to first or to last block ++ of extent. */ ++static reiser4_extent *extent_utmost_ext(const coord_t * coord, sideof side, ++ reiser4_block_nr * pos_in_unit) ++{ ++ reiser4_extent *ext; ++ ++ if (side == LEFT_SIDE) { ++ /* get first extent of item */ ++ ext = extent_item(coord); ++ *pos_in_unit = 0; ++ } else { ++ /* get last extent of item and last position within it */ ++ assert("vs-363", side == RIGHT_SIDE); ++ ext = extent_item(coord) + coord_last_unit_pos(coord); ++ *pos_in_unit = extent_get_width(ext) - 1; ++ } ++ ++ return ext; ++} ++ ++/* item_plugin->f.utmost_child */ ++/* Return the child. Coord is set to extent item. 
Find jnode corresponding ++ either to first or to last unformatted node pointed by the item */ ++int utmost_child_extent(const coord_t * coord, sideof side, jnode ** childp) ++{ ++ reiser4_extent *ext; ++ reiser4_block_nr pos_in_unit; ++ ++ ext = extent_utmost_ext(coord, side, &pos_in_unit); ++ ++ switch (state_of_extent(ext)) { ++ case HOLE_EXTENT: ++ *childp = NULL; ++ return 0; ++ case ALLOCATED_EXTENT: ++ case UNALLOCATED_EXTENT: ++ break; ++ default: ++ /* this should never happen */ ++ assert("vs-1417", 0); ++ } ++ ++ { ++ reiser4_key key; ++ reiser4_tree *tree; ++ unsigned long index; ++ ++ if (side == LEFT_SIDE) { ++ /* get key of first byte addressed by the extent */ ++ item_key_by_coord(coord, &key); ++ } else { ++ /* get key of byte which next after last byte addressed by the extent */ ++ append_key_extent(coord, &key); ++ } ++ ++ assert("vs-544", ++ (get_key_offset(&key) >> PAGE_CACHE_SHIFT) < ~0ul); ++ /* index of first or last (depending on @side) page addressed ++ by the extent */ ++ index = ++ (unsigned long)(get_key_offset(&key) >> PAGE_CACHE_SHIFT); ++ if (side == RIGHT_SIDE) ++ index--; ++ ++ tree = coord->node->zjnode.tree; ++ *childp = jlookup(tree, get_key_objectid(&key), index); ++ } ++ ++ return 0; ++} ++ ++/* item_plugin->f.utmost_child_real_block */ ++/* Return the child's block, if allocated. */ ++int ++utmost_child_real_block_extent(const coord_t * coord, sideof side, ++ reiser4_block_nr * block) ++{ ++ reiser4_extent *ext; ++ ++ ext = extent_by_coord(coord); ++ ++ switch (state_of_extent(ext)) { ++ case ALLOCATED_EXTENT: ++ *block = extent_get_start(ext); ++ if (side == RIGHT_SIDE) ++ *block += extent_get_width(ext) - 1; ++ break; ++ case HOLE_EXTENT: ++ case UNALLOCATED_EXTENT: ++ *block = 0; ++ break; ++ default: ++ /* this should never happen */ ++ assert("vs-1418", 0); ++ } ++ ++ return 0; ++} ++ ++/* item_plugin->f.scan */ ++/* Performs leftward scanning starting from an unformatted node and its parent coordinate. 
++ This scan continues, advancing the parent coordinate, until either it encounters a ++ formatted child or it finishes scanning this node. ++ ++ If unallocated, the entire extent must be dirty and in the same atom. (Actually, I'm ++ not sure this is last property (same atom) is enforced, but it should be the case since ++ one atom must write the parent and the others must read the parent, thus fusing?). In ++ any case, the code below asserts this case for unallocated extents. Unallocated ++ extents are thus optimized because we can skip to the endpoint when scanning. ++ ++ It returns control to reiser4_scan_extent, handles these terminating conditions, ++ e.g., by loading the next twig. ++*/ ++int reiser4_scan_extent(flush_scan * scan) ++{ ++ coord_t coord; ++ jnode *neighbor; ++ unsigned long scan_index, unit_index, unit_width, scan_max, scan_dist; ++ reiser4_block_nr unit_start; ++ __u64 oid; ++ reiser4_key key; ++ int ret = 0, allocated, incr; ++ reiser4_tree *tree; ++ ++ if (!JF_ISSET(scan->node, JNODE_DIRTY)) { ++ scan->stop = 1; ++ return 0; /* Race with truncate, this node is already ++ * truncated. */ ++ } ++ ++ coord_dup(&coord, &scan->parent_coord); ++ ++ assert("jmacd-1404", !reiser4_scan_finished(scan)); ++ assert("jmacd-1405", jnode_get_level(scan->node) == LEAF_LEVEL); ++ assert("jmacd-1406", jnode_is_unformatted(scan->node)); ++ ++ /* The scan_index variable corresponds to the current page index of the ++ unformatted block scan position. 
*/ ++ scan_index = index_jnode(scan->node); ++ ++ assert("jmacd-7889", item_is_extent(&coord)); ++ ++ repeat: ++ /* objectid of file */ ++ oid = get_key_objectid(item_key_by_coord(&coord, &key)); ++ ++ allocated = !extent_is_unallocated(&coord); ++ /* Get the values of this extent unit: */ ++ unit_index = extent_unit_index(&coord); ++ unit_width = extent_unit_width(&coord); ++ unit_start = extent_unit_start(&coord); ++ ++ assert("jmacd-7187", unit_width > 0); ++ assert("jmacd-7188", scan_index >= unit_index); ++ assert("jmacd-7189", scan_index <= unit_index + unit_width - 1); ++ ++ /* Depending on the scan direction, we set different maximum values for scan_index ++ (scan_max) and the number of nodes that would be passed if the scan goes the ++ entire way (scan_dist). Incr is an integer reflecting the incremental ++ direction of scan_index. */ ++ if (reiser4_scanning_left(scan)) { ++ scan_max = unit_index; ++ scan_dist = scan_index - unit_index; ++ incr = -1; ++ } else { ++ scan_max = unit_index + unit_width - 1; ++ scan_dist = scan_max - unit_index; ++ incr = +1; ++ } ++ ++ tree = coord.node->zjnode.tree; ++ ++ /* If the extent is allocated we have to check each of its blocks. If the extent ++ is unallocated we can skip to the scan_max. */ ++ if (allocated) { ++ do { ++ neighbor = jlookup(tree, oid, scan_index); ++ if (neighbor == NULL) ++ goto stop_same_parent; ++ ++ if (scan->node != neighbor ++ && !reiser4_scan_goto(scan, neighbor)) { ++ /* @neighbor was jput() by reiser4_scan_goto */ ++ goto stop_same_parent; ++ } ++ ++ ret = scan_set_current(scan, neighbor, 1, &coord); ++ if (ret != 0) { ++ goto exit; ++ } ++ ++ /* reference to @neighbor is stored in @scan, no need ++ to jput(). */ ++ scan_index += incr; ++ ++ } while (incr + scan_max != scan_index); ++ ++ } else { ++ /* Optimized case for unallocated extents, skip to the end. 
*/ ++ neighbor = jlookup(tree, oid, scan_max /*index */ ); ++ if (neighbor == NULL) { ++ /* Race with truncate */ ++ scan->stop = 1; ++ ret = 0; ++ goto exit; ++ } ++ ++ assert("zam-1043", ++ reiser4_blocknr_is_fake(jnode_get_block(neighbor))); ++ ++ ret = scan_set_current(scan, neighbor, scan_dist, &coord); ++ if (ret != 0) { ++ goto exit; ++ } ++ } ++ ++ if (coord_sideof_unit(&coord, scan->direction) == 0 ++ && item_is_extent(&coord)) { ++ /* Continue as long as there are more extent units. */ ++ ++ scan_index = ++ extent_unit_index(&coord) + ++ (reiser4_scanning_left(scan) ? ++ extent_unit_width(&coord) - 1 : 0); ++ goto repeat; ++ } ++ ++ if (0) { ++ stop_same_parent: ++ ++ /* If we are scanning left and we stop in the middle of an allocated ++ extent, we know the preceder immediately.. */ ++ /* middle of extent is (scan_index - unit_index) != 0. */ ++ if (reiser4_scanning_left(scan) && ++ (scan_index - unit_index) != 0) { ++ /* FIXME(B): Someone should step-through and verify that this preceder ++ calculation is indeed correct. */ ++ /* @unit_start is starting block (number) of extent ++ unit. Flush stopped at the @scan_index block from ++ the beginning of the file, which is (scan_index - ++ unit_index) block within extent. ++ */ ++ if (unit_start) { ++ /* skip preceder update when we are at hole */ ++ scan->preceder_blk = ++ unit_start + scan_index - unit_index; ++ check_preceder(scan->preceder_blk); ++ } ++ } ++ ++ /* In this case, we leave coord set to the parent of scan->node. */ ++ scan->stop = 1; ++ ++ } else { ++ /* In this case, we are still scanning, coord is set to the next item which is ++ either off-the-end of the node or not an extent. 
*/ ++ assert("jmacd-8912", scan->stop == 0); ++ assert("jmacd-7812", ++ (coord_is_after_sideof_unit(&coord, scan->direction) ++ || !item_is_extent(&coord))); ++ } ++ ++ ret = 0; ++ exit: ++ return ret; ++} ++ ++/* ask block allocator for some blocks */ ++static void extent_allocate_blocks(reiser4_blocknr_hint *preceder, ++ reiser4_block_nr wanted_count, ++ reiser4_block_nr *first_allocated, ++ reiser4_block_nr *allocated, ++ block_stage_t block_stage) ++{ ++ *allocated = wanted_count; ++ preceder->max_dist = 0; /* scan whole disk, if needed */ ++ ++ /* that number of blocks (wanted_count) is either in UNALLOCATED or in GRABBED */ ++ preceder->block_stage = block_stage; ++ ++ /* FIXME: we do not handle errors here now */ ++ check_me("vs-420", ++ reiser4_alloc_blocks(preceder, first_allocated, allocated, ++ BA_PERMANENT) == 0); ++ /* update flush_pos's preceder to last allocated block number */ ++ preceder->blk = *first_allocated + *allocated - 1; ++} ++ ++/* when on flush time unallocated extent is to be replaced with allocated one it may happen that one unallocated extent ++ will have to be replaced with set of allocated extents. In this case insert_into_item will be called which may have ++ to add new nodes into tree. Space for that is taken from inviolable reserve (5%). 
*/ ++static reiser4_block_nr reserve_replace(void) ++{ ++ reiser4_block_nr grabbed, needed; ++ ++ grabbed = get_current_context()->grabbed_blocks; ++ needed = estimate_one_insert_into_item(current_tree); ++ check_me("vpf-340", !reiser4_grab_space_force(needed, BA_RESERVED)); ++ return grabbed; ++} ++ ++static void free_replace_reserved(reiser4_block_nr grabbed) ++{ ++ reiser4_context *ctx; ++ ++ ctx = get_current_context(); ++ grabbed2free(ctx, get_super_private(ctx->super), ++ ctx->grabbed_blocks - grabbed); ++} ++ ++/* Block offset of first block addressed by unit */ ++__u64 extent_unit_index(const coord_t * item) ++{ ++ reiser4_key key; ++ ++ assert("vs-648", coord_is_existing_unit(item)); ++ unit_key_by_coord(item, &key); ++ return get_key_offset(&key) >> current_blocksize_bits; ++} ++ ++/* AUDIT shouldn't return value be of reiser4_block_nr type? ++ Josh's answer: who knows? Is a "number of blocks" the same type as "block offset"? */ ++__u64 extent_unit_width(const coord_t * item) ++{ ++ assert("vs-649", coord_is_existing_unit(item)); ++ return width_by_coord(item); ++} ++ ++/* Starting block location of this unit */ ++static reiser4_block_nr extent_unit_start(const coord_t * item) ++{ ++ return extent_get_start(extent_by_coord(item)); ++} ++ ++/** ++ * split_allocated_extent - ++ * @coord: ++ * @pos_in_unit: ++ * ++ * replace allocated extent with two allocated extents ++ */ ++static int split_allocated_extent(coord_t *coord, reiser4_block_nr pos_in_unit) ++{ ++ int result; ++ struct replace_handle *h; ++ reiser4_extent *ext; ++ reiser4_block_nr grabbed; ++ ++ ext = extent_by_coord(coord); ++ assert("vs-1410", state_of_extent(ext) == ALLOCATED_EXTENT); ++ assert("vs-1411", extent_get_width(ext) > pos_in_unit); ++ ++ h = kmalloc(sizeof(*h), reiser4_ctx_gfp_mask_get()); ++ if (h == NULL) ++ return RETERR(-ENOMEM); ++ h->coord = coord; ++ h->lh = znode_lh(coord->node); ++ h->pkey = &h->key; ++ unit_key_by_coord(coord, h->pkey); ++ set_key_offset(h->pkey, ++ 
(get_key_offset(h->pkey) + ++ pos_in_unit * current_blocksize)); ++ reiser4_set_extent(&h->overwrite, extent_get_start(ext), ++ pos_in_unit); ++ reiser4_set_extent(&h->new_extents[0], ++ extent_get_start(ext) + pos_in_unit, ++ extent_get_width(ext) - pos_in_unit); ++ h->nr_new_extents = 1; ++ h->flags = COPI_DONT_SHIFT_LEFT; ++ h->paste_key = h->key; ++ ++ /* reserve space for extent unit paste, @grabbed is reserved before */ ++ grabbed = reserve_replace(); ++ result = reiser4_replace_extent(h, 0 /* leave @coord set to overwritten ++ extent */); ++ /* restore reserved */ ++ free_replace_reserved(grabbed); ++ kfree(h); ++ return result; ++} ++ ++/* replace extent @ext by extent @replace. Try to merge @replace with previous extent of the item (if there is ++ one). Return 1 if it succeeded, 0 - otherwise */ ++static int try_to_merge_with_left(coord_t *coord, reiser4_extent *ext, ++ reiser4_extent *replace) ++{ ++ assert("vs-1415", extent_by_coord(coord) == ext); ++ ++ if (coord->unit_pos == 0 ++ || state_of_extent(ext - 1) != ALLOCATED_EXTENT) ++ /* @ext either does not exist or is not allocated extent */ ++ return 0; ++ if (extent_get_start(ext - 1) + extent_get_width(ext - 1) != ++ extent_get_start(replace)) ++ return 0; ++ ++ /* we can glue, widen previous unit */ ++ extent_set_width(ext - 1, ++ extent_get_width(ext - 1) + extent_get_width(replace)); ++ ++ if (extent_get_width(ext) != extent_get_width(replace)) { ++ /* make current extent narrower */ ++ if (state_of_extent(ext) == ALLOCATED_EXTENT) ++ extent_set_start(ext, ++ extent_get_start(ext) + ++ extent_get_width(replace)); ++ extent_set_width(ext, ++ extent_get_width(ext) - ++ extent_get_width(replace)); ++ } else { ++ /* current extent completely glued with its left neighbor, remove it */ ++ coord_t from, to; ++ ++ coord_dup(&from, coord); ++ from.unit_pos = nr_units_extent(coord) - 1; ++ coord_dup(&to, &from); ++ ++ /* currently cut from extent can cut either from the beginning or from the end. 
Move place which got ++ freed after unit removal to end of item */ ++ memmove(ext, ext + 1, ++ (from.unit_pos - ++ coord->unit_pos) * sizeof(reiser4_extent)); ++ /* wipe part of item which is going to be cut, so that node_check will not be confused */ ++ cut_node_content(&from, &to, NULL, NULL, NULL); ++ } ++ znode_make_dirty(coord->node); ++ /* move coord back */ ++ coord->unit_pos--; ++ return 1; ++} ++ ++/** ++ * conv_extent - replace extent with 2 ones ++ * @coord: coordinate of extent to be replaced ++ * @replace: extent to overwrite the one @coord is set to ++ * ++ * Overwrites extent @coord is set to and paste one extent unit after ++ * overwritten one if @replace is shorter than initial extent ++ */ ++static int conv_extent(coord_t *coord, reiser4_extent *replace) ++{ ++ int result; ++ struct replace_handle *h; ++ reiser4_extent *ext; ++ reiser4_block_nr start, width, new_width; ++ reiser4_block_nr grabbed; ++ extent_state state; ++ ++ ext = extent_by_coord(coord); ++ state = state_of_extent(ext); ++ start = extent_get_start(ext); ++ width = extent_get_width(ext); ++ new_width = extent_get_width(replace); ++ ++ assert("vs-1458", (state == UNALLOCATED_EXTENT || ++ state == ALLOCATED_EXTENT)); ++ assert("vs-1459", width >= new_width); ++ ++ if (try_to_merge_with_left(coord, ext, replace)) { ++ /* merged @replace with left neighbor. 
Current unit is either ++ removed or narrowed */ ++ return 0; ++ } ++ ++ if (width == new_width) { ++ /* replace current extent with @replace */ ++ *ext = *replace; ++ znode_make_dirty(coord->node); ++ return 0; ++ } ++ ++ h = kmalloc(sizeof(*h), reiser4_ctx_gfp_mask_get()); ++ if (h == NULL) ++ return RETERR(-ENOMEM); ++ h->coord = coord; ++ h->lh = znode_lh(coord->node); ++ h->pkey = &h->key; ++ unit_key_by_coord(coord, h->pkey); ++ set_key_offset(h->pkey, ++ (get_key_offset(h->pkey) + new_width * current_blocksize)); ++ h->overwrite = *replace; ++ ++ /* replace @ext with @replace and padding extent */ ++ reiser4_set_extent(&h->new_extents[0], ++ (state == ALLOCATED_EXTENT) ? ++ (start + new_width) : ++ UNALLOCATED_EXTENT_START, ++ width - new_width); ++ h->nr_new_extents = 1; ++ h->flags = COPI_DONT_SHIFT_LEFT; ++ h->paste_key = h->key; ++ ++ /* reserve space for extent unit paste, @grabbed is reserved before */ ++ grabbed = reserve_replace(); ++ result = reiser4_replace_extent(h, 0 /* leave @coord set to overwritten ++ extent */); ++ ++ /* restore reserved */ ++ free_replace_reserved(grabbed); ++ kfree(h); ++ return result; ++} ++ ++/** ++ * assign_real_blocknrs ++ * @flush_pos: ++ * @oid: objectid of file jnodes to assign block number to belongs to ++ * @index: first jnode on the range ++ * @count: number of jnodes to assign block numbers to ++ * @first: start of allocated block range ++ * ++ * Assigns block numbers to each of @count jnodes. Index of first jnode is ++ * @index. Jnodes get lookuped with jlookup. 
++ */ ++static void assign_real_blocknrs(flush_pos_t *flush_pos, oid_t oid, ++ unsigned long index, reiser4_block_nr count, ++ reiser4_block_nr first) ++{ ++ unsigned long i; ++ reiser4_tree *tree; ++ txn_atom *atom; ++ int nr; ++ ++ atom = atom_locked_by_fq(flush_pos->fq); ++ assert("vs-1468", atom); ++ BUG_ON(atom == NULL); ++ ++ nr = 0; ++ tree = current_tree; ++ for (i = 0; i < count; ++i, ++index) { ++ jnode *node; ++ ++ node = jlookup(tree, oid, index); ++ assert("", node != NULL); ++ BUG_ON(node == NULL); ++ ++ spin_lock_jnode(node); ++ assert("", !jnode_is_flushprepped(node)); ++ assert("vs-1475", node->atom == atom); ++ assert("vs-1476", atomic_read(&node->x_count) > 0); ++ ++ JF_CLR(node, JNODE_FLUSH_RESERVED); ++ jnode_set_block(node, &first); ++ unformatted_make_reloc(node, flush_pos->fq); ++ ON_DEBUG(count_jnode(node->atom, node, NODE_LIST(node), ++ FQ_LIST, 0)); ++ spin_unlock_jnode(node); ++ first++; ++ ++ atomic_dec(&node->x_count); ++ nr ++; ++ } ++ ++ spin_unlock_atom(atom); ++ return; ++} ++ ++/** ++ * make_node_ovrwr - assign node to overwrite set ++ * @jnodes: overwrite set list head ++ * @node: jnode to belong to overwrite set ++ * ++ * Sets OVRWR jnode state bit and puts @node to the end of list head @jnodes ++ * which is an accumulator for nodes before they get to overwrite set list of ++ * atom. 
++ */ ++static void make_node_ovrwr(struct list_head *jnodes, jnode *node) ++{ ++ spin_lock_jnode(node); ++ ++ assert("zam-917", !JF_ISSET(node, JNODE_RELOC)); ++ assert("zam-918", !JF_ISSET(node, JNODE_OVRWR)); ++ ++ JF_SET(node, JNODE_OVRWR); ++ list_move_tail(&node->capture_link, jnodes); ++ ON_DEBUG(count_jnode(node->atom, node, DIRTY_LIST, OVRWR_LIST, 0)); ++ ++ spin_unlock_jnode(node); ++} ++ ++/** ++ * mark_jnodes_overwrite - put bunch of jnodes to overwrite set ++ * @flush_pos: flush position ++ * @oid: objectid of file jnodes belong to ++ * @index: starting index ++ * @width: extent width ++ * ++ * Puts nodes of one extent (file objectid @oid, extent width @width) to atom's ++ * overwrite set. Starting from the one with index @index. If end of slum is ++ * detected (node is not found or flushprepped) - stop iterating and set flush ++ * position's state to POS_INVALID. ++ */ ++static void mark_jnodes_overwrite(flush_pos_t *flush_pos, oid_t oid, ++ unsigned long index, reiser4_block_nr width) ++{ ++ unsigned long i; ++ reiser4_tree *tree; ++ jnode *node; ++ txn_atom *atom; ++ LIST_HEAD(jnodes); ++ ++ tree = current_tree; ++ ++ atom = atom_locked_by_fq(reiser4_pos_fq(flush_pos)); ++ assert("vs-1478", atom); ++ ++ for (i = flush_pos->pos_in_unit; i < width; i++, index++) { ++ node = jlookup(tree, oid, index); ++ if (!node) { ++ flush_pos->state = POS_INVALID; ++ break; ++ } ++ if (jnode_check_flushprepped(node)) { ++ flush_pos->state = POS_INVALID; ++ atomic_dec(&node->x_count); ++ break; ++ } ++ if (node->atom != atom) { ++ flush_pos->state = POS_INVALID; ++ atomic_dec(&node->x_count); ++ break; ++ } ++ make_node_ovrwr(&jnodes, node); ++ atomic_dec(&node->x_count); ++ } ++ ++ list_splice_init(&jnodes, ATOM_OVRWR_LIST(atom)->prev); ++ spin_unlock_atom(atom); ++} ++ ++/** ++ * allocated_extent_slum_size ++ * @flush_pos: ++ * @oid: ++ * @index: ++ * @count: ++ * ++ * ++ */ ++static int allocated_extent_slum_size(flush_pos_t *flush_pos, oid_t oid, ++ unsigned 
long index, unsigned long count) ++{ ++ unsigned long i; ++ reiser4_tree *tree; ++ txn_atom *atom; ++ int nr; ++ ++ atom = atom_locked_by_fq(reiser4_pos_fq(flush_pos)); ++ assert("vs-1468", atom); ++ ++ nr = 0; ++ tree = current_tree; ++ for (i = 0; i < count; ++i, ++index) { ++ jnode *node; ++ ++ node = jlookup(tree, oid, index); ++ if (!node) ++ break; ++ ++ if (jnode_check_flushprepped(node)) { ++ atomic_dec(&node->x_count); ++ break; ++ } ++ ++ if (node->atom != atom) { ++ /* ++ * this is possible on overwrite: extent_write may ++ * capture several unformatted nodes without capturing ++ * any formatted nodes. ++ */ ++ atomic_dec(&node->x_count); ++ break; ++ } ++ ++ assert("vs-1476", atomic_read(&node->x_count) > 1); ++ atomic_dec(&node->x_count); ++ nr ++; ++ } ++ ++ spin_unlock_atom(atom); ++ return nr; ++} ++ ++/** ++ * alloc_extent ++ * @flush_pos: ++ * ++ * ++ * this is called by handle_pos_on_twig to proceed extent unit flush_pos->coord ++ * is set to. It is to prepare for flushing sequence of not flushprepped nodes ++ * (slum). It supposes that slum starts at flush_pos->pos_in_unit position ++ * within the extent. 
Slum gets to relocate set if flush_pos->leaf_relocate is ++ * set to 1 and to overwrite set otherwise ++ */ ++int reiser4_alloc_extent(flush_pos_t *flush_pos) ++{ ++ coord_t *coord; ++ reiser4_extent *ext; ++ reiser4_extent replace_ext; ++ oid_t oid; ++ reiser4_block_nr protected; ++ reiser4_block_nr start; ++ __u64 index; ++ __u64 width; ++ extent_state state; ++ int result; ++ reiser4_block_nr first_allocated; ++ __u64 allocated; ++ reiser4_key key; ++ block_stage_t block_stage; ++ ++ assert("vs-1468", flush_pos->state == POS_ON_EPOINT); ++ assert("vs-1469", coord_is_existing_unit(&flush_pos->coord) ++ && item_is_extent(&flush_pos->coord)); ++ ++ coord = &flush_pos->coord; ++ ++ ext = extent_by_coord(coord); ++ state = state_of_extent(ext); ++ if (state == HOLE_EXTENT) { ++ flush_pos->state = POS_INVALID; ++ return 0; ++ } ++ ++ item_key_by_coord(coord, &key); ++ oid = get_key_objectid(&key); ++ index = extent_unit_index(coord) + flush_pos->pos_in_unit; ++ start = extent_get_start(ext); ++ width = extent_get_width(ext); ++ ++ assert("vs-1457", width > flush_pos->pos_in_unit); ++ ++ if (flush_pos->leaf_relocate || state == UNALLOCATED_EXTENT) { ++ /* relocate */ ++ if (flush_pos->pos_in_unit) { ++ /* split extent unit into two */ ++ result = ++ split_allocated_extent(coord, ++ flush_pos->pos_in_unit); ++ flush_pos->pos_in_unit = 0; ++ return result; ++ } ++ ++ /* limit number of nodes to allocate */ ++ if (flush_pos->nr_to_write < width) ++ width = flush_pos->nr_to_write; ++ ++ if (state == ALLOCATED_EXTENT) { ++ /* ++ * all protected nodes are not flushprepped, therefore ++ * they are counted as flush_reserved ++ */ ++ block_stage = BLOCK_FLUSH_RESERVED; ++ protected = allocated_extent_slum_size(flush_pos, oid, ++ index, width); ++ if (protected == 0) { ++ flush_pos->state = POS_INVALID; ++ flush_pos->pos_in_unit = 0; ++ return 0; ++ } ++ } else { ++ block_stage = BLOCK_UNALLOCATED; ++ protected = width; ++ } ++ ++ /* ++ * look at previous unit if possible. 
If it is allocated, make ++ * preceder more precise ++ */ ++ if (coord->unit_pos && ++ (state_of_extent(ext - 1) == ALLOCATED_EXTENT)) ++ reiser4_pos_hint(flush_pos)->blk = ++ extent_get_start(ext - 1) + ++ extent_get_width(ext - 1); ++ ++ /* allocate new block numbers for protected nodes */ ++ extent_allocate_blocks(reiser4_pos_hint(flush_pos), ++ protected, ++ &first_allocated, &allocated, ++ block_stage); ++ ++ if (state == ALLOCATED_EXTENT) ++ /* ++ * on relocating - free nodes which are going to be ++ * relocated ++ */ ++ reiser4_dealloc_blocks(&start, &allocated, ++ BLOCK_ALLOCATED, BA_DEFER); ++ ++ /* assign new block numbers to protected nodes */ ++ assign_real_blocknrs(flush_pos, oid, index, allocated, first_allocated); ++ ++ /* prepare extent which will replace current one */ ++ reiser4_set_extent(&replace_ext, first_allocated, allocated); ++ ++ /* adjust extent item */ ++ result = conv_extent(coord, &replace_ext); ++ if (result != 0 && result != -ENOMEM) { ++ warning("vs-1461", ++ "Failed to allocate extent. Should not happen\n"); ++ return result; ++ } ++ ++ /* ++ * break flush: we prepared for flushing as many blocks as we ++ * were asked for ++ */ ++ if (flush_pos->nr_to_write == allocated) ++ flush_pos->state = POS_INVALID; ++ } else { ++ /* overwrite */ ++ mark_jnodes_overwrite(flush_pos, oid, index, width); ++ } ++ flush_pos->pos_in_unit = 0; ++ return 0; ++} ++ ++/* if @key is glueable to the item @coord is set to */ ++static int must_insert(const coord_t *coord, const reiser4_key *key) ++{ ++ reiser4_key last; ++ ++ if (item_id_by_coord(coord) == EXTENT_POINTER_ID ++ && keyeq(append_key_extent(coord, &last), key)) ++ return 0; ++ return 1; ++} ++ ++/* copy extent @copy to the end of @node. 
It may have to either insert new item after the last one, or append last item, ++ or modify last unit of last item to have greater width */ ++static int put_unit_to_end(znode *node, const reiser4_key *key, ++ reiser4_extent *copy_ext) ++{ ++ int result; ++ coord_t coord; ++ cop_insert_flag flags; ++ reiser4_extent *last_ext; ++ reiser4_item_data data; ++ ++ /* set coord after last unit in an item */ ++ coord_init_last_unit(&coord, node); ++ coord.between = AFTER_UNIT; ++ ++ flags = ++ COPI_DONT_SHIFT_LEFT | COPI_DONT_SHIFT_RIGHT | COPI_DONT_ALLOCATE; ++ if (must_insert(&coord, key)) { ++ result = ++ insert_by_coord(&coord, init_new_extent(&data, copy_ext, 1), ++ key, NULL /*lh */ , flags); ++ ++ } else { ++ /* try to glue with last unit */ ++ last_ext = extent_by_coord(&coord); ++ if (state_of_extent(last_ext) && ++ extent_get_start(last_ext) + extent_get_width(last_ext) == ++ extent_get_start(copy_ext)) { ++ /* widen last unit of node */ ++ extent_set_width(last_ext, ++ extent_get_width(last_ext) + ++ extent_get_width(copy_ext)); ++ znode_make_dirty(node); ++ return 0; ++ } ++ ++ /* FIXME: put an assertion here that we can not merge last unit in @node and new unit */ ++ result = ++ insert_into_item(&coord, NULL /*lh */ , key, ++ init_new_extent(&data, copy_ext, 1), ++ flags); ++ } ++ ++ assert("vs-438", result == 0 || result == -E_NODE_FULL); ++ return result; ++} ++ ++/* @coord is set to extent unit */ ++squeeze_result squalloc_extent(znode *left, const coord_t *coord, ++ flush_pos_t *flush_pos, ++ reiser4_key *stop_key) ++{ ++ reiser4_extent *ext; ++ __u64 index; ++ __u64 width; ++ reiser4_block_nr start; ++ extent_state state; ++ oid_t oid; ++ reiser4_block_nr first_allocated; ++ __u64 allocated; ++ __u64 protected; ++ reiser4_extent copy_extent; ++ reiser4_key key; ++ int result; ++ block_stage_t block_stage; ++ ++ assert("vs-1457", flush_pos->pos_in_unit == 0); ++ assert("vs-1467", coord_is_leftmost_unit(coord)); ++ assert("vs-1467", item_is_extent(coord)); 
++ ++ ext = extent_by_coord(coord); ++ index = extent_unit_index(coord); ++ start = extent_get_start(ext); ++ width = extent_get_width(ext); ++ state = state_of_extent(ext); ++ unit_key_by_coord(coord, &key); ++ oid = get_key_objectid(&key); ++ ++ if ((flush_pos->leaf_relocate && state == ALLOCATED_EXTENT) || ++ (state == UNALLOCATED_EXTENT)) { ++ /* relocate */ ++ if (state == ALLOCATED_EXTENT) { ++ /* all protected nodes are not flushprepped, therefore ++ * they are counted as flush_reserved */ ++ block_stage = BLOCK_FLUSH_RESERVED; ++ protected = allocated_extent_slum_size(flush_pos, oid, ++ index, width); ++ if (protected == 0) { ++ flush_pos->state = POS_INVALID; ++ flush_pos->pos_in_unit = 0; ++ return 0; ++ } ++ } else { ++ block_stage = BLOCK_UNALLOCATED; ++ protected = width; ++ } ++ ++ /* ++ * look at previous unit if possible. If it is allocated, make ++ * preceder more precise ++ */ ++ if (coord->unit_pos && ++ (state_of_extent(ext - 1) == ALLOCATED_EXTENT)) ++ reiser4_pos_hint(flush_pos)->blk = ++ extent_get_start(ext - 1) + ++ extent_get_width(ext - 1); ++ ++ /* allocate new block numbers for protected nodes */ ++ extent_allocate_blocks(reiser4_pos_hint(flush_pos), ++ protected, ++ &first_allocated, &allocated, ++ block_stage); ++ ++ /* prepare extent which will be copied to left */ ++ reiser4_set_extent(©_extent, first_allocated, allocated); ++ ++ result = put_unit_to_end(left, &key, ©_extent); ++ if (result == -E_NODE_FULL) { ++ int target_block_stage; ++ ++ /* free blocks which were just allocated */ ++ target_block_stage = ++ (state == ++ ALLOCATED_EXTENT) ? BLOCK_FLUSH_RESERVED : ++ BLOCK_UNALLOCATED; ++ reiser4_dealloc_blocks(&first_allocated, &allocated, ++ target_block_stage, ++ BA_PERMANENT); ++ ++ /* rewind the preceder. 
*/ ++ flush_pos->preceder.blk = first_allocated; ++ check_preceder(flush_pos->preceder.blk); ++ ++ return SQUEEZE_TARGET_FULL; ++ } ++ ++ if (state == ALLOCATED_EXTENT) { ++ /* free nodes which were relocated */ ++ reiser4_dealloc_blocks(&start, &allocated, ++ BLOCK_ALLOCATED, BA_DEFER); ++ } ++ ++ /* assign new block numbers to protected nodes */ ++ assign_real_blocknrs(flush_pos, oid, index, allocated, ++ first_allocated); ++ ++ set_key_offset(&key, ++ get_key_offset(&key) + ++ (allocated << current_blocksize_bits)); ++ } else { ++ /* ++ * overwrite: try to copy unit as it is to left neighbor and ++ * make all first not flushprepped nodes overwrite nodes ++ */ ++ reiser4_set_extent(©_extent, start, width); ++ result = put_unit_to_end(left, &key, ©_extent); ++ if (result == -E_NODE_FULL) ++ return SQUEEZE_TARGET_FULL; ++ ++ if (state != HOLE_EXTENT) ++ mark_jnodes_overwrite(flush_pos, oid, index, width); ++ set_key_offset(&key, ++ get_key_offset(&key) + ++ (width << current_blocksize_bits)); ++ } ++ *stop_key = key; ++ return SQUEEZE_CONTINUE; ++} ++ ++int key_by_offset_extent(struct inode *inode, loff_t off, reiser4_key * key) ++{ ++ return key_by_inode_and_offset_common(inode, off, key); ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * scroll-step: 1 ++ * End: ++ */ +diff --git a/fs/reiser4/plugin/item/extent_item_ops.c b/fs/reiser4/plugin/item/extent_item_ops.c +new file mode 100644 +index 0000000..53ba8e7 +--- /dev/null ++++ b/fs/reiser4/plugin/item/extent_item_ops.c +@@ -0,0 +1,889 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "item.h" ++#include "../../inode.h" ++#include "../../tree_walk.h" /* check_sibling_list() */ ++#include "../../page_cache.h" ++#include "../../carry.h" ++ ++#include ++ ++/* item_plugin->b.max_key_inside */ ++reiser4_key *max_key_inside_extent(const coord_t * coord, 
reiser4_key * key) ++{ ++ item_key_by_coord(coord, key); ++ set_key_offset(key, get_key_offset(reiser4_max_key())); ++ return key; ++} ++ ++/* item_plugin->b.can_contain_key ++ this checks whether @key of @data is matching to position set by @coord */ ++int ++can_contain_key_extent(const coord_t * coord, const reiser4_key * key, ++ const reiser4_item_data * data) ++{ ++ reiser4_key item_key; ++ ++ if (item_plugin_by_coord(coord) != data->iplug) ++ return 0; ++ ++ item_key_by_coord(coord, &item_key); ++ if (get_key_locality(key) != get_key_locality(&item_key) || ++ get_key_objectid(key) != get_key_objectid(&item_key) || ++ get_key_ordering(key) != get_key_ordering(&item_key)) ++ return 0; ++ ++ return 1; ++} ++ ++/* item_plugin->b.mergeable ++ first item is of extent type */ ++/* Audited by: green(2002.06.13) */ ++int mergeable_extent(const coord_t * p1, const coord_t * p2) ++{ ++ reiser4_key key1, key2; ++ ++ assert("vs-299", item_id_by_coord(p1) == EXTENT_POINTER_ID); ++ /* FIXME-VS: Which is it? 
Assert or return 0 */ ++ if (item_id_by_coord(p2) != EXTENT_POINTER_ID) { ++ return 0; ++ } ++ ++ item_key_by_coord(p1, &key1); ++ item_key_by_coord(p2, &key2); ++ if (get_key_locality(&key1) != get_key_locality(&key2) || ++ get_key_objectid(&key1) != get_key_objectid(&key2) || ++ get_key_ordering(&key1) != get_key_ordering(&key2) || ++ get_key_type(&key1) != get_key_type(&key2)) ++ return 0; ++ if (get_key_offset(&key1) + ++ reiser4_extent_size(p1, nr_units_extent(p1)) != ++ get_key_offset(&key2)) ++ return 0; ++ return 1; ++} ++ ++/* item_plugin->b.nr_units */ ++pos_in_node_t nr_units_extent(const coord_t * coord) ++{ ++ /* length of extent item has to be multiple of extent size */ ++ assert("vs-1424", ++ (item_length_by_coord(coord) % sizeof(reiser4_extent)) == 0); ++ return item_length_by_coord(coord) / sizeof(reiser4_extent); ++} ++ ++/* item_plugin->b.lookup */ ++lookup_result ++lookup_extent(const reiser4_key * key, lookup_bias bias UNUSED_ARG, ++ coord_t * coord) ++{ /* znode and item_pos are ++ set to an extent item to ++ look through */ ++ reiser4_key item_key; ++ reiser4_block_nr lookuped, offset; ++ unsigned i, nr_units; ++ reiser4_extent *ext; ++ unsigned blocksize; ++ unsigned char blocksize_bits; ++ ++ item_key_by_coord(coord, &item_key); ++ offset = get_key_offset(&item_key); ++ ++ /* key we are looking for must be greater than key of item @coord */ ++ assert("vs-414", keygt(key, &item_key)); ++ ++ assert("umka-99945", ++ !keygt(key, max_key_inside_extent(coord, &item_key))); ++ ++ ext = extent_item(coord); ++ assert("vs-1350", (char *)ext == (zdata(coord->node) + coord->offset)); ++ ++ blocksize = current_blocksize; ++ blocksize_bits = current_blocksize_bits; ++ ++ /* offset we are looking for */ ++ lookuped = get_key_offset(key); ++ ++ nr_units = nr_units_extent(coord); ++ /* go through all extents until the one which address given offset */ ++ for (i = 0; i < nr_units; i++, ext++) { ++ offset += (extent_get_width(ext) << blocksize_bits); ++ if 
(offset > lookuped) { ++ /* desired byte is somewhere in this extent */ ++ coord->unit_pos = i; ++ coord->between = AT_UNIT; ++ return CBK_COORD_FOUND; ++ } ++ } ++ ++ /* set coord after last unit */ ++ coord->unit_pos = nr_units - 1; ++ coord->between = AFTER_UNIT; ++ return CBK_COORD_FOUND; ++} ++ ++/* item_plugin->b.paste ++ item @coord is set to has been appended with @data->length of free ++ space. data->data contains data to be pasted into the item in position ++ @coord->in_item.unit_pos. It must fit into that free space. ++ @coord must be set between units. ++*/ ++int ++paste_extent(coord_t * coord, reiser4_item_data * data, ++ carry_plugin_info * info UNUSED_ARG) ++{ ++ unsigned old_nr_units; ++ reiser4_extent *ext; ++ int item_length; ++ ++ ext = extent_item(coord); ++ item_length = item_length_by_coord(coord); ++ old_nr_units = (item_length - data->length) / sizeof(reiser4_extent); ++ ++ /* this is also used to copy extent into newly created item, so ++ old_nr_units could be 0 */ ++ assert("vs-260", item_length >= data->length); ++ ++ /* make sure that coord is set properly */ ++ assert("vs-35", ++ ((!coord_is_existing_unit(coord)) ++ || (!old_nr_units && !coord->unit_pos))); ++ ++ /* first unit to be moved */ ++ switch (coord->between) { ++ case AFTER_UNIT: ++ coord->unit_pos++; ++ case BEFORE_UNIT: ++ coord->between = AT_UNIT; ++ break; ++ case AT_UNIT: ++ assert("vs-331", !old_nr_units && !coord->unit_pos); ++ break; ++ default: ++ impossible("vs-330", "coord is set improperly"); ++ } ++ ++ /* prepare space for new units */ ++ memmove(ext + coord->unit_pos + data->length / sizeof(reiser4_extent), ++ ext + coord->unit_pos, ++ (old_nr_units - coord->unit_pos) * sizeof(reiser4_extent)); ++ ++ /* copy new data from kernel space */ ++ assert("vs-556", data->user == 0); ++ memcpy(ext + coord->unit_pos, data->data, (unsigned)data->length); ++ ++ /* after paste @coord is set to first of pasted units */ ++ assert("vs-332", coord_is_existing_unit(coord)); ++ 
assert("vs-333", ++ !memcmp(data->data, extent_by_coord(coord), ++ (unsigned)data->length)); ++ return 0; ++} ++ ++/* item_plugin->b.can_shift */ ++int ++can_shift_extent(unsigned free_space, coord_t * source, ++ znode * target UNUSED_ARG, shift_direction pend UNUSED_ARG, ++ unsigned *size, unsigned want) ++{ ++ *size = item_length_by_coord(source); ++ if (*size > free_space) ++ /* never split a unit of extent item */ ++ *size = free_space - free_space % sizeof(reiser4_extent); ++ ++ /* we can shift *size bytes, calculate how many do we want to shift */ ++ if (*size > want * sizeof(reiser4_extent)) ++ *size = want * sizeof(reiser4_extent); ++ ++ if (*size % sizeof(reiser4_extent) != 0) ++ impossible("vs-119", "Wrong extent size: %i %zd", *size, ++ sizeof(reiser4_extent)); ++ return *size / sizeof(reiser4_extent); ++ ++} ++ ++/* item_plugin->b.copy_units */ ++void ++copy_units_extent(coord_t * target, coord_t * source, ++ unsigned from, unsigned count, ++ shift_direction where_is_free_space, unsigned free_space) ++{ ++ char *from_ext, *to_ext; ++ ++ assert("vs-217", free_space == count * sizeof(reiser4_extent)); ++ ++ from_ext = item_body_by_coord(source); ++ to_ext = item_body_by_coord(target); ++ ++ if (where_is_free_space == SHIFT_LEFT) { ++ assert("vs-215", from == 0); ++ ++ /* At this moment, item length was already updated in the item ++ header by shifting code, hence nr_units_extent() will ++ return "new" number of units---one we obtain after copying ++ units. 
++ */ ++ to_ext += ++ (nr_units_extent(target) - count) * sizeof(reiser4_extent); ++ } else { ++ reiser4_key key; ++ coord_t coord; ++ ++ assert("vs-216", ++ from + count == coord_last_unit_pos(source) + 1); ++ ++ from_ext += item_length_by_coord(source) - free_space; ++ ++ /* new units are inserted before first unit in an item, ++ therefore, we have to update item key */ ++ coord = *source; ++ coord.unit_pos = from; ++ unit_key_extent(&coord, &key); ++ ++ node_plugin_by_node(target->node)->update_item_key(target, &key, ++ NULL /*info */); ++ } ++ ++ memcpy(to_ext, from_ext, free_space); ++} ++ ++/* item_plugin->b.create_hook ++ @arg is znode of leaf node for which we need to update right delimiting key */ ++int create_hook_extent(const coord_t * coord, void *arg) ++{ ++ coord_t *child_coord; ++ znode *node; ++ reiser4_key key; ++ reiser4_tree *tree; ++ ++ if (!arg) ++ return 0; ++ ++ child_coord = arg; ++ tree = znode_get_tree(coord->node); ++ ++ assert("nikita-3246", znode_get_level(child_coord->node) == LEAF_LEVEL); ++ ++ write_lock_tree(tree); ++ write_lock_dk(tree); ++ /* find a node on the left level for which right delimiting key has to ++ be updated */ ++ if (coord_wrt(child_coord) == COORD_ON_THE_LEFT) { ++ assert("vs-411", znode_is_left_connected(child_coord->node)); ++ node = child_coord->node->left; ++ } else { ++ assert("vs-412", coord_wrt(child_coord) == COORD_ON_THE_RIGHT); ++ node = child_coord->node; ++ assert("nikita-3314", node != NULL); ++ } ++ ++ if (node != NULL) { ++ znode_set_rd_key(node, item_key_by_coord(coord, &key)); ++ ++ assert("nikita-3282", check_sibling_list(node)); ++ /* break sibling links */ ++ if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && node->right) { ++ ON_DEBUG(node->right->left_version = ++ atomic_inc_return(&delim_key_version); ++ node->right_version = ++ atomic_inc_return(&delim_key_version);); ++ ++ node->right->left = NULL; ++ node->right = NULL; ++ } ++ } ++ write_unlock_dk(tree); ++ write_unlock_tree(tree); ++ return 
0; ++} ++ ++#define ITEM_TAIL_KILLED 0 ++#define ITEM_HEAD_KILLED 1 ++#define ITEM_KILLED 2 ++ ++/* item_plugin->b.kill_hook ++ this is called when @count units starting from @from-th one are going to be removed ++ */ ++int ++kill_hook_extent(const coord_t * coord, pos_in_node_t from, pos_in_node_t count, ++ struct carry_kill_data *kdata) ++{ ++ reiser4_extent *ext; ++ reiser4_block_nr start, length; ++ const reiser4_key *pfrom_key, *pto_key; ++ struct inode *inode; ++ reiser4_tree *tree; ++ pgoff_t from_off, to_off, offset, skip; ++ int retval; ++ ++ /* these are located in memory kmalloc-ed by kill_node_content */ ++ reiser4_key *min_item_key, *max_item_key, *from_key, *to_key, *key; ++ coord_t *dup, *next; ++ ++ assert("zam-811", znode_is_write_locked(coord->node)); ++ assert("nikita-3315", kdata != NULL); ++ assert("vs-34", kdata->buf != NULL); ++ ++ /* map structures to kdata->buf */ ++ min_item_key = (reiser4_key *) (kdata->buf); ++ max_item_key = min_item_key + 1; ++ from_key = max_item_key + 1; ++ to_key = from_key + 1; ++ key = to_key + 1; ++ dup = (coord_t *) (key + 1); ++ next = dup + 1; ++ ++ item_key_by_coord(coord, min_item_key); ++ max_item_key_by_coord(coord, max_item_key); ++ ++ if (kdata->params.from_key) { ++ pfrom_key = kdata->params.from_key; ++ pto_key = kdata->params.to_key; ++ } else { ++ assert("vs-1549", from == coord->unit_pos); ++ unit_key_by_coord(coord, from_key); ++ pfrom_key = from_key; ++ ++ coord_dup(dup, coord); ++ dup->unit_pos = from + count - 1; ++ max_unit_key_by_coord(dup, to_key); ++ pto_key = to_key; ++ } ++ ++ if (!keylt(pto_key, max_item_key)) { ++ if (!keygt(pfrom_key, min_item_key)) { ++ znode *left, *right; ++ ++ /* item is to be removed completely */ ++ assert("nikita-3316", kdata->left != NULL ++ && kdata->right != NULL); ++ ++ left = kdata->left->node; ++ right = kdata->right->node; ++ ++ tree = current_tree; ++ /* we have to do two things: ++ * ++ * 1. 
link left and right formatted neighbors of ++ * extent being removed, and ++ * ++ * 2. update their delimiting keys. ++ * ++ * atomicity of these operations is protected by ++ * taking dk-lock and tree-lock. ++ */ ++ /* if neighbors of item being removed are znodes - ++ * link them */ ++ write_lock_tree(tree); ++ write_lock_dk(tree); ++ link_left_and_right(left, right); ++ if (left) { ++ /* update right delimiting key of left ++ * neighbor of extent item */ ++ /*coord_t next; ++ reiser4_key key; */ ++ ++ coord_dup(next, coord); ++ ++ if (coord_next_item(next)) ++ *key = *znode_get_rd_key(coord->node); ++ else ++ item_key_by_coord(next, key); ++ znode_set_rd_key(left, key); ++ } ++ write_unlock_dk(tree); ++ write_unlock_tree(tree); ++ ++ from_off = ++ get_key_offset(min_item_key) >> PAGE_CACHE_SHIFT; ++ to_off = ++ (get_key_offset(max_item_key) + ++ 1) >> PAGE_CACHE_SHIFT; ++ retval = ITEM_KILLED; ++ } else { ++ /* tail of item is to be removed */ ++ from_off = ++ (get_key_offset(pfrom_key) + PAGE_CACHE_SIZE - ++ 1) >> PAGE_CACHE_SHIFT; ++ to_off = ++ (get_key_offset(max_item_key) + ++ 1) >> PAGE_CACHE_SHIFT; ++ retval = ITEM_TAIL_KILLED; ++ } ++ } else { ++ /* head of item is to be removed */ ++ assert("vs-1571", keyeq(pfrom_key, min_item_key)); ++ assert("vs-1572", ++ (get_key_offset(pfrom_key) & (PAGE_CACHE_SIZE - 1)) == ++ 0); ++ assert("vs-1573", ++ ((get_key_offset(pto_key) + 1) & (PAGE_CACHE_SIZE - ++ 1)) == 0); ++ ++ if (kdata->left->node) { ++ /* update right delimiting key of left neighbor of extent item */ ++ /*reiser4_key key; */ ++ ++ *key = *pto_key; ++ set_key_offset(key, get_key_offset(pto_key) + 1); ++ ++ write_lock_dk(current_tree); ++ znode_set_rd_key(kdata->left->node, key); ++ write_unlock_dk(current_tree); ++ } ++ ++ from_off = get_key_offset(pfrom_key) >> PAGE_CACHE_SHIFT; ++ to_off = (get_key_offset(pto_key) + 1) >> PAGE_CACHE_SHIFT; ++ retval = ITEM_HEAD_KILLED; ++ } ++ ++ inode = kdata->inode; ++ assert("vs-1545", inode != NULL); ++ if 
(inode != NULL) ++ /* take care of pages and jnodes corresponding to part of item being killed */ ++ reiser4_invalidate_pages(inode->i_mapping, from_off, ++ to_off - from_off, ++ kdata->params.truncate); ++ ++ ext = extent_item(coord) + from; ++ offset = ++ (get_key_offset(min_item_key) + ++ reiser4_extent_size(coord, from)) >> PAGE_CACHE_SHIFT; ++ ++ assert("vs-1551", from_off >= offset); ++ assert("vs-1552", from_off - offset <= extent_get_width(ext)); ++ skip = from_off - offset; ++ offset = from_off; ++ ++ while (offset < to_off) { ++ length = extent_get_width(ext) - skip; ++ if (state_of_extent(ext) == HOLE_EXTENT) { ++ skip = 0; ++ offset += length; ++ ext++; ++ continue; ++ } ++ ++ if (offset + length > to_off) { ++ length = to_off - offset; ++ } ++ ++ DQUOT_FREE_BLOCK_NODIRTY(inode, length); ++ ++ if (state_of_extent(ext) == UNALLOCATED_EXTENT) { ++ /* some jnodes corresponding to this unallocated extent */ ++ fake_allocated2free(length, 0 /* unformatted */ ); ++ ++ skip = 0; ++ offset += length; ++ ext++; ++ continue; ++ } ++ ++ assert("vs-1218", state_of_extent(ext) == ALLOCATED_EXTENT); ++ ++ if (length != 0) { ++ start = extent_get_start(ext) + skip; ++ ++ /* BA_DEFER bit parameter is turned on because blocks which get freed are not safe to be freed ++ immediately */ ++ reiser4_dealloc_blocks(&start, &length, ++ 0 /* not used */ , ++ BA_DEFER ++ /* unformatted with defer */ ); ++ } ++ skip = 0; ++ offset += length; ++ ext++; ++ } ++ return retval; ++} ++ ++/* item_plugin->b.kill_units */ ++int ++kill_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to, ++ struct carry_kill_data *kdata, reiser4_key * smallest_removed, ++ reiser4_key * new_first) ++{ ++ reiser4_extent *ext; ++ reiser4_key item_key; ++ pos_in_node_t count; ++ reiser4_key from_key, to_key; ++ const reiser4_key *pfrom_key, *pto_key; ++ loff_t off; ++ int result; ++ ++ assert("vs-1541", ++ ((kdata->params.from_key == NULL && kdata->params.to_key == NULL) ++ || 
(kdata->params.from_key != NULL ++ && kdata->params.to_key != NULL))); ++ ++ if (kdata->params.from_key) { ++ pfrom_key = kdata->params.from_key; ++ pto_key = kdata->params.to_key; ++ } else { ++ coord_t dup; ++ ++ /* calculate key range of kill */ ++ assert("vs-1549", from == coord->unit_pos); ++ unit_key_by_coord(coord, &from_key); ++ pfrom_key = &from_key; ++ ++ coord_dup(&dup, coord); ++ dup.unit_pos = to; ++ max_unit_key_by_coord(&dup, &to_key); ++ pto_key = &to_key; ++ } ++ ++ item_key_by_coord(coord, &item_key); ++ ++#if REISER4_DEBUG ++ { ++ reiser4_key max_item_key; ++ ++ max_item_key_by_coord(coord, &max_item_key); ++ ++ if (new_first) { ++ /* head of item is to be cut */ ++ assert("vs-1542", keyeq(pfrom_key, &item_key)); ++ assert("vs-1538", keylt(pto_key, &max_item_key)); ++ } else { ++ /* tail of item is to be cut */ ++ assert("vs-1540", keygt(pfrom_key, &item_key)); ++ assert("vs-1543", !keylt(pto_key, &max_item_key)); ++ } ++ } ++#endif ++ ++ if (smallest_removed) ++ *smallest_removed = *pfrom_key; ++ ++ if (new_first) { ++ /* item head is cut. Item key will change. This new key is calculated here */ ++ assert("vs-1556", ++ (get_key_offset(pto_key) & (PAGE_CACHE_SIZE - 1)) == ++ (PAGE_CACHE_SIZE - 1)); ++ *new_first = *pto_key; ++ set_key_offset(new_first, get_key_offset(new_first) + 1); ++ } ++ ++ count = to - from + 1; ++ result = kill_hook_extent(coord, from, count, kdata); ++ if (result == ITEM_TAIL_KILLED) { ++ assert("vs-1553", ++ get_key_offset(pfrom_key) >= ++ get_key_offset(&item_key) + ++ reiser4_extent_size(coord, from)); ++ off = ++ get_key_offset(pfrom_key) - ++ (get_key_offset(&item_key) + ++ reiser4_extent_size(coord, from)); ++ if (off) { ++ /* unit @from is to be cut partially. 
Its width decreases */ ++ ext = extent_item(coord) + from; ++ extent_set_width(ext, ++ (off + PAGE_CACHE_SIZE - ++ 1) >> PAGE_CACHE_SHIFT); ++ count--; ++ } ++ } else { ++ __u64 max_to_offset; ++ __u64 rest; ++ ++ assert("vs-1575", result == ITEM_HEAD_KILLED); ++ assert("", from == 0); ++ assert("", ++ ((get_key_offset(pto_key) + 1) & (PAGE_CACHE_SIZE - ++ 1)) == 0); ++ assert("", ++ get_key_offset(pto_key) + 1 > ++ get_key_offset(&item_key) + ++ reiser4_extent_size(coord, to)); ++ max_to_offset = ++ get_key_offset(&item_key) + ++ reiser4_extent_size(coord, to + 1) - 1; ++ assert("", get_key_offset(pto_key) <= max_to_offset); ++ ++ rest = ++ (max_to_offset - ++ get_key_offset(pto_key)) >> PAGE_CACHE_SHIFT; ++ if (rest) { ++ /* unit @to is to be cut partially */ ++ ext = extent_item(coord) + to; ++ ++ assert("", extent_get_width(ext) > rest); ++ ++ if (state_of_extent(ext) == ALLOCATED_EXTENT) ++ extent_set_start(ext, ++ extent_get_start(ext) + ++ (extent_get_width(ext) - ++ rest)); ++ ++ extent_set_width(ext, rest); ++ count--; ++ } ++ } ++ return count * sizeof(reiser4_extent); ++} ++ ++/* item_plugin->b.cut_units ++ this is too similar to kill_units_extent */ ++int ++cut_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to, ++ struct carry_cut_data *cdata, reiser4_key * smallest_removed, ++ reiser4_key * new_first) ++{ ++ reiser4_extent *ext; ++ reiser4_key item_key; ++ pos_in_node_t count; ++ reiser4_key from_key, to_key; ++ const reiser4_key *pfrom_key, *pto_key; ++ loff_t off; ++ ++ assert("vs-1541", ++ ((cdata->params.from_key == NULL && cdata->params.to_key == NULL) ++ || (cdata->params.from_key != NULL ++ && cdata->params.to_key != NULL))); ++ ++ if (cdata->params.from_key) { ++ pfrom_key = cdata->params.from_key; ++ pto_key = cdata->params.to_key; ++ } else { ++ coord_t dup; ++ ++ /* calculate key range of kill */ ++ coord_dup(&dup, coord); ++ dup.unit_pos = from; ++ unit_key_by_coord(&dup, &from_key); ++ ++ dup.unit_pos = to; ++ 
max_unit_key_by_coord(&dup, &to_key); ++ ++ pfrom_key = &from_key; ++ pto_key = &to_key; ++ } ++ ++ assert("vs-1555", ++ (get_key_offset(pfrom_key) & (PAGE_CACHE_SIZE - 1)) == 0); ++ assert("vs-1556", ++ (get_key_offset(pto_key) & (PAGE_CACHE_SIZE - 1)) == ++ (PAGE_CACHE_SIZE - 1)); ++ ++ item_key_by_coord(coord, &item_key); ++ ++#if REISER4_DEBUG ++ { ++ reiser4_key max_item_key; ++ ++ assert("vs-1584", ++ get_key_locality(pfrom_key) == ++ get_key_locality(&item_key)); ++ assert("vs-1585", ++ get_key_type(pfrom_key) == get_key_type(&item_key)); ++ assert("vs-1586", ++ get_key_objectid(pfrom_key) == ++ get_key_objectid(&item_key)); ++ assert("vs-1587", ++ get_key_ordering(pfrom_key) == ++ get_key_ordering(&item_key)); ++ ++ max_item_key_by_coord(coord, &max_item_key); ++ ++ if (new_first != NULL) { ++ /* head of item is to be cut */ ++ assert("vs-1542", keyeq(pfrom_key, &item_key)); ++ assert("vs-1538", keylt(pto_key, &max_item_key)); ++ } else { ++ /* tail of item is to be cut */ ++ assert("vs-1540", keygt(pfrom_key, &item_key)); ++ assert("vs-1543", keyeq(pto_key, &max_item_key)); ++ } ++ } ++#endif ++ ++ if (smallest_removed) ++ *smallest_removed = *pfrom_key; ++ ++ if (new_first) { ++ /* item head is cut. Item key will change. This new key is calculated here */ ++ *new_first = *pto_key; ++ set_key_offset(new_first, get_key_offset(new_first) + 1); ++ } ++ ++ count = to - from + 1; ++ ++ assert("vs-1553", ++ get_key_offset(pfrom_key) >= ++ get_key_offset(&item_key) + reiser4_extent_size(coord, from)); ++ off = ++ get_key_offset(pfrom_key) - (get_key_offset(&item_key) + ++ reiser4_extent_size(coord, from)); ++ if (off) { ++ /* tail of unit @from is to be cut partially. 
Its width decreases */ ++ assert("vs-1582", new_first == NULL); ++ ext = extent_item(coord) + from; ++ extent_set_width(ext, off >> PAGE_CACHE_SHIFT); ++ count--; ++ } ++ ++ assert("vs-1554", ++ get_key_offset(pto_key) <= ++ get_key_offset(&item_key) + ++ reiser4_extent_size(coord, to + 1) - 1); ++ off = ++ (get_key_offset(&item_key) + ++ reiser4_extent_size(coord, to + 1) - 1) - ++ get_key_offset(pto_key); ++ if (off) { ++ /* @to_key is smaller than max key of unit @to. Unit @to will not be removed. It gets start increased ++ and width decreased. */ ++ assert("vs-1583", (off & (PAGE_CACHE_SIZE - 1)) == 0); ++ ext = extent_item(coord) + to; ++ if (state_of_extent(ext) == ALLOCATED_EXTENT) ++ extent_set_start(ext, ++ extent_get_start(ext) + ++ (extent_get_width(ext) - ++ (off >> PAGE_CACHE_SHIFT))); ++ ++ extent_set_width(ext, (off >> PAGE_CACHE_SHIFT)); ++ count--; ++ } ++ return count * sizeof(reiser4_extent); ++} ++ ++/* item_plugin->b.unit_key */ ++reiser4_key *unit_key_extent(const coord_t * coord, reiser4_key * key) ++{ ++ assert("vs-300", coord_is_existing_unit(coord)); ++ ++ item_key_by_coord(coord, key); ++ set_key_offset(key, ++ (get_key_offset(key) + ++ reiser4_extent_size(coord, coord->unit_pos))); ++ ++ return key; ++} ++ ++/* item_plugin->b.max_unit_key */ ++reiser4_key *max_unit_key_extent(const coord_t * coord, reiser4_key * key) ++{ ++ assert("vs-300", coord_is_existing_unit(coord)); ++ ++ item_key_by_coord(coord, key); ++ set_key_offset(key, ++ (get_key_offset(key) + ++ reiser4_extent_size(coord, coord->unit_pos + 1) - 1)); ++ return key; ++} ++ ++/* item_plugin->b.estimate ++ item_plugin->b.item_data_by_flow */ ++ ++#if REISER4_DEBUG ++ ++/* item_plugin->b.check ++ used for debugging, every item should have here the most complete ++ possible check of the consistency of the item that the inventor can ++ construct ++*/ ++int reiser4_check_extent(const coord_t * coord /* coord of item to check */, ++ const char **error /* where to store error message 
*/) ++{ ++ reiser4_extent *ext, *first; ++ unsigned i, j; ++ reiser4_block_nr start, width, blk_cnt; ++ unsigned num_units; ++ reiser4_tree *tree; ++ oid_t oid; ++ reiser4_key key; ++ coord_t scan; ++ ++ assert("vs-933", REISER4_DEBUG); ++ ++ if (znode_get_level(coord->node) != TWIG_LEVEL) { ++ *error = "Extent on the wrong level"; ++ return -1; ++ } ++ if (item_length_by_coord(coord) % sizeof(reiser4_extent) != 0) { ++ *error = "Wrong item size"; ++ return -1; ++ } ++ ext = first = extent_item(coord); ++ blk_cnt = reiser4_block_count(reiser4_get_current_sb()); ++ num_units = coord_num_units(coord); ++ tree = znode_get_tree(coord->node); ++ item_key_by_coord(coord, &key); ++ oid = get_key_objectid(&key); ++ coord_dup(&scan, coord); ++ ++ for (i = 0; i < num_units; ++i, ++ext) { ++ __u64 index; ++ ++ scan.unit_pos = i; ++ index = extent_unit_index(&scan); ++ ++#if 0 ++ /* check that all jnodes are present for the unallocated ++ * extent */ ++ if (state_of_extent(ext) == UNALLOCATED_EXTENT) { ++ for (j = 0; j < extent_get_width(ext); j++) { ++ jnode *node; ++ ++ node = jlookup(tree, oid, index + j); ++ if (node == NULL) { ++ print_coord("scan", &scan, 0); ++ *error = "Jnode missing"; ++ return -1; ++ } ++ jput(node); ++ } ++ } ++#endif ++ ++ start = extent_get_start(ext); ++ if (start < 2) ++ continue; ++ /* extent is allocated one */ ++ width = extent_get_width(ext); ++ if (start >= blk_cnt) { ++ *error = "Start too large"; ++ return -1; ++ } ++ if (start + width > blk_cnt) { ++ *error = "End too large"; ++ return -1; ++ } ++ /* make sure that this extent does not overlap with other ++ allocated extents extents */ ++ for (j = 0; j < i; j++) { ++ if (state_of_extent(first + j) != ALLOCATED_EXTENT) ++ continue; ++ if (! 
++ ((extent_get_start(ext) >= ++ extent_get_start(first + j) + ++ extent_get_width(first + j)) ++ || (extent_get_start(ext) + ++ extent_get_width(ext) <= ++ extent_get_start(first + j)))) { ++ *error = "Extent overlaps with others"; ++ return -1; ++ } ++ } ++ ++ } ++ ++ return 0; ++} ++ ++#endif /* REISER4_DEBUG */ ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/internal.c b/fs/reiser4/plugin/item/internal.c +new file mode 100644 +index 0000000..eb79388 +--- /dev/null ++++ b/fs/reiser4/plugin/item/internal.c +@@ -0,0 +1,396 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Implementation of internal-item plugin methods. */ ++ ++#include "../../forward.h" ++#include "../../debug.h" ++#include "../../dformat.h" ++#include "../../key.h" ++#include "../../coord.h" ++#include "internal.h" ++#include "item.h" ++#include "../node/node.h" ++#include "../plugin.h" ++#include "../../jnode.h" ++#include "../../znode.h" ++#include "../../tree_walk.h" ++#include "../../tree_mod.h" ++#include "../../tree.h" ++#include "../../super.h" ++#include "../../block_alloc.h" ++ ++/* see internal.h for explanation */ ++ ++/* plugin->u.item.b.mergeable */ ++int mergeable_internal(const coord_t * p1 UNUSED_ARG /* first item */ , ++ const coord_t * p2 UNUSED_ARG /* second item */ ) ++{ ++ /* internal items are not mergeable */ ++ return 0; ++} ++ ++/* ->lookup() method for internal items */ ++lookup_result lookup_internal(const reiser4_key * key /* key to look up */ , ++ lookup_bias bias UNUSED_ARG /* lookup bias */ , ++ coord_t * coord /* coord of item */ ) ++{ ++ reiser4_key ukey; ++ ++ switch (keycmp(unit_key_by_coord(coord, &ukey), key)) { ++ default: ++ impossible("", "keycmp()?!"); ++ case LESS_THAN: ++ /* FIXME-VS: AFTER_ITEM used to be here. 
But with new coord ++ item plugin can not be taken using coord set this way */ ++ assert("vs-681", coord->unit_pos == 0); ++ coord->between = AFTER_UNIT; ++ case EQUAL_TO: ++ return CBK_COORD_FOUND; ++ case GREATER_THAN: ++ return CBK_COORD_NOTFOUND; ++ } ++} ++ ++/* return body of internal item at @coord */ ++static internal_item_layout *internal_at(const coord_t * coord /* coord of ++ * item */ ) ++{ ++ assert("nikita-607", coord != NULL); ++ assert("nikita-1650", ++ item_plugin_by_coord(coord) == ++ item_plugin_by_id(NODE_POINTER_ID)); ++ return (internal_item_layout *) item_body_by_coord(coord); ++} ++ ++void reiser4_update_internal(const coord_t * coord, ++ const reiser4_block_nr * blocknr) ++{ ++ internal_item_layout *item = internal_at(coord); ++ assert("nikita-2959", reiser4_blocknr_is_sane(blocknr)); ++ ++ put_unaligned(cpu_to_le64(*blocknr), &item->pointer); ++} ++ ++/* return child block number stored in the internal item at @coord */ ++static reiser4_block_nr pointer_at(const coord_t * coord /* coord of item */ ) ++{ ++ assert("nikita-608", coord != NULL); ++ return le64_to_cpu(get_unaligned(&internal_at(coord)->pointer)); ++} ++ ++/* get znode pointed to by internal @item */ ++static znode *znode_at(const coord_t * item /* coord of item */ , ++ znode * parent /* parent node */ ) ++{ ++ return child_znode(item, parent, 1, 0); ++} ++ ++/* store pointer from internal item into "block". 
Implementation of ++ ->down_link() method */ ++void down_link_internal(const coord_t * coord /* coord of item */ , ++ const reiser4_key * key UNUSED_ARG /* key to get ++ * pointer for */ , ++ reiser4_block_nr * block /* resulting block number */ ) ++{ ++ ON_DEBUG(reiser4_key item_key); ++ ++ assert("nikita-609", coord != NULL); ++ assert("nikita-611", block != NULL); ++ assert("nikita-612", (key == NULL) || ++ /* twig horrors */ ++ (znode_get_level(coord->node) == TWIG_LEVEL) ++ || keyle(item_key_by_coord(coord, &item_key), key)); ++ ++ *block = pointer_at(coord); ++ assert("nikita-2960", reiser4_blocknr_is_sane(block)); ++} ++ ++/* Get the child's block number, or 0 if the block is unallocated. */ ++int ++utmost_child_real_block_internal(const coord_t * coord, sideof side UNUSED_ARG, ++ reiser4_block_nr * block) ++{ ++ assert("jmacd-2059", coord != NULL); ++ ++ *block = pointer_at(coord); ++ assert("nikita-2961", reiser4_blocknr_is_sane(block)); ++ ++ if (reiser4_blocknr_is_fake(block)) { ++ *block = 0; ++ } ++ ++ return 0; ++} ++ ++/* Return the child. 
*/ ++int ++utmost_child_internal(const coord_t * coord, sideof side UNUSED_ARG, ++ jnode ** childp) ++{ ++ reiser4_block_nr block = pointer_at(coord); ++ znode *child; ++ ++ assert("jmacd-2059", childp != NULL); ++ assert("nikita-2962", reiser4_blocknr_is_sane(&block)); ++ ++ child = zlook(znode_get_tree(coord->node), &block); ++ ++ if (IS_ERR(child)) { ++ return PTR_ERR(child); ++ } ++ ++ *childp = ZJNODE(child); ++ ++ return 0; ++} ++ ++#if REISER4_DEBUG ++ ++static void check_link(znode * left, znode * right) ++{ ++ znode *scan; ++ ++ for (scan = left; scan != right; scan = scan->right) { ++ if (ZF_ISSET(scan, JNODE_RIP)) ++ break; ++ if (znode_is_right_connected(scan) && scan->right != NULL) { ++ if (ZF_ISSET(scan->right, JNODE_RIP)) ++ break; ++ assert("nikita-3285", ++ znode_is_left_connected(scan->right)); ++ assert("nikita-3265", ++ ergo(scan != left, ++ ZF_ISSET(scan, JNODE_HEARD_BANSHEE))); ++ assert("nikita-3284", scan->right->left == scan); ++ } else ++ break; ++ } ++} ++ ++int check__internal(const coord_t * coord, const char **error) ++{ ++ reiser4_block_nr blk; ++ znode *child; ++ coord_t cpy; ++ ++ blk = pointer_at(coord); ++ if (!reiser4_blocknr_is_sane(&blk)) { ++ *error = "Invalid pointer"; ++ return -1; ++ } ++ coord_dup(&cpy, coord); ++ child = znode_at(&cpy, cpy.node); ++ if (child != NULL) { ++ znode *left_child; ++ znode *right_child; ++ ++ left_child = right_child = NULL; ++ ++ assert("nikita-3256", znode_invariant(child)); ++ if (coord_prev_item(&cpy) == 0 && item_is_internal(&cpy)) { ++ left_child = znode_at(&cpy, cpy.node); ++ if (left_child != NULL) { ++ read_lock_tree(znode_get_tree(child)); ++ check_link(left_child, child); ++ read_unlock_tree(znode_get_tree(child)); ++ zput(left_child); ++ } ++ } ++ coord_dup(&cpy, coord); ++ if (coord_next_item(&cpy) == 0 && item_is_internal(&cpy)) { ++ right_child = znode_at(&cpy, cpy.node); ++ if (right_child != NULL) { ++ read_lock_tree(znode_get_tree(child)); ++ check_link(child, right_child); 
++ read_unlock_tree(znode_get_tree(child)); ++ zput(right_child); ++ } ++ } ++ zput(child); ++ } ++ return 0; ++} ++ ++#endif /* REISER4_DEBUG */ ++ ++/* return true only if this item really points to "block" */ ++/* Audited by: green(2002.06.14) */ ++int has_pointer_to_internal(const coord_t * coord /* coord of item */ , ++ const reiser4_block_nr * block /* block number to ++ * check */ ) ++{ ++ assert("nikita-613", coord != NULL); ++ assert("nikita-614", block != NULL); ++ ++ return pointer_at(coord) == *block; ++} ++ ++/* hook called by ->create_item() method of node plugin after new internal ++ item was just created. ++ ++ This is point where pointer to new node is inserted into tree. Initialize ++ parent pointer in child znode, insert child into sibling list and slum. ++ ++*/ ++int create_hook_internal(const coord_t * item /* coord of item */ , ++ void *arg /* child's left neighbor, if any */ ) ++{ ++ znode *child; ++ __u64 child_ptr; ++ ++ assert("nikita-1252", item != NULL); ++ assert("nikita-1253", item->node != NULL); ++ assert("nikita-1181", znode_get_level(item->node) > LEAF_LEVEL); ++ assert("nikita-1450", item->unit_pos == 0); ++ ++ /* ++ * preparing to item insertion build_child_ptr_data sets pointer to ++ * data to be inserted to jnode's blocknr which is in cpu byte ++ * order. Node's create_item simply copied those data. As result we ++ * have child pointer in cpu's byte order. Convert content of internal ++ * item to little endian byte order. 
++ */ ++ child_ptr = get_unaligned((__u64 *)item_body_by_coord(item)); ++ reiser4_update_internal(item, &child_ptr); ++ ++ child = znode_at(item, item->node); ++ if (child != NULL && !IS_ERR(child)) { ++ znode *left; ++ int result = 0; ++ reiser4_tree *tree; ++ ++ left = arg; ++ tree = znode_get_tree(item->node); ++ write_lock_tree(tree); ++ write_lock_dk(tree); ++ assert("nikita-1400", (child->in_parent.node == NULL) ++ || (znode_above_root(child->in_parent.node))); ++ ++item->node->c_count; ++ coord_to_parent_coord(item, &child->in_parent); ++ sibling_list_insert_nolock(child, left); ++ ++ assert("nikita-3297", ZF_ISSET(child, JNODE_ORPHAN)); ++ ZF_CLR(child, JNODE_ORPHAN); ++ ++ if ((left != NULL) && !keyeq(znode_get_rd_key(left), ++ znode_get_rd_key(child))) { ++ znode_set_rd_key(child, znode_get_rd_key(left)); ++ } ++ write_unlock_dk(tree); ++ write_unlock_tree(tree); ++ zput(child); ++ return result; ++ } else { ++ if (child == NULL) ++ child = ERR_PTR(-EIO); ++ return PTR_ERR(child); ++ } ++} ++ ++/* hook called by ->cut_and_kill() method of node plugin just before internal ++ item is removed. ++ ++ This is point where empty node is removed from the tree. Clear parent ++ pointer in child, and mark node for pending deletion. ++ ++ Node will be actually deleted later and in several installations: ++ ++ . when last lock on this node will be released, node will be removed from ++ the sibling list and its lock will be invalidated ++ ++ . when last reference to this node will be dropped, bitmap will be updated ++ and node will be actually removed from the memory. 
++ ++*/ ++int kill_hook_internal(const coord_t * item /* coord of item */ , ++ pos_in_node_t from UNUSED_ARG /* start unit */ , ++ pos_in_node_t count UNUSED_ARG /* stop unit */ , ++ struct carry_kill_data *p UNUSED_ARG) ++{ ++ znode *child; ++ ++ assert("nikita-1222", item != NULL); ++ assert("nikita-1224", from == 0); ++ assert("nikita-1225", count == 1); ++ ++ child = znode_at(item, item->node); ++ if (IS_ERR(child)) ++ return PTR_ERR(child); ++ else if (node_is_empty(child)) { ++ reiser4_tree *tree; ++ ++ assert("nikita-1397", znode_is_write_locked(child)); ++ assert("nikita-1398", child->c_count == 0); ++ assert("nikita-2546", ZF_ISSET(child, JNODE_HEARD_BANSHEE)); ++ ++ tree = znode_get_tree(item->node); ++ write_lock_tree(tree); ++ init_parent_coord(&child->in_parent, NULL); ++ --item->node->c_count; ++ write_unlock_tree(tree); ++ zput(child); ++ return 0; ++ } else { ++ warning("nikita-1223", ++ "Cowardly refuse to remove link to non-empty node"); ++ zput(child); ++ return RETERR(-EIO); ++ } ++} ++ ++/* hook called by ->shift() node plugin method when iternal item was just ++ moved from one node to another. 
++ ++ Update parent pointer in child and c_counts in old and new parent ++ ++*/ ++int shift_hook_internal(const coord_t * item /* coord of item */ , ++ unsigned from UNUSED_ARG /* start unit */ , ++ unsigned count UNUSED_ARG /* stop unit */ , ++ znode * old_node /* old parent */ ) ++{ ++ znode *child; ++ znode *new_node; ++ reiser4_tree *tree; ++ ++ assert("nikita-1276", item != NULL); ++ assert("nikita-1277", from == 0); ++ assert("nikita-1278", count == 1); ++ assert("nikita-1451", item->unit_pos == 0); ++ ++ new_node = item->node; ++ assert("nikita-2132", new_node != old_node); ++ tree = znode_get_tree(item->node); ++ child = child_znode(item, old_node, 1, 0); ++ if (child == NULL) ++ return 0; ++ if (!IS_ERR(child)) { ++ write_lock_tree(tree); ++ ++new_node->c_count; ++ assert("nikita-1395", znode_parent(child) == old_node); ++ assert("nikita-1396", old_node->c_count > 0); ++ coord_to_parent_coord(item, &child->in_parent); ++ assert("nikita-1781", znode_parent(child) == new_node); ++ assert("nikita-1782", ++ check_tree_pointer(item, child) == NS_FOUND); ++ --old_node->c_count; ++ write_unlock_tree(tree); ++ zput(child); ++ return 0; ++ } else ++ return PTR_ERR(child); ++} ++ ++/* plugin->u.item.b.max_key_inside - not defined */ ++ ++/* plugin->u.item.b.nr_units - item.c:single_unit */ ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/internal.h b/fs/reiser4/plugin/item/internal.h +new file mode 100644 +index 0000000..27aa27d +--- /dev/null ++++ b/fs/reiser4/plugin/item/internal.h +@@ -0,0 +1,57 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++/* Internal item contains down-link to the child of the internal/twig ++ node in a tree. It is internal items that are actually used during ++ tree traversal. 
*/ ++ ++#if !defined( __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__ ) ++#define __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__ ++ ++#include "../../forward.h" ++#include "../../dformat.h" ++ ++/* on-disk layout of internal item */ ++typedef struct internal_item_layout { ++ /* 0 */ reiser4_dblock_nr pointer; ++ /* 4 */ ++} internal_item_layout; ++ ++struct cut_list; ++ ++int mergeable_internal(const coord_t * p1, const coord_t * p2); ++lookup_result lookup_internal(const reiser4_key * key, lookup_bias bias, ++ coord_t * coord); ++/* store pointer from internal item into "block". Implementation of ++ ->down_link() method */ ++extern void down_link_internal(const coord_t * coord, const reiser4_key * key, ++ reiser4_block_nr * block); ++extern int has_pointer_to_internal(const coord_t * coord, ++ const reiser4_block_nr * block); ++extern int create_hook_internal(const coord_t * item, void *arg); ++extern int kill_hook_internal(const coord_t * item, pos_in_node_t from, ++ pos_in_node_t count, struct carry_kill_data *); ++extern int shift_hook_internal(const coord_t * item, unsigned from, ++ unsigned count, znode * old_node); ++extern void reiser4_print_internal(const char *prefix, coord_t * coord); ++ ++extern int utmost_child_internal(const coord_t * coord, sideof side, ++ jnode ** child); ++int utmost_child_real_block_internal(const coord_t * coord, sideof side, ++ reiser4_block_nr * block); ++ ++extern void reiser4_update_internal(const coord_t * coord, ++ const reiser4_block_nr * blocknr); ++/* FIXME: reiserfs has check_internal */ ++extern int check__internal(const coord_t * coord, const char **error); ++ ++/* __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/item.c b/fs/reiser4/plugin/item/item.c +new file mode 100644 +index 0000000..e226f04 +--- /dev/null ++++ b/fs/reiser4/plugin/item/item.c +@@ -0,0 +1,719 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* definition of item plugins. */ ++ ++#include "../../forward.h" ++#include "../../debug.h" ++#include "../../key.h" ++#include "../../coord.h" ++#include "../plugin_header.h" ++#include "sde.h" ++#include "internal.h" ++#include "item.h" ++#include "static_stat.h" ++#include "../plugin.h" ++#include "../../znode.h" ++#include "../../tree.h" ++#include "../../context.h" ++#include "ctail.h" ++ ++/* return pointer to item body */ ++void item_body_by_coord_hard(coord_t * coord /* coord to query */ ) ++{ ++ assert("nikita-324", coord != NULL); ++ assert("nikita-325", coord->node != NULL); ++ assert("nikita-326", znode_is_loaded(coord->node)); ++ assert("nikita-3200", coord->offset == INVALID_OFFSET); ++ ++ coord->offset = ++ node_plugin_by_node(coord->node)->item_by_coord(coord) - ++ zdata(coord->node); ++ ON_DEBUG(coord->body_v = coord->node->times_locked); ++} ++ ++void *item_body_by_coord_easy(const coord_t * coord /* coord to query */ ) ++{ ++ return zdata(coord->node) + coord->offset; ++} ++ ++#if REISER4_DEBUG ++ ++int item_body_is_valid(const coord_t * coord) ++{ ++ return ++ coord->offset == ++ node_plugin_by_node(coord->node)->item_by_coord(coord) - ++ zdata(coord->node); ++} ++ ++#endif ++ ++/* return length of item at @coord */ ++pos_in_node_t item_length_by_coord(const coord_t * coord /* coord to query */ ) ++{ ++ int len; ++ ++ assert("nikita-327", coord != NULL); ++ assert("nikita-328", coord->node != NULL); ++ assert("nikita-329", znode_is_loaded(coord->node)); ++ ++ len = node_plugin_by_node(coord->node)->length_by_coord(coord); ++ 
return len; ++} ++ ++void obtain_item_plugin(const coord_t * coord) ++{ ++ assert("nikita-330", coord != NULL); ++ assert("nikita-331", coord->node != NULL); ++ assert("nikita-332", znode_is_loaded(coord->node)); ++ ++ coord_set_iplug((coord_t *) coord, ++ node_plugin_by_node(coord->node)-> ++ plugin_by_coord(coord)); ++ assert("nikita-2479", ++ coord_iplug(coord) == ++ node_plugin_by_node(coord->node)->plugin_by_coord(coord)); ++} ++ ++/* return id of item */ ++/* Audited by: green(2002.06.15) */ ++item_id item_id_by_coord(const coord_t * coord /* coord to query */ ) ++{ ++ assert("vs-539", coord != NULL); ++ assert("vs-538", coord->node != NULL); ++ assert("vs-537", znode_is_loaded(coord->node)); ++ assert("vs-536", item_plugin_by_coord(coord) != NULL); ++ assert("vs-540", ++ item_id_by_plugin(item_plugin_by_coord(coord)) < LAST_ITEM_ID); ++ ++ return item_id_by_plugin(item_plugin_by_coord(coord)); ++} ++ ++/* return key of item at @coord */ ++/* Audited by: green(2002.06.15) */ ++reiser4_key *item_key_by_coord(const coord_t * coord /* coord to query */ , ++ reiser4_key * key /* result */ ) ++{ ++ assert("nikita-338", coord != NULL); ++ assert("nikita-339", coord->node != NULL); ++ assert("nikita-340", znode_is_loaded(coord->node)); ++ ++ return node_plugin_by_node(coord->node)->key_at(coord, key); ++} ++ ++/* this returns max key in the item */ ++reiser4_key *max_item_key_by_coord(const coord_t * coord /* coord to query */ , ++ reiser4_key * key /* result */ ) ++{ ++ coord_t last; ++ ++ assert("nikita-338", coord != NULL); ++ assert("nikita-339", coord->node != NULL); ++ assert("nikita-340", znode_is_loaded(coord->node)); ++ ++ /* make coord pointing to last item's unit */ ++ coord_dup(&last, coord); ++ last.unit_pos = coord_num_units(&last) - 1; ++ assert("vs-1560", coord_is_existing_unit(&last)); ++ ++ max_unit_key_by_coord(&last, key); ++ return key; ++} ++ ++/* return key of unit at @coord */ ++reiser4_key *unit_key_by_coord(const coord_t * coord /* coord to 
query */ , ++ reiser4_key * key /* result */ ) ++{ ++ assert("nikita-772", coord != NULL); ++ assert("nikita-774", coord->node != NULL); ++ assert("nikita-775", znode_is_loaded(coord->node)); ++ ++ if (item_plugin_by_coord(coord)->b.unit_key != NULL) ++ return item_plugin_by_coord(coord)->b.unit_key(coord, key); ++ else ++ return item_key_by_coord(coord, key); ++} ++ ++/* return the biggest key contained the unit @coord */ ++reiser4_key *max_unit_key_by_coord(const coord_t * coord /* coord to query */ , ++ reiser4_key * key /* result */ ) ++{ ++ assert("nikita-772", coord != NULL); ++ assert("nikita-774", coord->node != NULL); ++ assert("nikita-775", znode_is_loaded(coord->node)); ++ ++ if (item_plugin_by_coord(coord)->b.max_unit_key != NULL) ++ return item_plugin_by_coord(coord)->b.max_unit_key(coord, key); ++ else ++ return unit_key_by_coord(coord, key); ++} ++ ++/* ->max_key_inside() method for items consisting of exactly one key (like ++ stat-data) */ ++static reiser4_key *max_key_inside_single_key(const coord_t * ++ coord /* coord of item */ , ++ reiser4_key * ++ result /* resulting key */ ) ++{ ++ assert("nikita-604", coord != NULL); ++ ++ /* coord -> key is starting key of this item and it has to be already ++ filled in */ ++ return unit_key_by_coord(coord, result); ++} ++ ++/* ->nr_units() method for items consisting of exactly one unit always */ ++pos_in_node_t ++nr_units_single_unit(const coord_t * coord UNUSED_ARG /* coord of item */ ) ++{ ++ return 1; ++} ++ ++static int ++paste_no_paste(coord_t * coord UNUSED_ARG, ++ reiser4_item_data * data UNUSED_ARG, ++ carry_plugin_info * info UNUSED_ARG) ++{ ++ return 0; ++} ++ ++/* default ->fast_paste() method */ ++static int ++agree_to_fast_op(const coord_t * coord UNUSED_ARG /* coord of item */ ) ++{ ++ return 1; ++} ++ ++int item_can_contain_key(const coord_t * item /* coord of item */ , ++ const reiser4_key * key /* key to check */ , ++ const reiser4_item_data * data /* parameters of item ++ * being created 
*/ ) ++{ ++ item_plugin *iplug; ++ reiser4_key min_key_in_item; ++ reiser4_key max_key_in_item; ++ ++ assert("nikita-1658", item != NULL); ++ assert("nikita-1659", key != NULL); ++ ++ iplug = item_plugin_by_coord(item); ++ if (iplug->b.can_contain_key != NULL) ++ return iplug->b.can_contain_key(item, key, data); ++ else { ++ assert("nikita-1681", iplug->b.max_key_inside != NULL); ++ item_key_by_coord(item, &min_key_in_item); ++ iplug->b.max_key_inside(item, &max_key_in_item); ++ ++ /* can contain key if ++ min_key_in_item <= key && ++ key <= max_key_in_item ++ */ ++ return keyle(&min_key_in_item, key) ++ && keyle(key, &max_key_in_item); ++ } ++} ++ ++/* mergeable method for non mergeable items */ ++static int ++not_mergeable(const coord_t * i1 UNUSED_ARG, const coord_t * i2 UNUSED_ARG) ++{ ++ return 0; ++} ++ ++/* return 0 if @item1 and @item2 are not mergeable, !0 - otherwise */ ++int are_items_mergeable(const coord_t * i1 /* coord of first item */ , ++ const coord_t * i2 /* coord of second item */ ) ++{ ++ item_plugin *iplug; ++ reiser4_key k1; ++ reiser4_key k2; ++ ++ assert("nikita-1336", i1 != NULL); ++ assert("nikita-1337", i2 != NULL); ++ ++ iplug = item_plugin_by_coord(i1); ++ assert("nikita-1338", iplug != NULL); ++ ++ /* NOTE-NIKITA are_items_mergeable() is also called by assertions in ++ shifting code when nodes are in "suspended" state. 
*/ ++ assert("nikita-1663", ++ keyle(item_key_by_coord(i1, &k1), item_key_by_coord(i2, &k2))); ++ ++ if (iplug->b.mergeable != NULL) { ++ return iplug->b.mergeable(i1, i2); ++ } else if (iplug->b.max_key_inside != NULL) { ++ iplug->b.max_key_inside(i1, &k1); ++ item_key_by_coord(i2, &k2); ++ ++ /* mergeable if ->max_key_inside() >= key of i2; */ ++ return keyge(iplug->b.max_key_inside(i1, &k1), ++ item_key_by_coord(i2, &k2)); ++ } else { ++ item_key_by_coord(i1, &k1); ++ item_key_by_coord(i2, &k2); ++ ++ return ++ (get_key_locality(&k1) == get_key_locality(&k2)) && ++ (get_key_objectid(&k1) == get_key_objectid(&k2)) ++ && (iplug == item_plugin_by_coord(i2)); ++ } ++} ++ ++int item_is_extent(const coord_t * item) ++{ ++ assert("vs-482", coord_is_existing_item(item)); ++ return item_id_by_coord(item) == EXTENT_POINTER_ID; ++} ++ ++int item_is_tail(const coord_t * item) ++{ ++ assert("vs-482", coord_is_existing_item(item)); ++ return item_id_by_coord(item) == FORMATTING_ID; ++} ++ ++#if REISER4_DEBUG ++ ++int item_is_statdata(const coord_t * item) ++{ ++ assert("vs-516", coord_is_existing_item(item)); ++ return plugin_of_group(item_plugin_by_coord(item), STAT_DATA_ITEM_TYPE); ++} ++ ++int item_is_ctail(const coord_t * item) ++{ ++ assert("edward-xx", coord_is_existing_item(item)); ++ return item_id_by_coord(item) == CTAIL_ID; ++} ++ ++#endif /* REISER4_DEBUG */ ++ ++static int change_item(struct inode *inode, ++ reiser4_plugin * plugin, ++ pset_member memb) ++{ ++ /* cannot change constituent item (sd, or dir_item) */ ++ return RETERR(-EINVAL); ++} ++ ++static reiser4_plugin_ops item_plugin_ops = { ++ .init = NULL, ++ .load = NULL, ++ .save_len = NULL, ++ .save = NULL, ++ .change = change_item ++}; ++ ++item_plugin item_plugins[LAST_ITEM_ID] = { ++ [STATIC_STAT_DATA_ID] = { ++ .h = { ++ .type_id = REISER4_ITEM_PLUGIN_TYPE, ++ .id = STATIC_STAT_DATA_ID, ++ .groups = (1 << STAT_DATA_ITEM_TYPE), ++ .pops = &item_plugin_ops, ++ .label = "sd", ++ .desc = "stat-data", ++ 
.linkage = {NULL, NULL} ++ }, ++ .b = { ++ .max_key_inside = max_key_inside_single_key, ++ .can_contain_key = NULL, ++ .mergeable = not_mergeable, ++ .nr_units = nr_units_single_unit, ++ .lookup = NULL, ++ .init = NULL, ++ .paste = paste_no_paste, ++ .fast_paste = NULL, ++ .can_shift = NULL, ++ .copy_units = NULL, ++ .create_hook = NULL, ++ .kill_hook = NULL, ++ .shift_hook = NULL, ++ .cut_units = NULL, ++ .kill_units = NULL, ++ .unit_key = NULL, ++ .max_unit_key = NULL, ++ .estimate = NULL, ++ .item_data_by_flow = NULL, ++#if REISER4_DEBUG ++ .check = NULL ++#endif ++ }, ++ .f = { ++ .utmost_child = NULL, ++ .utmost_child_real_block = NULL, ++ .update = NULL, ++ .scan = NULL, ++ .convert = NULL ++ }, ++ .s = { ++ .sd = { ++ .init_inode = init_inode_static_sd, ++ .save_len = save_len_static_sd, ++ .save = save_static_sd ++ } ++ } ++ }, ++ [SIMPLE_DIR_ENTRY_ID] = { ++ .h = { ++ .type_id = REISER4_ITEM_PLUGIN_TYPE, ++ .id = SIMPLE_DIR_ENTRY_ID, ++ .groups = (1 << DIR_ENTRY_ITEM_TYPE), ++ .pops = &item_plugin_ops, ++ .label = "de", ++ .desc = "directory entry", ++ .linkage = {NULL, NULL} ++ }, ++ .b = { ++ .max_key_inside = max_key_inside_single_key, ++ .can_contain_key = NULL, ++ .mergeable = NULL, ++ .nr_units = nr_units_single_unit, ++ .lookup = NULL, ++ .init = NULL, ++ .paste = NULL, ++ .fast_paste = NULL, ++ .can_shift = NULL, ++ .copy_units = NULL, ++ .create_hook = NULL, ++ .kill_hook = NULL, ++ .shift_hook = NULL, ++ .cut_units = NULL, ++ .kill_units = NULL, ++ .unit_key = NULL, ++ .max_unit_key = NULL, ++ .estimate = NULL, ++ .item_data_by_flow = NULL, ++#if REISER4_DEBUG ++ .check = NULL ++#endif ++ }, ++ .f = { ++ .utmost_child = NULL, ++ .utmost_child_real_block = NULL, ++ .update = NULL, ++ .scan = NULL, ++ .convert = NULL ++ }, ++ .s = { ++ .dir = { ++ .extract_key = extract_key_de, ++ .update_key = update_key_de, ++ .extract_name = extract_name_de, ++ .extract_file_type = extract_file_type_de, ++ .add_entry = add_entry_de, ++ .rem_entry = rem_entry_de, 
++ .max_name_len = max_name_len_de ++ } ++ } ++ }, ++ [COMPOUND_DIR_ID] = { ++ .h = { ++ .type_id = REISER4_ITEM_PLUGIN_TYPE, ++ .id = COMPOUND_DIR_ID, ++ .groups = (1 << DIR_ENTRY_ITEM_TYPE), ++ .pops = &item_plugin_ops, ++ .label = "cde", ++ .desc = "compressed directory entry", ++ .linkage = {NULL, NULL} ++ }, ++ .b = { ++ .max_key_inside = max_key_inside_cde, ++ .can_contain_key = can_contain_key_cde, ++ .mergeable = mergeable_cde, ++ .nr_units = nr_units_cde, ++ .lookup = lookup_cde, ++ .init = init_cde, ++ .paste = paste_cde, ++ .fast_paste = agree_to_fast_op, ++ .can_shift = can_shift_cde, ++ .copy_units = copy_units_cde, ++ .create_hook = NULL, ++ .kill_hook = NULL, ++ .shift_hook = NULL, ++ .cut_units = cut_units_cde, ++ .kill_units = kill_units_cde, ++ .unit_key = unit_key_cde, ++ .max_unit_key = unit_key_cde, ++ .estimate = estimate_cde, ++ .item_data_by_flow = NULL, ++#if REISER4_DEBUG ++ .check = reiser4_check_cde ++#endif ++ }, ++ .f = { ++ .utmost_child = NULL, ++ .utmost_child_real_block = NULL, ++ .update = NULL, ++ .scan = NULL, ++ .convert = NULL ++ }, ++ .s = { ++ .dir = { ++ .extract_key = extract_key_cde, ++ .update_key = update_key_cde, ++ .extract_name = extract_name_cde, ++ .extract_file_type = extract_file_type_de, ++ .add_entry = add_entry_cde, ++ .rem_entry = rem_entry_cde, ++ .max_name_len = max_name_len_cde ++ } ++ } ++ }, ++ [NODE_POINTER_ID] = { ++ .h = { ++ .type_id = REISER4_ITEM_PLUGIN_TYPE, ++ .id = NODE_POINTER_ID, ++ .groups = (1 << INTERNAL_ITEM_TYPE), ++ .pops = NULL, ++ .label = "internal", ++ .desc = "internal item", ++ .linkage = {NULL, NULL} ++ }, ++ .b = { ++ .max_key_inside = NULL, ++ .can_contain_key = NULL, ++ .mergeable = mergeable_internal, ++ .nr_units = nr_units_single_unit, ++ .lookup = lookup_internal, ++ .init = NULL, ++ .paste = NULL, ++ .fast_paste = NULL, ++ .can_shift = NULL, ++ .copy_units = NULL, ++ .create_hook = create_hook_internal, ++ .kill_hook = kill_hook_internal, ++ .shift_hook = 
shift_hook_internal, ++ .cut_units = NULL, ++ .kill_units = NULL, ++ .unit_key = NULL, ++ .max_unit_key = NULL, ++ .estimate = NULL, ++ .item_data_by_flow = NULL, ++#if REISER4_DEBUG ++ .check = check__internal ++#endif ++ }, ++ .f = { ++ .utmost_child = utmost_child_internal, ++ .utmost_child_real_block = ++ utmost_child_real_block_internal, ++ .update = reiser4_update_internal, ++ .scan = NULL, ++ .convert = NULL ++ }, ++ .s = { ++ .internal = { ++ .down_link = down_link_internal, ++ .has_pointer_to = has_pointer_to_internal ++ } ++ } ++ }, ++ [EXTENT_POINTER_ID] = { ++ .h = { ++ .type_id = REISER4_ITEM_PLUGIN_TYPE, ++ .id = EXTENT_POINTER_ID, ++ .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE), ++ .pops = NULL, ++ .label = "extent", ++ .desc = "extent item", ++ .linkage = {NULL, NULL} ++ }, ++ .b = { ++ .max_key_inside = max_key_inside_extent, ++ .can_contain_key = can_contain_key_extent, ++ .mergeable = mergeable_extent, ++ .nr_units = nr_units_extent, ++ .lookup = lookup_extent, ++ .init = NULL, ++ .paste = paste_extent, ++ .fast_paste = agree_to_fast_op, ++ .can_shift = can_shift_extent, ++ .create_hook = create_hook_extent, ++ .copy_units = copy_units_extent, ++ .kill_hook = kill_hook_extent, ++ .shift_hook = NULL, ++ .cut_units = cut_units_extent, ++ .kill_units = kill_units_extent, ++ .unit_key = unit_key_extent, ++ .max_unit_key = max_unit_key_extent, ++ .estimate = NULL, ++ .item_data_by_flow = NULL, ++#if REISER4_DEBUG ++ .check = reiser4_check_extent ++#endif ++ }, ++ .f = { ++ .utmost_child = utmost_child_extent, ++ .utmost_child_real_block = ++ utmost_child_real_block_extent, ++ .update = NULL, ++ .scan = reiser4_scan_extent, ++ .convert = NULL, ++ .key_by_offset = key_by_offset_extent ++ }, ++ .s = { ++ .file = { ++ .write = reiser4_write_extent, ++ .read = reiser4_read_extent, ++ .readpage = reiser4_readpage_extent, ++ .get_block = get_block_address_extent, ++ .append_key = append_key_extent, ++ .init_coord_extension = ++ init_coord_extension_extent 
++ } ++ } ++ }, ++ [FORMATTING_ID] = { ++ .h = { ++ .type_id = REISER4_ITEM_PLUGIN_TYPE, ++ .id = FORMATTING_ID, ++ .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE), ++ .pops = NULL, ++ .label = "body", ++ .desc = "body (or tail?) item", ++ .linkage = {NULL, NULL} ++ }, ++ .b = { ++ .max_key_inside = max_key_inside_tail, ++ .can_contain_key = can_contain_key_tail, ++ .mergeable = mergeable_tail, ++ .nr_units = nr_units_tail, ++ .lookup = lookup_tail, ++ .init = NULL, ++ .paste = paste_tail, ++ .fast_paste = agree_to_fast_op, ++ .can_shift = can_shift_tail, ++ .create_hook = NULL, ++ .copy_units = copy_units_tail, ++ .kill_hook = kill_hook_tail, ++ .shift_hook = NULL, ++ .cut_units = cut_units_tail, ++ .kill_units = kill_units_tail, ++ .unit_key = unit_key_tail, ++ .max_unit_key = unit_key_tail, ++ .estimate = NULL, ++ .item_data_by_flow = NULL, ++#if REISER4_DEBUG ++ .check = NULL ++#endif ++ }, ++ .f = { ++ .utmost_child = NULL, ++ .utmost_child_real_block = NULL, ++ .update = NULL, ++ .scan = NULL, ++ .convert = NULL ++ }, ++ .s = { ++ .file = { ++ .write = reiser4_write_tail, ++ .read = reiser4_read_tail, ++ .readpage = readpage_tail, ++ .get_block = get_block_address_tail, ++ .append_key = append_key_tail, ++ .init_coord_extension = ++ init_coord_extension_tail ++ } ++ } ++ }, ++ [CTAIL_ID] = { ++ .h = { ++ .type_id = REISER4_ITEM_PLUGIN_TYPE, ++ .id = CTAIL_ID, ++ .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE), ++ .pops = NULL, ++ .label = "ctail", ++ .desc = "cryptcompress tail item", ++ .linkage = {NULL, NULL} ++ }, ++ .b = { ++ .max_key_inside = max_key_inside_tail, ++ .can_contain_key = can_contain_key_ctail, ++ .mergeable = mergeable_ctail, ++ .nr_units = nr_units_ctail, ++ .lookup = NULL, ++ .init = init_ctail, ++ .paste = paste_ctail, ++ .fast_paste = agree_to_fast_op, ++ .can_shift = can_shift_ctail, ++ .create_hook = create_hook_ctail, ++ .copy_units = copy_units_ctail, ++ .kill_hook = kill_hook_ctail, ++ .shift_hook = shift_hook_ctail, ++ .cut_units = 
cut_units_ctail, ++ .kill_units = kill_units_ctail, ++ .unit_key = unit_key_tail, ++ .max_unit_key = unit_key_tail, ++ .estimate = estimate_ctail, ++ .item_data_by_flow = NULL, ++#if REISER4_DEBUG ++ .check = check_ctail ++#endif ++ }, ++ .f = { ++ .utmost_child = utmost_child_ctail, ++ /* FIXME-EDWARD: write this */ ++ .utmost_child_real_block = NULL, ++ .update = NULL, ++ .scan = scan_ctail, ++ .convert = convert_ctail ++ }, ++ .s = { ++ .file = { ++ .write = NULL, ++ .read = read_ctail, ++ .readpage = readpage_ctail, ++ .get_block = get_block_address_tail, ++ .append_key = append_key_ctail, ++ .init_coord_extension = ++ init_coord_extension_tail ++ } ++ } ++ }, ++ [BLACK_BOX_ID] = { ++ .h = { ++ .type_id = REISER4_ITEM_PLUGIN_TYPE, ++ .id = BLACK_BOX_ID, ++ .groups = (1 << OTHER_ITEM_TYPE), ++ .pops = NULL, ++ .label = "blackbox", ++ .desc = "black box item", ++ .linkage = {NULL, NULL} ++ }, ++ .b = { ++ .max_key_inside = NULL, ++ .can_contain_key = NULL, ++ .mergeable = not_mergeable, ++ .nr_units = nr_units_single_unit, ++ /* to need for ->lookup method */ ++ .lookup = NULL, ++ .init = NULL, ++ .paste = NULL, ++ .fast_paste = NULL, ++ .can_shift = NULL, ++ .copy_units = NULL, ++ .create_hook = NULL, ++ .kill_hook = NULL, ++ .shift_hook = NULL, ++ .cut_units = NULL, ++ .kill_units = NULL, ++ .unit_key = NULL, ++ .max_unit_key = NULL, ++ .estimate = NULL, ++ .item_data_by_flow = NULL, ++#if REISER4_DEBUG ++ .check = NULL ++#endif ++ } ++ } ++}; ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/item.h b/fs/reiser4/plugin/item/item.h +new file mode 100644 +index 0000000..0822296 +--- /dev/null ++++ b/fs/reiser4/plugin/item/item.h +@@ -0,0 +1,400 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* first read balance.c comments before reading this */ ++ ++/* An item_plugin implements all of the operations required for ++ balancing that are item specific. */ ++ ++/* an item plugin also implements other operations that are specific to that ++ item. These go into the item specific operations portion of the item ++ handler, and all of the item specific portions of the item handler are put ++ into a union. */ ++ ++#if !defined( __REISER4_ITEM_H__ ) ++#define __REISER4_ITEM_H__ ++ ++#include "../../forward.h" ++#include "../plugin_header.h" ++#include "../../dformat.h" ++#include "../../seal.h" ++#include "../../plugin/file/file.h" ++ ++#include /* for struct file, struct inode */ ++#include /* for struct page */ ++#include /* for struct dentry */ ++ ++typedef enum { ++ STAT_DATA_ITEM_TYPE, ++ DIR_ENTRY_ITEM_TYPE, ++ INTERNAL_ITEM_TYPE, ++ UNIX_FILE_METADATA_ITEM_TYPE, ++ OTHER_ITEM_TYPE ++} item_type_id; ++ ++/* this is the part of each item plugin that all items are expected to ++ support or at least explicitly fail to support by setting the ++ pointer to null. */ ++typedef struct { ++ /* operations called by balancing ++ ++ It is interesting to consider that some of these item ++ operations could be given sources or targets that are not ++ really items in nodes. This could be ok/useful. 
++ ++ */ ++ /* maximal key that can _possibly_ be occupied by this item ++ ++ When inserting, and node ->lookup() method (called by ++ coord_by_key()) reaches an item after binary search, ++ the ->max_key_inside() item plugin method is used to determine ++ whether new item should pasted into existing item ++ (new_key<=max_key_inside()) or new item has to be created ++ (new_key>max_key_inside()). ++ ++ For items that occupy exactly one key (like stat-data) ++ this method should return this key. For items that can ++ grow indefinitely (extent, directory item) this should ++ return reiser4_max_key(). ++ ++ For example extent with the key ++ ++ (LOCALITY,4,OBJID,STARTING-OFFSET), and length BLK blocks, ++ ++ ->max_key_inside is (LOCALITY,4,OBJID,0xffffffffffffffff), and ++ */ ++ reiser4_key *(*max_key_inside) (const coord_t *, reiser4_key *); ++ ++ /* true if item @coord can merge data at @key. */ ++ int (*can_contain_key) (const coord_t *, const reiser4_key *, ++ const reiser4_item_data *); ++ /* mergeable() - check items for mergeability ++ ++ Optional method. Returns true if two items can be merged. ++ ++ */ ++ int (*mergeable) (const coord_t *, const coord_t *); ++ ++ /* number of atomic things in an item. ++ NOTE FOR CONTRIBUTORS: use a generic method ++ nr_units_single_unit() for solid (atomic) items, as ++ tree operations use it as a criterion of solidness ++ (see is_solid_item macro) */ ++ pos_in_node_t(*nr_units) (const coord_t *); ++ ++ /* search within item for a unit within the item, and return a ++ pointer to it. This can be used to calculate how many ++ bytes to shrink an item if you use pointer arithmetic and ++ compare to the start of the item body if the item's data ++ are continuous in the node, if the item's data are not ++ continuous in the node, all sorts of other things are maybe ++ going to break as well. 
*/ ++ lookup_result(*lookup) (const reiser4_key *, lookup_bias, coord_t *); ++ /* method called by ode_plugin->create_item() to initialise new ++ item */ ++ int (*init) (coord_t * target, coord_t * from, ++ reiser4_item_data * data); ++ /* method called (e.g., by reiser4_resize_item()) to place new data ++ into item when it grows */ ++ int (*paste) (coord_t *, reiser4_item_data *, carry_plugin_info *); ++ /* return true if paste into @coord is allowed to skip ++ carry. That is, if such paste would require any changes ++ at the parent level ++ */ ++ int (*fast_paste) (const coord_t *); ++ /* how many but not more than @want units of @source can be ++ shifted into @target node. If pend == append - we try to ++ append last item of @target by first units of @source. If ++ pend == prepend - we try to "prepend" first item in @target ++ by last units of @source. @target node has @free_space ++ bytes of free space. Total size of those units are returned ++ via @size. ++ ++ @target is not NULL if shifting to the mergeable item and ++ NULL is new item will be created during shifting. ++ */ ++ int (*can_shift) (unsigned free_space, coord_t *, ++ znode *, shift_direction, unsigned *size, ++ unsigned want); ++ ++ /* starting off @from-th unit of item @source append or ++ prepend @count units to @target. @target has been already ++ expanded by @free_space bytes. That must be exactly what is ++ needed for those items in @target. If @where_is_free_space ++ == SHIFT_LEFT - free space is at the end of @target item, ++ othersize - it is in the beginning of it. 
*/ ++ void (*copy_units) (coord_t *, coord_t *, ++ unsigned from, unsigned count, ++ shift_direction where_is_free_space, ++ unsigned free_space); ++ ++ int (*create_hook) (const coord_t *, void *); ++ /* do whatever is necessary to do when @count units starting ++ from @from-th one are removed from the tree */ ++ /* FIXME-VS: this is used to be here for, in particular, ++ extents and items of internal type to free blocks they point ++ to at the same time with removing items from a ++ tree. Problems start, however, when dealloc_block fails due ++ to some reason. Item gets removed, but blocks it pointed to ++ are not freed. It is not clear how to fix this for items of ++ internal type because a need to remove internal item may ++ appear in the middle of balancing, and there is no way to ++ undo changes made. OTOH, if space allocator involves ++ balancing to perform dealloc_block - this will probably ++ break balancing due to deadlock issues ++ */ ++ int (*kill_hook) (const coord_t *, pos_in_node_t from, ++ pos_in_node_t count, struct carry_kill_data *); ++ int (*shift_hook) (const coord_t *, unsigned from, unsigned count, ++ znode * _node); ++ ++ /* unit @*from contains @from_key. unit @*to contains @to_key. Cut all keys between @from_key and @to_key ++ including boundaries. When units are cut from item beginning - move space which gets freed to head of ++ item. When units are cut from item end - move freed space to item end. When units are cut from the middle of ++ item - move freed space to item head. Return amount of space which got freed. Save smallest removed key in ++ @smallest_removed if it is not 0. 
Save new first item key in @new_first_key if it is not 0 ++ */ ++ int (*cut_units) (coord_t *, pos_in_node_t from, pos_in_node_t to, ++ struct carry_cut_data *, ++ reiser4_key * smallest_removed, ++ reiser4_key * new_first_key); ++ ++ /* like cut_units, except that these units are removed from the ++ tree, not only from a node */ ++ int (*kill_units) (coord_t *, pos_in_node_t from, pos_in_node_t to, ++ struct carry_kill_data *, ++ reiser4_key * smallest_removed, ++ reiser4_key * new_first); ++ ++ /* if @key_of_coord == 1 - returned key of coord, otherwise - ++ key of unit is returned. If @coord is not set to certain ++ unit - ERR_PTR(-ENOENT) is returned */ ++ reiser4_key *(*unit_key) (const coord_t *, reiser4_key *); ++ reiser4_key *(*max_unit_key) (const coord_t *, reiser4_key *); ++ /* estimate how much space is needed for paste @data into item at ++ @coord. if @coord==0 - estimate insertion, otherwise - estimate ++ pasting ++ */ ++ int (*estimate) (const coord_t *, const reiser4_item_data *); ++ ++ /* converts flow @f to item data. @coord == 0 on insert */ ++ int (*item_data_by_flow) (const coord_t *, const flow_t *, ++ reiser4_item_data *); ++ ++ /*void (*show) (struct seq_file *, coord_t *); */ ++ ++#if REISER4_DEBUG ++ /* used for debugging, every item should have here the most ++ complete possible check of the consistency of the item that ++ the inventor can construct */ ++ int (*check) (const coord_t *, const char **error); ++#endif ++ ++} balance_ops; ++ ++typedef struct { ++ /* return the right or left child of @coord, only if it is in memory */ ++ int (*utmost_child) (const coord_t *, sideof side, jnode ** child); ++ ++ /* return whether the right or left child of @coord has a non-fake ++ block number. 
*/ ++ int (*utmost_child_real_block) (const coord_t *, sideof side, ++ reiser4_block_nr *); ++ /* relocate child at @coord to the @block */ ++ void (*update) (const coord_t *, const reiser4_block_nr *); ++ /* count unformatted nodes per item for leave relocation policy, etc.. */ ++ int (*scan) (flush_scan * scan); ++ /* convert item by flush */ ++ int (*convert) (flush_pos_t * pos); ++ /* backward mapping from jnode offset to a key. */ ++ int (*key_by_offset) (struct inode *, loff_t, reiser4_key *); ++} flush_ops; ++ ++/* operations specific to the directory item */ ++typedef struct { ++ /* extract stat-data key from directory entry at @coord and place it ++ into @key. */ ++ int (*extract_key) (const coord_t *, reiser4_key * key); ++ /* update object key in item. */ ++ int (*update_key) (const coord_t *, const reiser4_key *, lock_handle *); ++ /* extract name from directory entry at @coord and return it */ ++ char *(*extract_name) (const coord_t *, char *buf); ++ /* extract file type (DT_* stuff) from directory entry at @coord and ++ return it */ ++ unsigned (*extract_file_type) (const coord_t *); ++ int (*add_entry) (struct inode * dir, ++ coord_t *, lock_handle *, ++ const struct dentry * name, ++ reiser4_dir_entry_desc * entry); ++ int (*rem_entry) (struct inode * dir, const struct qstr * name, ++ coord_t *, lock_handle *, ++ reiser4_dir_entry_desc * entry); ++ int (*max_name_len) (const struct inode * dir); ++} dir_entry_ops; ++ ++/* operations specific to items regular (unix) file metadata are built of */ ++typedef struct { ++ int (*write) (struct file *, const char __user *, size_t, loff_t *pos); ++ int (*read) (struct file *, flow_t *, hint_t *); ++ int (*readpage) (void *, struct page *); ++ int (*get_block) (const coord_t *, sector_t, sector_t *); ++ /* ++ * key of first byte which is not addressed by the item @coord is set ++ * to. 
++ * For example, for extent item with the key ++ * ++ * (LOCALITY,4,OBJID,STARTING-OFFSET), and length BLK blocks, ++ * ++ * ->append_key is ++ * ++ * (LOCALITY,4,OBJID,STARTING-OFFSET + BLK * block_size) ++ */ ++ reiser4_key *(*append_key) (const coord_t *, reiser4_key *); ++ ++ void (*init_coord_extension) (uf_coord_t *, loff_t); ++} file_ops; ++ ++/* operations specific to items of stat data type */ ++typedef struct { ++ int (*init_inode) (struct inode * inode, char *sd, int len); ++ int (*save_len) (struct inode * inode); ++ int (*save) (struct inode * inode, char **area); ++} sd_ops; ++ ++/* operations specific to internal item */ ++typedef struct { ++ /* all tree traversal want to know from internal item is where ++ to go next. */ ++ void (*down_link) (const coord_t * coord, ++ const reiser4_key * key, reiser4_block_nr * block); ++ /* check that given internal item contains given pointer. */ ++ int (*has_pointer_to) (const coord_t * coord, ++ const reiser4_block_nr * block); ++} internal_item_ops; ++ ++struct item_plugin { ++ /* generic fields */ ++ plugin_header h; ++ ++ /* methods common for all item types */ ++ balance_ops b; ++ /* methods used during flush */ ++ flush_ops f; ++ ++ /* methods specific to particular type of item */ ++ union { ++ dir_entry_ops dir; ++ file_ops file; ++ sd_ops sd; ++ internal_item_ops internal; ++ } s; ++ ++}; ++ ++#define is_solid_item(iplug) ((iplug)->b.nr_units == nr_units_single_unit) ++ ++static inline item_id item_id_by_plugin(item_plugin * plugin) ++{ ++ return plugin->h.id; ++} ++ ++static inline char get_iplugid(item_plugin * iplug) ++{ ++ assert("nikita-2838", iplug != NULL); ++ assert("nikita-2839", iplug->h.id < 0xff); ++ return (char)item_id_by_plugin(iplug); ++} ++ ++extern unsigned long znode_times_locked(const znode * z); ++ ++static inline void coord_set_iplug(coord_t * coord, item_plugin * iplug) ++{ ++ assert("nikita-2837", coord != NULL); ++ assert("nikita-2838", iplug != NULL); ++ coord->iplugid = 
get_iplugid(iplug); ++ ON_DEBUG(coord->plug_v = znode_times_locked(coord->node)); ++} ++ ++static inline item_plugin *coord_iplug(const coord_t * coord) ++{ ++ assert("nikita-2833", coord != NULL); ++ assert("nikita-2834", coord->iplugid != INVALID_PLUGID); ++ assert("nikita-3549", coord->plug_v == znode_times_locked(coord->node)); ++ return (item_plugin *) plugin_by_id(REISER4_ITEM_PLUGIN_TYPE, ++ coord->iplugid); ++} ++ ++extern int item_can_contain_key(const coord_t * item, const reiser4_key * key, ++ const reiser4_item_data *); ++extern int are_items_mergeable(const coord_t * i1, const coord_t * i2); ++extern int item_is_extent(const coord_t *); ++extern int item_is_tail(const coord_t *); ++extern int item_is_statdata(const coord_t * item); ++extern int item_is_ctail(const coord_t *); ++ ++extern pos_in_node_t item_length_by_coord(const coord_t * coord); ++extern pos_in_node_t nr_units_single_unit(const coord_t * coord); ++extern item_id item_id_by_coord(const coord_t * coord /* coord to query */ ); ++extern reiser4_key *item_key_by_coord(const coord_t * coord, reiser4_key * key); ++extern reiser4_key *max_item_key_by_coord(const coord_t *, reiser4_key *); ++extern reiser4_key *unit_key_by_coord(const coord_t * coord, reiser4_key * key); ++extern reiser4_key *max_unit_key_by_coord(const coord_t * coord, ++ reiser4_key * key); ++extern void obtain_item_plugin(const coord_t * coord); ++ ++#if defined(REISER4_DEBUG) ++extern int znode_is_loaded(const znode * node); ++#endif ++ ++/* return plugin of item at @coord */ ++static inline item_plugin *item_plugin_by_coord(const coord_t * ++ coord /* coord to query */ ) ++{ ++ assert("nikita-330", coord != NULL); ++ assert("nikita-331", coord->node != NULL); ++ assert("nikita-332", znode_is_loaded(coord->node)); ++ ++ if (unlikely(!coord_is_iplug_set(coord))) ++ obtain_item_plugin(coord); ++ return coord_iplug(coord); ++} ++ ++/* this returns true if item is of internal type */ ++static inline int item_is_internal(const 
coord_t * item) ++{ ++ assert("vs-483", coord_is_existing_item(item)); ++ return plugin_of_group(item_plugin_by_coord(item), INTERNAL_ITEM_TYPE); ++} ++ ++extern void item_body_by_coord_hard(coord_t * coord); ++extern void *item_body_by_coord_easy(const coord_t * coord); ++#if REISER4_DEBUG ++extern int item_body_is_valid(const coord_t * coord); ++#endif ++ ++/* return pointer to item body */ ++static inline void *item_body_by_coord(const coord_t * ++ coord /* coord to query */ ) ++{ ++ assert("nikita-324", coord != NULL); ++ assert("nikita-325", coord->node != NULL); ++ assert("nikita-326", znode_is_loaded(coord->node)); ++ ++ if (coord->offset == INVALID_OFFSET) ++ item_body_by_coord_hard((coord_t *) coord); ++ assert("nikita-3201", item_body_is_valid(coord)); ++ assert("nikita-3550", coord->body_v == znode_times_locked(coord->node)); ++ return item_body_by_coord_easy(coord); ++} ++ ++/* __REISER4_ITEM_H__ */ ++#endif ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/sde.c b/fs/reiser4/plugin/item/sde.c +new file mode 100644 +index 0000000..27f2400 +--- /dev/null ++++ b/fs/reiser4/plugin/item/sde.c +@@ -0,0 +1,190 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Directory entry implementation */ ++#include "../../forward.h" ++#include "../../debug.h" ++#include "../../dformat.h" ++#include "../../kassign.h" ++#include "../../coord.h" ++#include "sde.h" ++#include "item.h" ++#include "../plugin.h" ++#include "../../znode.h" ++#include "../../carry.h" ++#include "../../tree.h" ++#include "../../inode.h" ++ ++#include /* for struct inode */ ++#include /* for struct dentry */ ++#include ++ ++/* ->extract_key() method of simple directory item plugin. 
*/ ++int extract_key_de(const coord_t * coord /* coord of item */ , ++ reiser4_key * key /* resulting key */ ) ++{ ++ directory_entry_format *dent; ++ ++ assert("nikita-1458", coord != NULL); ++ assert("nikita-1459", key != NULL); ++ ++ dent = (directory_entry_format *) item_body_by_coord(coord); ++ assert("nikita-1158", item_length_by_coord(coord) >= (int)sizeof *dent); ++ return extract_key_from_id(&dent->id, key); ++} ++ ++int ++update_key_de(const coord_t * coord, const reiser4_key * key, ++ lock_handle * lh UNUSED_ARG) ++{ ++ directory_entry_format *dent; ++ obj_key_id obj_id; ++ int result; ++ ++ assert("nikita-2342", coord != NULL); ++ assert("nikita-2343", key != NULL); ++ ++ dent = (directory_entry_format *) item_body_by_coord(coord); ++ result = build_obj_key_id(key, &obj_id); ++ if (result == 0) { ++ dent->id = obj_id; ++ znode_make_dirty(coord->node); ++ } ++ return 0; ++} ++ ++char *extract_dent_name(const coord_t * coord, directory_entry_format * dent, ++ char *buf) ++{ ++ reiser4_key key; ++ ++ unit_key_by_coord(coord, &key); ++ if (get_key_type(&key) != KEY_FILE_NAME_MINOR) ++ reiser4_print_address("oops", znode_get_block(coord->node)); ++ if (!is_longname_key(&key)) { ++ if (is_dot_key(&key)) ++ return (char *)"."; ++ else ++ return extract_name_from_key(&key, buf); ++ } else ++ return (char *)dent->name; ++} ++ ++/* ->extract_name() method of simple directory item plugin. */ ++char *extract_name_de(const coord_t * coord /* coord of item */ , char *buf) ++{ ++ directory_entry_format *dent; ++ ++ assert("nikita-1460", coord != NULL); ++ ++ dent = (directory_entry_format *) item_body_by_coord(coord); ++ return extract_dent_name(coord, dent, buf); ++} ++ ++/* ->extract_file_type() method of simple directory item plugin. */ ++unsigned extract_file_type_de(const coord_t * coord UNUSED_ARG /* coord of ++ * item */ ) ++{ ++ assert("nikita-1764", coord != NULL); ++ /* we don't store file type in the directory entry yet. 
++ ++ But see comments at kassign.h:obj_key_id ++ */ ++ return DT_UNKNOWN; ++} ++ ++int add_entry_de(struct inode *dir /* directory of item */ , ++ coord_t * coord /* coord of item */ , ++ lock_handle * lh /* insertion lock handle */ , ++ const struct dentry *de /* name to add */ , ++ reiser4_dir_entry_desc * entry /* parameters of new directory ++ * entry */ ) ++{ ++ reiser4_item_data data; ++ directory_entry_format *dent; ++ int result; ++ const char *name; ++ int len; ++ int longname; ++ ++ name = de->d_name.name; ++ len = de->d_name.len; ++ assert("nikita-1163", strlen(name) == len); ++ ++ longname = is_longname(name, len); ++ ++ data.length = sizeof *dent; ++ if (longname) ++ data.length += len + 1; ++ data.data = NULL; ++ data.user = 0; ++ data.iplug = item_plugin_by_id(SIMPLE_DIR_ENTRY_ID); ++ ++ /* NOTE-NIKITA quota plugin */ ++ if (DQUOT_ALLOC_SPACE_NODIRTY(dir, data.length)) ++ return -EDQUOT; ++ ++ result = insert_by_coord(coord, &data, &entry->key, lh, 0 /*flags */ ); ++ if (result != 0) ++ return result; ++ ++ dent = (directory_entry_format *) item_body_by_coord(coord); ++ build_inode_key_id(entry->obj, &dent->id); ++ if (longname) { ++ memcpy(dent->name, name, len); ++ put_unaligned(0, &dent->name[len]); ++ } ++ return 0; ++} ++ ++int rem_entry_de(struct inode *dir /* directory of item */ , ++ const struct qstr *name UNUSED_ARG, ++ coord_t * coord /* coord of item */ , ++ lock_handle * lh UNUSED_ARG /* lock handle for ++ * removal */ , ++ reiser4_dir_entry_desc * entry UNUSED_ARG /* parameters of ++ * directory entry ++ * being removed */ ) ++{ ++ coord_t shadow; ++ int result; ++ int length; ++ ++ length = item_length_by_coord(coord); ++ if (inode_get_bytes(dir) < length) { ++ warning("nikita-2627", "Dir is broke: %llu: %llu", ++ (unsigned long long)get_inode_oid(dir), ++ inode_get_bytes(dir)); ++ ++ return RETERR(-EIO); ++ } ++ ++ /* cut_node() is supposed to take pointers to _different_ ++ coords, because it will modify them without respect to ++ 
possible aliasing. To work around this, create temporary copy ++ of @coord. ++ */ ++ coord_dup(&shadow, coord); ++ result = ++ kill_node_content(coord, &shadow, NULL, NULL, NULL, NULL, NULL, 0); ++ if (result == 0) { ++ /* NOTE-NIKITA quota plugin */ ++ DQUOT_FREE_SPACE_NODIRTY(dir, length); ++ } ++ return result; ++} ++ ++int max_name_len_de(const struct inode *dir) ++{ ++ return reiser4_tree_by_inode(dir)->nplug->max_item_size() - ++ sizeof(directory_entry_format) - 2; ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/sde.h b/fs/reiser4/plugin/item/sde.h +new file mode 100644 +index 0000000..f26762a +--- /dev/null ++++ b/fs/reiser4/plugin/item/sde.h +@@ -0,0 +1,66 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Directory entry. */ ++ ++#if !defined( __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ ) ++#define __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ ++ ++#include "../../forward.h" ++#include "../../dformat.h" ++#include "../../kassign.h" ++#include "../../key.h" ++ ++#include ++#include /* for struct dentry */ ++ ++typedef struct directory_entry_format { ++ /* key of object stat-data. It's not necessary to store whole ++ key here, because it's always key of stat-data, so minor ++ packing locality and offset can be omitted here. But this ++ relies on particular key allocation scheme for stat-data, so, ++ for extensibility sake, whole key can be stored here. ++ ++ We store key as array of bytes, because we don't want 8-byte ++ alignment of dir entries. ++ */ ++ obj_key_id id; ++ /* file name. Null terminated string. 
*/ ++ d8 name[0]; ++} directory_entry_format; ++ ++void print_de(const char *prefix, coord_t * coord); ++int extract_key_de(const coord_t * coord, reiser4_key * key); ++int update_key_de(const coord_t * coord, const reiser4_key * key, ++ lock_handle * lh); ++char *extract_name_de(const coord_t * coord, char *buf); ++unsigned extract_file_type_de(const coord_t * coord); ++int add_entry_de(struct inode *dir, coord_t * coord, ++ lock_handle * lh, const struct dentry *name, ++ reiser4_dir_entry_desc * entry); ++int rem_entry_de(struct inode *dir, const struct qstr *name, coord_t * coord, ++ lock_handle * lh, reiser4_dir_entry_desc * entry); ++int max_name_len_de(const struct inode *dir); ++ ++int de_rem_and_shrink(struct inode *dir, coord_t * coord, int length); ++ ++char *extract_dent_name(const coord_t * coord, ++ directory_entry_format * dent, char *buf); ++ ++#if REISER4_LARGE_KEY ++#define DE_NAME_BUF_LEN (24) ++#else ++#define DE_NAME_BUF_LEN (16) ++#endif ++ ++/* __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/static_stat.c b/fs/reiser4/plugin/item/static_stat.c +new file mode 100644 +index 0000000..c38e44a +--- /dev/null ++++ b/fs/reiser4/plugin/item/static_stat.c +@@ -0,0 +1,1106 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* stat data manipulation. 
*/ ++ ++#include "../../forward.h" ++#include "../../super.h" ++#include "../../vfs_ops.h" ++#include "../../inode.h" ++#include "../../debug.h" ++#include "../../dformat.h" ++#include "../object.h" ++#include "../plugin.h" ++#include "../plugin_header.h" ++#include "static_stat.h" ++#include "item.h" ++ ++#include ++#include ++ ++/* see static_stat.h for explanation */ ++ ++/* helper function used while we are dumping/loading inode/plugin state ++ to/from the stat-data. */ ++ ++static void move_on(int *length /* space remaining in stat-data */ , ++ char **area /* current coord in stat data */ , ++ int size_of /* how many bytes to move forward */ ) ++{ ++ assert("nikita-615", length != NULL); ++ assert("nikita-616", area != NULL); ++ ++ *length -= size_of; ++ *area += size_of; ++ ++ assert("nikita-617", *length >= 0); ++} ++ ++/* helper function used while loading inode/plugin state from stat-data. ++ Complain if there is less space in stat-data than was expected. ++ Can only happen on disk corruption. */ ++static int not_enough_space(struct inode *inode /* object being processed */ , ++ const char *where /* error message */ ) ++{ ++ assert("nikita-618", inode != NULL); ++ ++ warning("nikita-619", "Not enough space in %llu while loading %s", ++ (unsigned long long)get_inode_oid(inode), where); ++ ++ return RETERR(-EINVAL); ++} ++ ++/* helper function used while loading inode/plugin state from ++ stat-data. Call it if invalid plugin id was found. */ ++static int unknown_plugin(reiser4_plugin_id id /* invalid id */ , ++ struct inode *inode /* object being processed */ ) ++{ ++ warning("nikita-620", "Unknown plugin %i in %llu", ++ id, (unsigned long long)get_inode_oid(inode)); ++ ++ return RETERR(-EINVAL); ++} ++ ++/* this is installed as ->init_inode() method of ++ item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c). ++ Copies data from on-disk stat-data format into inode. ++ Handles stat-data extensions. 
*/ ++/* was sd_load */ ++int init_inode_static_sd(struct inode *inode /* object being processed */ , ++ char *sd /* stat-data body */ , ++ int len /* length of stat-data */ ) ++{ ++ int result; ++ int bit; ++ int chunk; ++ __u16 mask; ++ __u64 bigmask; ++ reiser4_stat_data_base *sd_base; ++ reiser4_inode *state; ++ ++ assert("nikita-625", inode != NULL); ++ assert("nikita-626", sd != NULL); ++ ++ result = 0; ++ sd_base = (reiser4_stat_data_base *) sd; ++ state = reiser4_inode_data(inode); ++ mask = le16_to_cpu(get_unaligned(&sd_base->extmask)); ++ bigmask = mask; ++ reiser4_inode_set_flag(inode, REISER4_SDLEN_KNOWN); ++ ++ move_on(&len, &sd, sizeof *sd_base); ++ for (bit = 0, chunk = 0; ++ mask != 0 || bit <= LAST_IMPORTANT_SD_EXTENSION; ++ ++bit, mask >>= 1) { ++ if (((bit + 1) % 16) != 0) { ++ /* handle extension */ ++ sd_ext_plugin *sdplug; ++ ++ if (bit >= LAST_SD_EXTENSION) { ++ warning("vpf-1904", ++ "No such extension %i in inode %llu", ++ bit, ++ (unsigned long long) ++ get_inode_oid(inode)); ++ ++ result = RETERR(-EINVAL); ++ break; ++ } ++ ++ sdplug = sd_ext_plugin_by_id(bit); ++ if (sdplug == NULL) { ++ warning("nikita-627", ++ "No such extension %i in inode %llu", ++ bit, ++ (unsigned long long) ++ get_inode_oid(inode)); ++ ++ result = RETERR(-EINVAL); ++ break; ++ } ++ if (mask & 1) { ++ assert("nikita-628", sdplug->present); ++ /* alignment is not supported in node layout ++ plugin yet. 
++ result = align( inode, &len, &sd, ++ sdplug -> alignment ); ++ if( result != 0 ) ++ return result; */ ++ result = sdplug->present(inode, &sd, &len); ++ } else if (sdplug->absent != NULL) ++ result = sdplug->absent(inode); ++ if (result) ++ break; ++ /* else, we are looking at the last bit in 16-bit ++ portion of bitmask */ ++ } else if (mask & 1) { ++ /* next portion of bitmask */ ++ if (len < (int)sizeof(d16)) { ++ warning("nikita-629", ++ "No space for bitmap in inode %llu", ++ (unsigned long long) ++ get_inode_oid(inode)); ++ ++ result = RETERR(-EINVAL); ++ break; ++ } ++ mask = le16_to_cpu(get_unaligned((d16 *)sd)); ++ bigmask <<= 16; ++ bigmask |= mask; ++ move_on(&len, &sd, sizeof(d16)); ++ ++chunk; ++ if (chunk == 3) { ++ if (!(mask & 0x8000)) { ++ /* clear last bit */ ++ mask &= ~0x8000; ++ continue; ++ } ++ /* too much */ ++ warning("nikita-630", ++ "Too many extensions in %llu", ++ (unsigned long long) ++ get_inode_oid(inode)); ++ ++ result = RETERR(-EINVAL); ++ break; ++ } ++ } else ++ /* bitmask exhausted */ ++ break; ++ } ++ state->extmask = bigmask; ++ if (len - (bit / 16 * sizeof(d16)) > 0) { ++ /* alignment in save_len_static_sd() is taken into account ++ -edward */ ++ warning("nikita-631", "unused space in inode %llu", ++ (unsigned long long)get_inode_oid(inode)); ++ } ++ ++ return result; ++} ++ ++/* estimates size of stat-data required to store inode. ++ Installed as ->save_len() method of ++ item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c). 
*/ ++/* was sd_len */ ++int save_len_static_sd(struct inode *inode /* object being processed */ ) ++{ ++ unsigned int result; ++ __u64 mask; ++ int bit; ++ ++ assert("nikita-632", inode != NULL); ++ ++ result = sizeof(reiser4_stat_data_base); ++ mask = reiser4_inode_data(inode)->extmask; ++ for (bit = 0; mask != 0; ++bit, mask >>= 1) { ++ if (mask & 1) { ++ sd_ext_plugin *sdplug; ++ ++ sdplug = sd_ext_plugin_by_id(bit); ++ assert("nikita-633", sdplug != NULL); ++ /* no aligment support ++ result += ++ round_up( result, sdplug -> alignment ) - result; */ ++ result += sdplug->save_len(inode); ++ } ++ } ++ result += bit / 16 * sizeof(d16); ++ return result; ++} ++ ++/* saves inode into stat-data. ++ Installed as ->save() method of ++ item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c). */ ++/* was sd_save */ ++int save_static_sd(struct inode *inode /* object being processed */ , ++ char **area /* where to save stat-data */ ) ++{ ++ int result; ++ __u64 emask; ++ int bit; ++ unsigned int len; ++ reiser4_stat_data_base *sd_base; ++ ++ assert("nikita-634", inode != NULL); ++ assert("nikita-635", area != NULL); ++ ++ result = 0; ++ emask = reiser4_inode_data(inode)->extmask; ++ sd_base = (reiser4_stat_data_base *) * area; ++ put_unaligned(cpu_to_le16((__u16)(emask & 0xffff)), &sd_base->extmask); ++ /*cputod16((unsigned)(emask & 0xffff), &sd_base->extmask);*/ ++ ++ *area += sizeof *sd_base; ++ len = 0xffffffffu; ++ for (bit = 0; emask != 0; ++bit, emask >>= 1) { ++ if (emask & 1) { ++ if ((bit + 1) % 16 != 0) { ++ sd_ext_plugin *sdplug; ++ sdplug = sd_ext_plugin_by_id(bit); ++ assert("nikita-636", sdplug != NULL); ++ /* no alignment support yet ++ align( inode, &len, area, ++ sdplug -> alignment ); */ ++ result = sdplug->save(inode, area); ++ if (result) ++ break; ++ } else { ++ put_unaligned(cpu_to_le16((__u16)(emask & 0xffff)), ++ (d16 *)(*area)); ++ /*cputod16((unsigned)(emask & 0xffff), ++ (d16 *) * area);*/ ++ *area += sizeof(d16); ++ } ++ } ++ } ++ 
return result; ++} ++ ++/* stat-data extension handling functions. */ ++ ++static int present_lw_sd(struct inode *inode /* object being processed */ , ++ char **area /* position in stat-data */ , ++ int *len /* remaining length */ ) ++{ ++ if (*len >= (int)sizeof(reiser4_light_weight_stat)) { ++ reiser4_light_weight_stat *sd_lw; ++ ++ sd_lw = (reiser4_light_weight_stat *) * area; ++ ++ inode->i_mode = le16_to_cpu(get_unaligned(&sd_lw->mode)); ++ inode->i_nlink = le32_to_cpu(get_unaligned(&sd_lw->nlink)); ++ inode->i_size = le64_to_cpu(get_unaligned(&sd_lw->size)); ++ if ((inode->i_mode & S_IFMT) == (S_IFREG | S_IFIFO)) { ++ inode->i_mode &= ~S_IFIFO; ++ warning("", "partially converted file is encountered"); ++ reiser4_inode_set_flag(inode, REISER4_PART_MIXED); ++ } ++ move_on(len, area, sizeof *sd_lw); ++ return 0; ++ } else ++ return not_enough_space(inode, "lw sd"); ++} ++ ++static int save_len_lw_sd(struct inode *inode UNUSED_ARG /* object being ++ * processed */ ) ++{ ++ return sizeof(reiser4_light_weight_stat); ++} ++ ++static int save_lw_sd(struct inode *inode /* object being processed */ , ++ char **area /* position in stat-data */ ) ++{ ++ reiser4_light_weight_stat *sd; ++ mode_t delta; ++ ++ assert("nikita-2705", inode != NULL); ++ assert("nikita-2706", area != NULL); ++ assert("nikita-2707", *area != NULL); ++ ++ sd = (reiser4_light_weight_stat *) * area; ++ ++ delta = (reiser4_inode_get_flag(inode, ++ REISER4_PART_MIXED) ? 
S_IFIFO : 0); ++ put_unaligned(cpu_to_le16(inode->i_mode | delta), &sd->mode); ++ put_unaligned(cpu_to_le32(inode->i_nlink), &sd->nlink); ++ put_unaligned(cpu_to_le64((__u64) inode->i_size), &sd->size); ++ *area += sizeof *sd; ++ return 0; ++} ++ ++static int present_unix_sd(struct inode *inode /* object being processed */ , ++ char **area /* position in stat-data */ , ++ int *len /* remaining length */ ) ++{ ++ assert("nikita-637", inode != NULL); ++ assert("nikita-638", area != NULL); ++ assert("nikita-639", *area != NULL); ++ assert("nikita-640", len != NULL); ++ assert("nikita-641", *len > 0); ++ ++ if (*len >= (int)sizeof(reiser4_unix_stat)) { ++ reiser4_unix_stat *sd; ++ ++ sd = (reiser4_unix_stat *) * area; ++ ++ inode->i_uid = le32_to_cpu(get_unaligned(&sd->uid)); ++ inode->i_gid = le32_to_cpu(get_unaligned(&sd->gid)); ++ inode->i_atime.tv_sec = le32_to_cpu(get_unaligned(&sd->atime)); ++ inode->i_mtime.tv_sec = le32_to_cpu(get_unaligned(&sd->mtime)); ++ inode->i_ctime.tv_sec = le32_to_cpu(get_unaligned(&sd->ctime)); ++ if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) ++ inode->i_rdev = le64_to_cpu(get_unaligned(&sd->u.rdev)); ++ else ++ inode_set_bytes(inode, (loff_t) le64_to_cpu(get_unaligned(&sd->u.bytes))); ++ move_on(len, area, sizeof *sd); ++ return 0; ++ } else ++ return not_enough_space(inode, "unix sd"); ++} ++ ++static int absent_unix_sd(struct inode *inode /* object being processed */ ) ++{ ++ inode->i_uid = get_super_private(inode->i_sb)->default_uid; ++ inode->i_gid = get_super_private(inode->i_sb)->default_gid; ++ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; ++ inode_set_bytes(inode, inode->i_size); ++ /* mark inode as lightweight, so that caller (lookup_common) will ++ complete initialisation by copying [ug]id from a parent. 
*/ ++ reiser4_inode_set_flag(inode, REISER4_LIGHT_WEIGHT); ++ return 0; ++} ++ ++/* Audited by: green(2002.06.14) */ ++static int save_len_unix_sd(struct inode *inode UNUSED_ARG /* object being ++ * processed */ ) ++{ ++ return sizeof(reiser4_unix_stat); ++} ++ ++static int save_unix_sd(struct inode *inode /* object being processed */ , ++ char **area /* position in stat-data */ ) ++{ ++ reiser4_unix_stat *sd; ++ ++ assert("nikita-642", inode != NULL); ++ assert("nikita-643", area != NULL); ++ assert("nikita-644", *area != NULL); ++ ++ sd = (reiser4_unix_stat *) * area; ++ put_unaligned(cpu_to_le32(inode->i_uid), &sd->uid); ++ put_unaligned(cpu_to_le32(inode->i_gid), &sd->gid); ++ put_unaligned(cpu_to_le32((__u32) inode->i_atime.tv_sec), &sd->atime); ++ put_unaligned(cpu_to_le32((__u32) inode->i_ctime.tv_sec), &sd->ctime); ++ put_unaligned(cpu_to_le32((__u32) inode->i_mtime.tv_sec), &sd->mtime); ++ if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) ++ put_unaligned(cpu_to_le64(inode->i_rdev), &sd->u.rdev); ++ else ++ put_unaligned(cpu_to_le64((__u64) inode_get_bytes(inode)), &sd->u.bytes); ++ *area += sizeof *sd; ++ return 0; ++} ++ ++static int ++present_large_times_sd(struct inode *inode /* object being processed */ , ++ char **area /* position in stat-data */ , ++ int *len /* remaining length */ ) ++{ ++ if (*len >= (int)sizeof(reiser4_large_times_stat)) { ++ reiser4_large_times_stat *sd_lt; ++ ++ sd_lt = (reiser4_large_times_stat *) * area; ++ ++ inode->i_atime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->atime)); ++ inode->i_mtime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->mtime)); ++ inode->i_ctime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->ctime)); ++ ++ move_on(len, area, sizeof *sd_lt); ++ return 0; ++ } else ++ return not_enough_space(inode, "large times sd"); ++} ++ ++static int ++save_len_large_times_sd(struct inode *inode UNUSED_ARG ++ /* object being processed */ ) ++{ ++ return sizeof(reiser4_large_times_stat); ++} ++ ++static int 
++save_large_times_sd(struct inode *inode /* object being processed */ , ++ char **area /* position in stat-data */ ) ++{ ++ reiser4_large_times_stat *sd; ++ ++ assert("nikita-2817", inode != NULL); ++ assert("nikita-2818", area != NULL); ++ assert("nikita-2819", *area != NULL); ++ ++ sd = (reiser4_large_times_stat *) * area; ++ ++ put_unaligned(cpu_to_le32((__u32) inode->i_atime.tv_nsec), &sd->atime); ++ put_unaligned(cpu_to_le32((__u32) inode->i_ctime.tv_nsec), &sd->ctime); ++ put_unaligned(cpu_to_le32((__u32) inode->i_mtime.tv_nsec), &sd->mtime); ++ ++ *area += sizeof *sd; ++ return 0; ++} ++ ++/* symlink stat data extension */ ++ ++/* allocate memory for symlink target and attach it to inode->i_private */ ++static int ++symlink_target_to_inode(struct inode *inode, const char *target, int len) ++{ ++ assert("vs-845", inode->i_private == NULL); ++ assert("vs-846", !reiser4_inode_get_flag(inode, ++ REISER4_GENERIC_PTR_USED)); ++ /* FIXME-VS: this is prone to deadlock. Not more than other similar ++ places, though */ ++ inode->i_private = kmalloc((size_t) len + 1, ++ reiser4_ctx_gfp_mask_get()); ++ if (!inode->i_private) ++ return RETERR(-ENOMEM); ++ ++ memcpy((char *)(inode->i_private), target, (size_t) len); ++ ((char *)(inode->i_private))[len] = 0; ++ reiser4_inode_set_flag(inode, REISER4_GENERIC_PTR_USED); ++ return 0; ++} ++ ++/* this is called on read_inode. There is nothing to do actually, but some ++ sanity checks */ ++static int present_symlink_sd(struct inode *inode, char **area, int *len) ++{ ++ int result; ++ int length; ++ reiser4_symlink_stat *sd; ++ ++ length = (int)inode->i_size; ++ /* ++ * *len is number of bytes in stat data item from *area to the end of ++ * item. 
It must be not less than size of symlink + 1 for ending 0 ++ */ ++ if (length > *len) ++ return not_enough_space(inode, "symlink"); ++ ++ if (*(*area + length) != 0) { ++ warning("vs-840", "Symlink is not zero terminated"); ++ return RETERR(-EIO); ++ } ++ ++ sd = (reiser4_symlink_stat *) * area; ++ result = symlink_target_to_inode(inode, sd->body, length); ++ ++ move_on(len, area, length + 1); ++ return result; ++} ++ ++static int save_len_symlink_sd(struct inode *inode) ++{ ++ return inode->i_size + 1; ++} ++ ++/* this is called on create and update stat data. Do nothing on update but ++ update @area */ ++static int save_symlink_sd(struct inode *inode, char **area) ++{ ++ int result; ++ int length; ++ reiser4_symlink_stat *sd; ++ ++ length = (int)inode->i_size; ++ /* inode->i_size must be set already */ ++ assert("vs-841", length); ++ ++ result = 0; ++ sd = (reiser4_symlink_stat *) * area; ++ if (!reiser4_inode_get_flag(inode, REISER4_GENERIC_PTR_USED)) { ++ const char *target; ++ ++ target = (const char *)(inode->i_private); ++ inode->i_private = NULL; ++ ++ result = symlink_target_to_inode(inode, target, length); ++ ++ /* copy symlink to stat data */ ++ memcpy(sd->body, target, (size_t) length); ++ (*area)[length] = 0; ++ } else { ++ /* there is nothing to do in update but move area */ ++ assert("vs-844", ++ !memcmp(inode->i_private, sd->body, ++ (size_t) length + 1)); ++ } ++ ++ *area += (length + 1); ++ return result; ++} ++ ++static int present_flags_sd(struct inode *inode /* object being processed */ , ++ char **area /* position in stat-data */ , ++ int *len /* remaining length */ ) ++{ ++ assert("nikita-645", inode != NULL); ++ assert("nikita-646", area != NULL); ++ assert("nikita-647", *area != NULL); ++ assert("nikita-648", len != NULL); ++ assert("nikita-649", *len > 0); ++ ++ if (*len >= (int)sizeof(reiser4_flags_stat)) { ++ reiser4_flags_stat *sd; ++ ++ sd = (reiser4_flags_stat *) * area; ++ inode->i_flags = le32_to_cpu(get_unaligned(&sd->flags)); ++ 
move_on(len, area, sizeof *sd); ++ return 0; ++ } else ++ return not_enough_space(inode, "generation and attrs"); ++} ++ ++/* Audited by: green(2002.06.14) */ ++static int save_len_flags_sd(struct inode *inode UNUSED_ARG /* object being ++ * processed */ ) ++{ ++ return sizeof(reiser4_flags_stat); ++} ++ ++static int save_flags_sd(struct inode *inode /* object being processed */ , ++ char **area /* position in stat-data */ ) ++{ ++ reiser4_flags_stat *sd; ++ ++ assert("nikita-650", inode != NULL); ++ assert("nikita-651", area != NULL); ++ assert("nikita-652", *area != NULL); ++ ++ sd = (reiser4_flags_stat *) * area; ++ put_unaligned(cpu_to_le32(inode->i_flags), &sd->flags); ++ *area += sizeof *sd; ++ return 0; ++} ++ ++static int absent_plugin_sd(struct inode *inode); ++static int present_plugin_sd(struct inode *inode /* object being processed */ , ++ char **area /* position in stat-data */ , ++ int *len /* remaining length */, ++ int is_pset /* 1 if plugin set, 0 if heir set. */) ++{ ++ reiser4_plugin_stat *sd; ++ reiser4_plugin *plugin; ++ reiser4_inode *info; ++ int i; ++ __u16 mask; ++ int result; ++ int num_of_plugins; ++ ++ assert("nikita-653", inode != NULL); ++ assert("nikita-654", area != NULL); ++ assert("nikita-655", *area != NULL); ++ assert("nikita-656", len != NULL); ++ assert("nikita-657", *len > 0); ++ ++ if (*len < (int)sizeof(reiser4_plugin_stat)) ++ return not_enough_space(inode, "plugin"); ++ ++ sd = (reiser4_plugin_stat *) * area; ++ info = reiser4_inode_data(inode); ++ ++ mask = 0; ++ num_of_plugins = le16_to_cpu(get_unaligned(&sd->plugins_no)); ++ move_on(len, area, sizeof *sd); ++ result = 0; ++ for (i = 0; i < num_of_plugins; ++i) { ++ reiser4_plugin_slot *slot; ++ reiser4_plugin_type type; ++ pset_member memb; ++ ++ slot = (reiser4_plugin_slot *) * area; ++ if (*len < (int)sizeof *slot) ++ return not_enough_space(inode, "additional plugin"); ++ ++ memb = le16_to_cpu(get_unaligned(&slot->pset_memb)); ++ type = 
aset_member_to_type_unsafe(memb); ++ ++ if (type == REISER4_PLUGIN_TYPES) { ++ warning("nikita-3502", ++ "wrong %s member (%i) for %llu", is_pset ? ++ "pset" : "hset", memb, ++ (unsigned long long)get_inode_oid(inode)); ++ return RETERR(-EINVAL); ++ } ++ plugin = plugin_by_disk_id(reiser4_tree_by_inode(inode), ++ type, &slot->id); ++ if (plugin == NULL) ++ return unknown_plugin(le16_to_cpu(get_unaligned(&slot->id)), inode); ++ ++ /* plugin is loaded into inode, mark this into inode's ++ bitmask of loaded non-standard plugins */ ++ if (!(mask & (1 << memb))) { ++ mask |= (1 << memb); ++ } else { ++ warning("nikita-658", "duplicate plugin for %llu", ++ (unsigned long long)get_inode_oid(inode)); ++ return RETERR(-EINVAL); ++ } ++ move_on(len, area, sizeof *slot); ++ /* load plugin data, if any */ ++ if (plugin->h.pops != NULL && plugin->h.pops->load) ++ result = plugin->h.pops->load(inode, plugin, area, len); ++ else ++ result = aset_set_unsafe(is_pset ? &info->pset : ++ &info->hset, memb, plugin); ++ if (result) ++ return result; ++ } ++ if (is_pset) { ++ /* if object plugin wasn't loaded from stat-data, guess it by ++ mode bits */ ++ plugin = file_plugin_to_plugin(inode_file_plugin(inode)); ++ if (plugin == NULL) ++ result = absent_plugin_sd(inode); ++ info->plugin_mask = mask; ++ } else ++ info->heir_mask = mask; ++ ++ return result; ++} ++ ++static int present_pset_sd(struct inode *inode, char **area, int *len) { ++ return present_plugin_sd(inode, area, len, 1 /* pset */); ++} ++ ++/* Determine object plugin for @inode based on i_mode. ++ ++ Many objects in reiser4 file system are controlled by standard object ++ plugins that emulate traditional unix objects: unix file, directory, symlink, fifo, and so on. ++ ++ For such files we don't explicitly store plugin id in object stat ++ data. Rather required plugin is guessed from mode bits, where file "type" ++ is encoded (see stat(2)). 
++*/ ++static int ++guess_plugin_by_mode(struct inode *inode /* object to guess plugins for */ ) ++{ ++ int fplug_id; ++ int dplug_id; ++ reiser4_inode *info; ++ ++ assert("nikita-736", inode != NULL); ++ ++ dplug_id = fplug_id = -1; ++ ++ switch (inode->i_mode & S_IFMT) { ++ case S_IFSOCK: ++ case S_IFBLK: ++ case S_IFCHR: ++ case S_IFIFO: ++ fplug_id = SPECIAL_FILE_PLUGIN_ID; ++ break; ++ case S_IFLNK: ++ fplug_id = SYMLINK_FILE_PLUGIN_ID; ++ break; ++ case S_IFDIR: ++ fplug_id = DIRECTORY_FILE_PLUGIN_ID; ++ dplug_id = HASHED_DIR_PLUGIN_ID; ++ break; ++ default: ++ warning("nikita-737", "wrong file mode: %o", inode->i_mode); ++ return RETERR(-EIO); ++ case S_IFREG: ++ fplug_id = UNIX_FILE_PLUGIN_ID; ++ break; ++ } ++ info = reiser4_inode_data(inode); ++ set_plugin(&info->pset, PSET_FILE, (fplug_id >= 0) ? ++ plugin_by_id(REISER4_FILE_PLUGIN_TYPE, fplug_id) : NULL); ++ set_plugin(&info->pset, PSET_DIR, (dplug_id >= 0) ? ++ plugin_by_id(REISER4_DIR_PLUGIN_TYPE, dplug_id) : NULL); ++ return 0; ++} ++ ++/* Audited by: green(2002.06.14) */ ++static int absent_plugin_sd(struct inode *inode /* object being processed */ ) ++{ ++ int result; ++ ++ assert("nikita-659", inode != NULL); ++ ++ result = guess_plugin_by_mode(inode); ++ /* if mode was wrong, guess_plugin_by_mode() returns "regular file", ++ but setup_inode_ops() will call make_bad_inode(). ++ Another, more logical but bit more complex solution is to add ++ "bad-file plugin". 
*/ ++ /* FIXME-VS: activate was called here */ ++ return result; ++} ++ ++/* helper function for plugin_sd_save_len(): calculate how much space ++ required to save state of given plugin */ ++/* Audited by: green(2002.06.14) */ ++static int len_for(reiser4_plugin * plugin /* plugin to save */ , ++ struct inode *inode /* object being processed */ , ++ pset_member memb, ++ int len, int is_pset) ++{ ++ reiser4_inode *info; ++ assert("nikita-661", inode != NULL); ++ ++ if (plugin == NULL) ++ return len; ++ ++ info = reiser4_inode_data(inode); ++ if (is_pset ? ++ info->plugin_mask & (1 << memb) : ++ info->heir_mask & (1 << memb)) { ++ len += sizeof(reiser4_plugin_slot); ++ if (plugin->h.pops && plugin->h.pops->save_len != NULL) { ++ /* non-standard plugin, call method */ ++ /* commented as it is incompatible with alignment ++ * policy in save_plug() -edward */ ++ /* len = round_up(len, plugin->h.pops->alignment); */ ++ len += plugin->h.pops->save_len(inode, plugin); ++ } ++ } ++ return len; ++} ++ ++/* calculate how much space is required to save state of all plugins, ++ associated with inode */ ++static int save_len_plugin_sd(struct inode *inode /* object being processed */, ++ int is_pset) ++{ ++ int len; ++ int last; ++ reiser4_inode *state; ++ pset_member memb; ++ ++ assert("nikita-663", inode != NULL); ++ ++ state = reiser4_inode_data(inode); ++ ++ /* common case: no non-standard plugins */ ++ if (is_pset ? state->plugin_mask == 0 : state->heir_mask == 0) ++ return 0; ++ len = sizeof(reiser4_plugin_stat); ++ last = PSET_LAST; ++ ++ for (memb = 0; memb < last; ++memb) { ++ len = len_for(aset_get(is_pset ? state->pset : state->hset, memb), ++ inode, memb, len, is_pset); ++ } ++ assert("nikita-664", len > (int)sizeof(reiser4_plugin_stat)); ++ return len; ++} ++ ++static int save_len_pset_sd(struct inode *inode) { ++ return save_len_plugin_sd(inode, 1 /* pset */); ++} ++ ++/* helper function for plugin_sd_save(): save plugin, associated with ++ inode. 
*/ ++static int save_plug(reiser4_plugin * plugin /* plugin to save */ , ++ struct inode *inode /* object being processed */ , ++ int memb /* what element of pset is saved */ , ++ char **area /* position in stat-data */ , ++ int *count /* incremented if plugin were actually saved. */, ++ int is_pset /* 1 for plugin set, 0 for heir set */) ++{ ++ reiser4_plugin_slot *slot; ++ int fake_len; ++ int result; ++ ++ assert("nikita-665", inode != NULL); ++ assert("nikita-666", area != NULL); ++ assert("nikita-667", *area != NULL); ++ ++ if (plugin == NULL) ++ return 0; ++ ++ if (is_pset ? ++ !(reiser4_inode_data(inode)->plugin_mask & (1 << memb)) : ++ !(reiser4_inode_data(inode)->heir_mask & (1 << memb))) ++ return 0; ++ slot = (reiser4_plugin_slot *) * area; ++ put_unaligned(cpu_to_le16(memb), &slot->pset_memb); ++ put_unaligned(cpu_to_le16(plugin->h.id), &slot->id); ++ fake_len = (int)0xffff; ++ move_on(&fake_len, area, sizeof *slot); ++ ++*count; ++ result = 0; ++ if (plugin->h.pops != NULL) { ++ if (plugin->h.pops->save != NULL) ++ result = plugin->h.pops->save(inode, plugin, area); ++ } ++ return result; ++} ++ ++/* save state of all non-standard plugins associated with inode */ ++static int save_plugin_sd(struct inode *inode /* object being processed */ , ++ char **area /* position in stat-data */, ++ int is_pset /* 1 for pset, 0 for hset */) ++{ ++ int fake_len; ++ int result = 0; ++ int num_of_plugins; ++ reiser4_plugin_stat *sd; ++ reiser4_inode *state; ++ pset_member memb; ++ ++ assert("nikita-669", inode != NULL); ++ assert("nikita-670", area != NULL); ++ assert("nikita-671", *area != NULL); ++ ++ state = reiser4_inode_data(inode); ++ if (is_pset ? state->plugin_mask == 0 : state->heir_mask == 0) ++ return 0; ++ sd = (reiser4_plugin_stat *) * area; ++ fake_len = (int)0xffff; ++ move_on(&fake_len, area, sizeof *sd); ++ ++ num_of_plugins = 0; ++ for (memb = 0; memb < PSET_LAST; ++memb) { ++ result = save_plug(aset_get(is_pset ? 
state->pset : state->hset, ++ memb), ++ inode, memb, area, &num_of_plugins, is_pset); ++ if (result != 0) ++ break; ++ } ++ ++ put_unaligned(cpu_to_le16((__u16)num_of_plugins), &sd->plugins_no); ++ return result; ++} ++ ++static int save_pset_sd(struct inode *inode, char **area) { ++ return save_plugin_sd(inode, area, 1 /* pset */); ++} ++ ++static int present_hset_sd(struct inode *inode, char **area, int *len) { ++ return present_plugin_sd(inode, area, len, 0 /* hset */); ++} ++ ++static int save_len_hset_sd(struct inode *inode) { ++ return save_len_plugin_sd(inode, 0 /* pset */); ++} ++ ++static int save_hset_sd(struct inode *inode, char **area) { ++ return save_plugin_sd(inode, area, 0 /* hset */); ++} ++ ++/* helper function for crypto_sd_present(), crypto_sd_save. ++ Allocates memory for crypto stat, keyid and attaches it to the inode */ ++static int extract_crypto_stat (struct inode * inode, ++ reiser4_crypto_stat * sd) ++{ ++ crypto_stat_t * info; ++ assert("edward-11", !inode_crypto_stat(inode)); ++ assert("edward-1413", ++ !reiser4_inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED)); ++ /* create and attach a crypto-stat without secret key loaded */ ++ info = reiser4_alloc_crypto_stat(inode); ++ if (IS_ERR(info)) ++ return PTR_ERR(info); ++ info->keysize = le16_to_cpu(get_unaligned(&sd->keysize)); ++ memcpy(info->keyid, sd->keyid, inode_digest_plugin(inode)->fipsize); ++ reiser4_attach_crypto_stat(inode, info); ++ reiser4_inode_set_flag(inode, REISER4_CRYPTO_STAT_LOADED); ++ return 0; ++} ++ ++/* crypto stat-data extension */ ++ ++static int present_crypto_sd(struct inode *inode, char **area, int *len) ++{ ++ int result; ++ reiser4_crypto_stat *sd; ++ digest_plugin *dplug = inode_digest_plugin(inode); ++ ++ assert("edward-06", dplug != NULL); ++ assert("edward-684", dplug->fipsize); ++ assert("edward-07", area != NULL); ++ assert("edward-08", *area != NULL); ++ assert("edward-09", len != NULL); ++ assert("edward-10", *len > 0); ++ ++ if (*len < 
(int)sizeof(reiser4_crypto_stat)) { ++ return not_enough_space(inode, "crypto-sd"); ++ } ++ /* *len is number of bytes in stat data item from *area to the end of ++ item. It must be not less than size of this extension */ ++ assert("edward-75", sizeof(*sd) + dplug->fipsize <= *len); ++ ++ sd = (reiser4_crypto_stat *) * area; ++ result = extract_crypto_stat(inode, sd); ++ move_on(len, area, sizeof(*sd) + dplug->fipsize); ++ ++ return result; ++} ++ ++static int save_len_crypto_sd(struct inode *inode) ++{ ++ return sizeof(reiser4_crypto_stat) + ++ inode_digest_plugin(inode)->fipsize; ++} ++ ++static int save_crypto_sd(struct inode *inode, char **area) ++{ ++ int result = 0; ++ reiser4_crypto_stat *sd; ++ crypto_stat_t * info = inode_crypto_stat(inode); ++ digest_plugin *dplug = inode_digest_plugin(inode); ++ ++ assert("edward-12", dplug != NULL); ++ assert("edward-13", area != NULL); ++ assert("edward-14", *area != NULL); ++ assert("edward-15", info != NULL); ++ assert("edward-1414", info->keyid != NULL); ++ assert("edward-1415", info->keysize != 0); ++ assert("edward-76", reiser4_inode_data(inode) != NULL); ++ ++ if (!reiser4_inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED)) { ++ /* file is just created */ ++ sd = (reiser4_crypto_stat *) *area; ++ /* copy everything but private key to the disk stat-data */ ++ put_unaligned(cpu_to_le16(info->keysize), &sd->keysize); ++ memcpy(sd->keyid, info->keyid, (size_t) dplug->fipsize); ++ reiser4_inode_set_flag(inode, REISER4_CRYPTO_STAT_LOADED); ++ } ++ *area += (sizeof(*sd) + dplug->fipsize); ++ return result; ++} ++ ++static int eio(struct inode *inode, char **area, int *len) ++{ ++ return RETERR(-EIO); ++} ++ ++sd_ext_plugin sd_ext_plugins[LAST_SD_EXTENSION] = { ++ [LIGHT_WEIGHT_STAT] = { ++ .h = { ++ .type_id = REISER4_SD_EXT_PLUGIN_TYPE, ++ .id = LIGHT_WEIGHT_STAT, ++ .pops = NULL, ++ .label = "light-weight sd", ++ .desc = "sd for light-weight files", ++ .linkage = {NULL,NULL} ++ }, ++ .present = present_lw_sd, ++ 
.absent = NULL, ++ .save_len = save_len_lw_sd, ++ .save = save_lw_sd, ++ .alignment = 8 ++ }, ++ [UNIX_STAT] = { ++ .h = { ++ .type_id = REISER4_SD_EXT_PLUGIN_TYPE, ++ .id = UNIX_STAT, ++ .pops = NULL, ++ .label = "unix-sd", ++ .desc = "unix stat-data fields", ++ .linkage = {NULL,NULL} ++ }, ++ .present = present_unix_sd, ++ .absent = absent_unix_sd, ++ .save_len = save_len_unix_sd, ++ .save = save_unix_sd, ++ .alignment = 8 ++ }, ++ [LARGE_TIMES_STAT] = { ++ .h = { ++ .type_id = REISER4_SD_EXT_PLUGIN_TYPE, ++ .id = LARGE_TIMES_STAT, ++ .pops = NULL, ++ .label = "64time-sd", ++ .desc = "nanosecond resolution for times", ++ .linkage = {NULL,NULL} ++ }, ++ .present = present_large_times_sd, ++ .absent = NULL, ++ .save_len = save_len_large_times_sd, ++ .save = save_large_times_sd, ++ .alignment = 8 ++ }, ++ [SYMLINK_STAT] = { ++ /* stat data of symlink has this extension */ ++ .h = { ++ .type_id = REISER4_SD_EXT_PLUGIN_TYPE, ++ .id = SYMLINK_STAT, ++ .pops = NULL, ++ .label = "symlink-sd", ++ .desc = ++ "stat data is appended with symlink name", ++ .linkage = {NULL,NULL} ++ }, ++ .present = present_symlink_sd, ++ .absent = NULL, ++ .save_len = save_len_symlink_sd, ++ .save = save_symlink_sd, ++ .alignment = 8 ++ }, ++ [PLUGIN_STAT] = { ++ .h = { ++ .type_id = REISER4_SD_EXT_PLUGIN_TYPE, ++ .id = PLUGIN_STAT, ++ .pops = NULL, ++ .label = "plugin-sd", ++ .desc = "plugin stat-data fields", ++ .linkage = {NULL,NULL} ++ }, ++ .present = present_pset_sd, ++ .absent = absent_plugin_sd, ++ .save_len = save_len_pset_sd, ++ .save = save_pset_sd, ++ .alignment = 8 ++ }, ++ [HEIR_STAT] = { ++ .h = { ++ .type_id = REISER4_SD_EXT_PLUGIN_TYPE, ++ .id = HEIR_STAT, ++ .pops = NULL, ++ .label = "heir-plugin-sd", ++ .desc = "heir plugin stat-data fields", ++ .linkage = {NULL,NULL} ++ }, ++ .present = present_hset_sd, ++ .absent = NULL, ++ .save_len = save_len_hset_sd, ++ .save = save_hset_sd, ++ .alignment = 8 ++ }, ++ [FLAGS_STAT] = { ++ .h = { ++ .type_id = REISER4_SD_EXT_PLUGIN_TYPE, 
++ .id = FLAGS_STAT, ++ .pops = NULL, ++ .label = "flags-sd", ++ .desc = "inode bit flags", ++ .linkage = {NULL, NULL} ++ }, ++ .present = present_flags_sd, ++ .absent = NULL, ++ .save_len = save_len_flags_sd, ++ .save = save_flags_sd, ++ .alignment = 8 ++ }, ++ [CAPABILITIES_STAT] = { ++ .h = { ++ .type_id = REISER4_SD_EXT_PLUGIN_TYPE, ++ .id = CAPABILITIES_STAT, ++ .pops = NULL, ++ .label = "capabilities-sd", ++ .desc = "capabilities", ++ .linkage = {NULL, NULL} ++ }, ++ .present = eio, ++ .absent = NULL, ++ .save_len = save_len_flags_sd, ++ .save = save_flags_sd, ++ .alignment = 8 ++ }, ++ [CRYPTO_STAT] = { ++ .h = { ++ .type_id = REISER4_SD_EXT_PLUGIN_TYPE, ++ .id = CRYPTO_STAT, ++ .pops = NULL, ++ .label = "crypto-sd", ++ .desc = "secret key size and id", ++ .linkage = {NULL, NULL} ++ }, ++ .present = present_crypto_sd, ++ .absent = NULL, ++ .save_len = save_len_crypto_sd, ++ .save = save_crypto_sd, ++ .alignment = 8 ++ } ++}; ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/static_stat.h b/fs/reiser4/plugin/item/static_stat.h +new file mode 100644 +index 0000000..dd20eb3 +--- /dev/null ++++ b/fs/reiser4/plugin/item/static_stat.h +@@ -0,0 +1,224 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* This describes the static_stat item, used to hold all information needed by the stat() syscall. ++ ++In the case where each file has not less than the fields needed by the ++stat() syscall, it is more compact to store those fields in this ++struct. ++ ++If this item does not exist, then all stats are dynamically resolved. ++At the moment, we either resolve all stats dynamically or all of them ++statically. 
If you think this is not fully optimal, and the rest of ++reiser4 is working, then fix it...:-) ++ ++*/ ++ ++#if !defined( __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__ ) ++#define __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__ ++ ++#include "../../forward.h" ++#include "../../dformat.h" ++ ++#include /* for struct inode */ ++ ++/* Stat data layout: goals and implementation. ++ ++ We want to be able to have lightweight files which have complete flexibility in what semantic metadata is attached to ++ them, including not having semantic metadata attached to them. ++ ++ There is one problem with doing that, which is that if in fact you have exactly the same metadata for most files you ++ want to store, then it takes more space to store that metadata in a dynamically sized structure than in a statically ++ sized structure because the statically sized structure knows without recording it what the names and lengths of the ++ attributes are. ++ ++ This leads to a natural compromise, which is to special case those files which have simply the standard unix file ++ attributes, and only employ the full dynamic stat data mechanism for those files that differ from the standard unix ++ file in their use of file attributes. ++ ++ Yet this compromise deserves to be compromised a little. ++ ++ We accommodate the case where you have no more than the standard unix file attributes by using an "extension ++ bitmask": each bit in it indicates presence or absence of or particular stat data extension (see sd_ext_bits enum). ++ ++ If the first bit of the extension bitmask bit is 0, we have light-weight file whose attributes are either inherited ++ from parent directory (as uid, gid) or initialised to some sane values. ++ ++ To capitalize on existing code infrastructure, extensions are ++ implemented as plugins of type REISER4_SD_EXT_PLUGIN_TYPE. 
++ Each stat-data extension plugin implements four methods: ++ ++ ->present() called by sd_load() when this extension is found in stat-data ++ ->absent() called by sd_load() when this extension is not found in stat-data ++ ->save_len() called by sd_len() to calculate total length of stat-data ++ ->save() called by sd_save() to store extension data into stat-data ++ ++ Implementation is in fs/reiser4/plugin/item/static_stat.c ++*/ ++ ++/* stat-data extension. Please order this by presumed frequency of use */ ++typedef enum { ++ /* support for light-weight files */ ++ LIGHT_WEIGHT_STAT, ++ /* data required to implement unix stat(2) call. Layout is in ++ reiser4_unix_stat. If this is not present, file is light-weight */ ++ UNIX_STAT, ++ /* this contains additional set of 32bit [anc]time fields to implement ++ nanosecond resolution. Layout is in reiser4_large_times_stat. Usage ++ if this extension is governed by 32bittimes mount option. */ ++ LARGE_TIMES_STAT, ++ /* stat data has link name included */ ++ SYMLINK_STAT, ++ /* on-disk slots of non-standard plugins for main plugin table ++ (@reiser4_inode->pset), that is, plugins that cannot be deduced ++ from file mode bits), for example, aggregation, interpolation etc. */ ++ PLUGIN_STAT, ++ /* this extension contains persistent inode flags. These flags are ++ single bits: immutable, append, only, etc. Layout is in ++ reiser4_flags_stat. */ ++ FLAGS_STAT, ++ /* this extension contains capabilities sets, associated with this ++ file. Layout is in reiser4_capabilities_stat */ ++ CAPABILITIES_STAT, ++ /* this extension contains size and public id of the secret key. ++ Layout is in reiser4_crypto_stat */ ++ CRYPTO_STAT, ++ /* on-disk slots of non-default plugins for inheritance, which ++ are extracted to special plugin table (@reiser4_inode->hset). ++ By default, children of the object will inherit plugins from ++ its main plugin table (pset). 
*/ ++ HEIR_STAT, ++ LAST_SD_EXTENSION, ++ /* ++ * init_inode_static_sd() iterates over extension mask until all ++ * non-zero bits are processed. This means, that neither ->present(), ++ * nor ->absent() methods will be called for stat-data extensions that ++ * go after last present extension. But some basic extensions, we want ++ * either ->absent() or ->present() method to be called, because these ++ * extensions set up something in inode even when they are not ++ * present. This is what LAST_IMPORTANT_SD_EXTENSION is for: for all ++ * extensions before and including LAST_IMPORTANT_SD_EXTENSION either ++ * ->present(), or ->absent() method will be called, independently of ++ * what other extensions are present. ++ */ ++ LAST_IMPORTANT_SD_EXTENSION = PLUGIN_STAT ++} sd_ext_bits; ++ ++/* minimal stat-data. This allows to support light-weight files. */ ++typedef struct reiser4_stat_data_base { ++ /* 0 */ __le16 extmask; ++ /* 2 */ ++} PACKED reiser4_stat_data_base; ++ ++typedef struct reiser4_light_weight_stat { ++ /* 0 */ __le16 mode; ++ /* 2 */ __le32 nlink; ++ /* 6 */ __le64 size; ++ /* size in bytes */ ++ /* 14 */ ++} PACKED reiser4_light_weight_stat; ++ ++typedef struct reiser4_unix_stat { ++ /* owner id */ ++ /* 0 */ __le32 uid; ++ /* group id */ ++ /* 4 */ __le32 gid; ++ /* access time */ ++ /* 8 */ __le32 atime; ++ /* modification time */ ++ /* 12 */ __le32 mtime; ++ /* change time */ ++ /* 16 */ __le32 ctime; ++ union { ++ /* minor:major for device files */ ++ /* 20 */ __le64 rdev; ++ /* bytes used by file */ ++ /* 20 */ __le64 bytes; ++ } u; ++ /* 28 */ ++} PACKED reiser4_unix_stat; ++ ++/* symlink stored as part of inode */ ++typedef struct reiser4_symlink_stat { ++ char body[0]; ++} PACKED reiser4_symlink_stat; ++ ++typedef struct reiser4_plugin_slot { ++ /* 0 */ __le16 pset_memb; ++ /* 2 */ __le16 id; ++ /* 4 *//* here plugin stores its persistent state */ ++} PACKED reiser4_plugin_slot; ++ ++/* stat-data extension for files with non-standard plugin. 
*/ ++typedef struct reiser4_plugin_stat { ++ /* number of additional plugins, associated with this object */ ++ /* 0 */ __le16 plugins_no; ++ /* 2 */ reiser4_plugin_slot slot[0]; ++ /* 2 */ ++} PACKED reiser4_plugin_stat; ++ ++/* stat-data extension for inode flags. Currently it is just fixed-width 32 ++ * bit mask. If need arise, this can be replaced with variable width ++ * bitmask. */ ++typedef struct reiser4_flags_stat { ++ /* 0 */ __le32 flags; ++ /* 4 */ ++} PACKED reiser4_flags_stat; ++ ++typedef struct reiser4_capabilities_stat { ++ /* 0 */ __le32 effective; ++ /* 8 */ __le32 permitted; ++ /* 16 */ ++} PACKED reiser4_capabilities_stat; ++ ++typedef struct reiser4_cluster_stat { ++/* this defines cluster size (an attribute of cryptcompress objects) as PAGE_SIZE << cluster shift */ ++ /* 0 */ d8 cluster_shift; ++ /* 1 */ ++} PACKED reiser4_cluster_stat; ++ ++typedef struct reiser4_crypto_stat { ++ /* secret key size, bits */ ++ /* 0 */ d16 keysize; ++ /* secret key id */ ++ /* 2 */ d8 keyid[0]; ++ /* 2 */ ++} PACKED reiser4_crypto_stat; ++ ++typedef struct reiser4_large_times_stat { ++ /* access time */ ++ /* 0 */ d32 atime; ++ /* modification time */ ++ /* 4 */ d32 mtime; ++ /* change time */ ++ /* 8 */ d32 ctime; ++ /* 12 */ ++} PACKED reiser4_large_times_stat; ++ ++/* this structure is filled by sd_item_stat */ ++typedef struct sd_stat { ++ int dirs; ++ int files; ++ int others; ++} sd_stat; ++ ++/* plugin->item.common.* */ ++extern void print_sd(const char *prefix, coord_t * coord); ++extern void item_stat_static_sd(const coord_t * coord, void *vp); ++ ++/* plugin->item.s.sd.* */ ++extern int init_inode_static_sd(struct inode *inode, char *sd, int len); ++extern int save_len_static_sd(struct inode *inode); ++extern int save_static_sd(struct inode *inode, char **area); ++ ++/* __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/item/tail.c b/fs/reiser4/plugin/item/tail.c +new file mode 100644 +index 0000000..281dd36 +--- /dev/null ++++ b/fs/reiser4/plugin/item/tail.c +@@ -0,0 +1,812 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "item.h" ++#include "../../inode.h" ++#include "../../page_cache.h" ++#include "../../carry.h" ++#include "../../vfs_ops.h" ++ ++#include ++#include ++#include ++#include ++ ++/* plugin->u.item.b.max_key_inside */ ++reiser4_key *max_key_inside_tail(const coord_t *coord, reiser4_key *key) ++{ ++ item_key_by_coord(coord, key); ++ set_key_offset(key, get_key_offset(reiser4_max_key())); ++ return key; ++} ++ ++/* plugin->u.item.b.can_contain_key */ ++int can_contain_key_tail(const coord_t *coord, const reiser4_key *key, ++ const reiser4_item_data *data) ++{ ++ reiser4_key item_key; ++ ++ if (item_plugin_by_coord(coord) != data->iplug) ++ return 0; ++ ++ item_key_by_coord(coord, &item_key); ++ if (get_key_locality(key) != get_key_locality(&item_key) || ++ get_key_objectid(key) != get_key_objectid(&item_key)) ++ return 0; ++ ++ return 1; ++} ++ ++/* plugin->u.item.b.mergeable ++ first item is of tail type */ ++/* Audited by: green(2002.06.14) */ ++int mergeable_tail(const coord_t *p1, const coord_t *p2) ++{ ++ reiser4_key key1, key2; ++ ++ assert("vs-535", plugin_of_group(item_plugin_by_coord(p1), ++ UNIX_FILE_METADATA_ITEM_TYPE)); ++ assert("vs-365", item_id_by_coord(p1) == FORMATTING_ID); ++ ++ if (item_id_by_coord(p2) != FORMATTING_ID) { ++ /* second item is of another type */ ++ return 0; ++ } ++ ++ item_key_by_coord(p1, &key1); ++ item_key_by_coord(p2, &key2); ++ if (get_key_locality(&key1) != get_key_locality(&key2) || ++ get_key_objectid(&key1) != get_key_objectid(&key2) ++ || get_key_type(&key1) != get_key_type(&key2)) { ++ /* items of 
different objects */ ++ return 0; ++ } ++ if (get_key_offset(&key1) + nr_units_tail(p1) != get_key_offset(&key2)) { ++ /* not adjacent items */ ++ return 0; ++ } ++ return 1; ++} ++ ++/* plugin->u.item.b.print ++ plugin->u.item.b.check */ ++ ++/* plugin->u.item.b.nr_units */ ++pos_in_node_t nr_units_tail(const coord_t * coord) ++{ ++ return item_length_by_coord(coord); ++} ++ ++/* plugin->u.item.b.lookup */ ++lookup_result ++lookup_tail(const reiser4_key * key, lookup_bias bias, coord_t * coord) ++{ ++ reiser4_key item_key; ++ __u64 lookuped, offset; ++ unsigned nr_units; ++ ++ item_key_by_coord(coord, &item_key); ++ offset = get_key_offset(item_key_by_coord(coord, &item_key)); ++ nr_units = nr_units_tail(coord); ++ ++ /* key we are looking for must be greater than key of item @coord */ ++ assert("vs-416", keygt(key, &item_key)); ++ ++ /* offset we are looking for */ ++ lookuped = get_key_offset(key); ++ ++ if (lookuped >= offset && lookuped < offset + nr_units) { ++ /* byte we are looking for is in this item */ ++ coord->unit_pos = lookuped - offset; ++ coord->between = AT_UNIT; ++ return CBK_COORD_FOUND; ++ } ++ ++ /* set coord after last unit */ ++ coord->unit_pos = nr_units - 1; ++ coord->between = AFTER_UNIT; ++ return bias == ++ FIND_MAX_NOT_MORE_THAN ? 
CBK_COORD_FOUND : CBK_COORD_NOTFOUND; ++} ++ ++/* plugin->u.item.b.paste */ ++int ++paste_tail(coord_t *coord, reiser4_item_data *data, ++ carry_plugin_info *info UNUSED_ARG) ++{ ++ unsigned old_item_length; ++ char *item; ++ ++ /* length the item had before resizing has been performed */ ++ old_item_length = item_length_by_coord(coord) - data->length; ++ ++ /* tail items never get pasted in the middle */ ++ assert("vs-363", ++ (coord->unit_pos == 0 && coord->between == BEFORE_UNIT) || ++ (coord->unit_pos == old_item_length - 1 && ++ coord->between == AFTER_UNIT) || ++ (coord->unit_pos == 0 && old_item_length == 0 ++ && coord->between == AT_UNIT)); ++ ++ item = item_body_by_coord(coord); ++ if (coord->unit_pos == 0) ++ /* make space for pasted data when pasting at the beginning of ++ the item */ ++ memmove(item + data->length, item, old_item_length); ++ ++ if (coord->between == AFTER_UNIT) ++ coord->unit_pos++; ++ ++ if (data->data) { ++ assert("vs-554", data->user == 0 || data->user == 1); ++ if (data->user) { ++ assert("nikita-3035", reiser4_schedulable()); ++ /* copy from user space */ ++ if (__copy_from_user(item + coord->unit_pos, ++ (const char __user *)data->data, ++ (unsigned)data->length)) ++ return RETERR(-EFAULT); ++ } else ++ /* copy from kernel space */ ++ memcpy(item + coord->unit_pos, data->data, ++ (unsigned)data->length); ++ } else { ++ memset(item + coord->unit_pos, 0, (unsigned)data->length); ++ } ++ return 0; ++} ++ ++/* plugin->u.item.b.fast_paste */ ++ ++/* plugin->u.item.b.can_shift ++ number of units is returned via return value, number of bytes via @size. 
For ++ tail items they coincide */ ++int ++can_shift_tail(unsigned free_space, coord_t * source UNUSED_ARG, ++ znode * target UNUSED_ARG, shift_direction direction UNUSED_ARG, ++ unsigned *size, unsigned want) ++{ ++ /* make sure that that we do not want to shift more than we have */ ++ assert("vs-364", want > 0 ++ && want <= (unsigned)item_length_by_coord(source)); ++ ++ *size = min(want, free_space); ++ return *size; ++} ++ ++/* plugin->u.item.b.copy_units */ ++void ++copy_units_tail(coord_t * target, coord_t * source, ++ unsigned from, unsigned count, ++ shift_direction where_is_free_space, ++ unsigned free_space UNUSED_ARG) ++{ ++ /* make sure that item @target is expanded already */ ++ assert("vs-366", (unsigned)item_length_by_coord(target) >= count); ++ assert("vs-370", free_space >= count); ++ ++ if (where_is_free_space == SHIFT_LEFT) { ++ /* append item @target with @count first bytes of @source */ ++ assert("vs-365", from == 0); ++ ++ memcpy((char *)item_body_by_coord(target) + ++ item_length_by_coord(target) - count, ++ (char *)item_body_by_coord(source), count); ++ } else { ++ /* target item is moved to right already */ ++ reiser4_key key; ++ ++ assert("vs-367", ++ (unsigned)item_length_by_coord(source) == from + count); ++ ++ memcpy((char *)item_body_by_coord(target), ++ (char *)item_body_by_coord(source) + from, count); ++ ++ /* new units are inserted before first unit in an item, ++ therefore, we have to update item key */ ++ item_key_by_coord(source, &key); ++ set_key_offset(&key, get_key_offset(&key) + from); ++ ++ node_plugin_by_node(target->node)->update_item_key(target, &key, ++ NULL /*info */); ++ } ++} ++ ++/* plugin->u.item.b.create_hook */ ++ ++/* item_plugin->b.kill_hook ++ this is called when @count units starting from @from-th one are going to be removed ++ */ ++int ++kill_hook_tail(const coord_t * coord, pos_in_node_t from, ++ pos_in_node_t count, struct carry_kill_data *kdata) ++{ ++ reiser4_key key; ++ loff_t start, end; ++ ++ 
assert("vs-1577", kdata); ++ assert("vs-1579", kdata->inode); ++ ++ item_key_by_coord(coord, &key); ++ start = get_key_offset(&key) + from; ++ end = start + count; ++ fake_kill_hook_tail(kdata->inode, start, end, kdata->params.truncate); ++ return 0; ++} ++ ++/* plugin->u.item.b.shift_hook */ ++ ++/* helper for kill_units_tail and cut_units_tail */ ++static int ++do_cut_or_kill(coord_t * coord, pos_in_node_t from, pos_in_node_t to, ++ reiser4_key * smallest_removed, reiser4_key * new_first) ++{ ++ pos_in_node_t count; ++ ++ /* this method is only called to remove part of item */ ++ assert("vs-374", (to - from + 1) < item_length_by_coord(coord)); ++ /* tails items are never cut from the middle of an item */ ++ assert("vs-396", ergo(from != 0, to == coord_last_unit_pos(coord))); ++ assert("vs-1558", ergo(from == 0, to < coord_last_unit_pos(coord))); ++ ++ count = to - from + 1; ++ ++ if (smallest_removed) { ++ /* store smallest key removed */ ++ item_key_by_coord(coord, smallest_removed); ++ set_key_offset(smallest_removed, ++ get_key_offset(smallest_removed) + from); ++ } ++ if (new_first) { ++ /* head of item is cut */ ++ assert("vs-1529", from == 0); ++ ++ item_key_by_coord(coord, new_first); ++ set_key_offset(new_first, ++ get_key_offset(new_first) + from + count); ++ } ++ ++ if (REISER4_DEBUG) ++ memset((char *)item_body_by_coord(coord) + from, 0, count); ++ return count; ++} ++ ++/* plugin->u.item.b.cut_units */ ++int ++cut_units_tail(coord_t * coord, pos_in_node_t from, pos_in_node_t to, ++ struct carry_cut_data *cdata UNUSED_ARG, ++ reiser4_key * smallest_removed, reiser4_key * new_first) ++{ ++ return do_cut_or_kill(coord, from, to, smallest_removed, new_first); ++} ++ ++/* plugin->u.item.b.kill_units */ ++int ++kill_units_tail(coord_t * coord, pos_in_node_t from, pos_in_node_t to, ++ struct carry_kill_data *kdata, reiser4_key * smallest_removed, ++ reiser4_key * new_first) ++{ ++ kill_hook_tail(coord, from, to - from + 1, kdata); ++ return 
do_cut_or_kill(coord, from, to, smallest_removed, new_first); ++} ++ ++/* plugin->u.item.b.unit_key */ ++reiser4_key *unit_key_tail(const coord_t * coord, reiser4_key * key) ++{ ++ assert("vs-375", coord_is_existing_unit(coord)); ++ ++ item_key_by_coord(coord, key); ++ set_key_offset(key, (get_key_offset(key) + coord->unit_pos)); ++ ++ return key; ++} ++ ++/* plugin->u.item.b.estimate ++ plugin->u.item.b.item_data_by_flow */ ++ ++/* tail redpage function. It is called from readpage_tail(). */ ++static int do_readpage_tail(uf_coord_t *uf_coord, struct page *page) ++{ ++ tap_t tap; ++ int result; ++ coord_t coord; ++ lock_handle lh; ++ int count, mapped; ++ struct inode *inode; ++ char *pagedata; ++ ++ /* saving passed coord in order to do not move it by tap. */ ++ init_lh(&lh); ++ copy_lh(&lh, uf_coord->lh); ++ inode = page->mapping->host; ++ coord_dup(&coord, &uf_coord->coord); ++ ++ reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK); ++ ++ if ((result = reiser4_tap_load(&tap))) ++ goto out_tap_done; ++ ++ /* lookup until page is filled up. */ ++ for (mapped = 0; mapped < PAGE_CACHE_SIZE; ) { ++ /* number of bytes to be copied to page */ ++ count = item_length_by_coord(&coord) - coord.unit_pos; ++ if (count > PAGE_CACHE_SIZE - mapped) ++ count = PAGE_CACHE_SIZE - mapped; ++ ++ /* attach @page to address space and get data address */ ++ pagedata = kmap_atomic(page, KM_USER0); ++ ++ /* copy tail item to page */ ++ memcpy(pagedata + mapped, ++ ((char *)item_body_by_coord(&coord) + coord.unit_pos), ++ count); ++ mapped += count; ++ ++ flush_dcache_page(page); ++ ++ /* dettach page from address space */ ++ kunmap_atomic(pagedata, KM_USER0); ++ ++ /* Getting next tail item. */ ++ if (mapped < PAGE_CACHE_SIZE) { ++ /* ++ * unlock page in order to avoid keep it locked ++ * during tree lookup, which takes long term locks ++ */ ++ unlock_page(page); ++ ++ /* getting right neighbour. 
*/ ++ result = go_dir_el(&tap, RIGHT_SIDE, 0); ++ ++ /* lock page back */ ++ lock_page(page); ++ if (PageUptodate(page)) { ++ /* ++ * another thread read the page, we have ++ * nothing to do ++ */ ++ result = 0; ++ goto out_unlock_page; ++ } ++ ++ if (result) { ++ if (result == -E_NO_NEIGHBOR) { ++ /* ++ * rigth neighbor is not a formatted ++ * node ++ */ ++ result = 0; ++ goto done; ++ } else { ++ goto out_tap_relse; ++ } ++ } else { ++ if (!inode_file_plugin(inode)-> ++ owns_item(inode, &coord)) { ++ /* item of another file is found */ ++ result = 0; ++ goto done; ++ } ++ } ++ } ++ } ++ ++ done: ++ if (mapped != PAGE_CACHE_SIZE) { ++ pagedata = kmap_atomic(page, KM_USER0); ++ memset(pagedata + mapped, 0, PAGE_CACHE_SIZE - mapped); ++ flush_dcache_page(page); ++ kunmap_atomic(pagedata, KM_USER0); ++ } ++ SetPageUptodate(page); ++ out_unlock_page: ++ unlock_page(page); ++ out_tap_relse: ++ reiser4_tap_relse(&tap); ++ out_tap_done: ++ reiser4_tap_done(&tap); ++ return result; ++} ++ ++/* ++ plugin->s.file.readpage ++ reiser4_read->unix_file_read->page_cache_readahead->reiser4_readpage->unix_file_readpage->readpage_tail ++ or ++ filemap_nopage->reiser4_readpage->readpage_unix_file->->readpage_tail ++ ++ At the beginning: coord->node is read locked, zloaded, page is locked, coord is set to existing unit inside of tail ++ item. 
*/ ++int readpage_tail(void *vp, struct page *page) ++{ ++ uf_coord_t *uf_coord = vp; ++ ON_DEBUG(coord_t * coord = &uf_coord->coord); ++ ON_DEBUG(reiser4_key key); ++ ++ assert("umka-2515", PageLocked(page)); ++ assert("umka-2516", !PageUptodate(page)); ++ assert("umka-2517", !jprivate(page) && !PagePrivate(page)); ++ assert("umka-2518", page->mapping && page->mapping->host); ++ ++ assert("umka-2519", znode_is_loaded(coord->node)); ++ assert("umka-2520", item_is_tail(coord)); ++ assert("umka-2521", coord_is_existing_unit(coord)); ++ assert("umka-2522", znode_is_rlocked(coord->node)); ++ assert("umka-2523", ++ page->mapping->host->i_ino == ++ get_key_objectid(item_key_by_coord(coord, &key))); ++ ++ return do_readpage_tail(uf_coord, page); ++} ++ ++/** ++ * overwrite_tail ++ * @flow: ++ * @coord: ++ * ++ * Overwrites tail item or its part by user data. Returns number of bytes ++ * written or error code. ++ */ ++static int overwrite_tail(flow_t *flow, coord_t *coord) ++{ ++ unsigned count; ++ ++ assert("vs-570", flow->user == 1); ++ assert("vs-946", flow->data); ++ assert("vs-947", coord_is_existing_unit(coord)); ++ assert("vs-948", znode_is_write_locked(coord->node)); ++ assert("nikita-3036", reiser4_schedulable()); ++ ++ count = item_length_by_coord(coord) - coord->unit_pos; ++ if (count > flow->length) ++ count = flow->length; ++ ++ if (__copy_from_user((char *)item_body_by_coord(coord) + coord->unit_pos, ++ (const char __user *)flow->data, count)) ++ return RETERR(-EFAULT); ++ ++ znode_make_dirty(coord->node); ++ return count; ++} ++ ++/** ++ * insert_first_tail ++ * @inode: ++ * @flow: ++ * @coord: ++ * @lh: ++ * ++ * Returns number of bytes written or error code. 
++ */ ++static ssize_t insert_first_tail(struct inode *inode, flow_t *flow, ++ coord_t *coord, lock_handle *lh) ++{ ++ int result; ++ loff_t to_write; ++ unix_file_info_t *uf_info; ++ ++ if (get_key_offset(&flow->key) != 0) { ++ /* ++ * file is empty and we have to write not to the beginning of ++ * file. Create a hole at the beginning of file. On success ++ * insert_flow returns 0 as number of written bytes which is ++ * what we have to return on padding a file with holes ++ */ ++ flow->data = NULL; ++ flow->length = get_key_offset(&flow->key); ++ set_key_offset(&flow->key, 0); ++ /* ++ * holes in files built of tails are stored just like if there ++ * were real data which are all zeros. Therefore we have to ++ * allocate quota here as well ++ */ ++ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, flow->length)) ++ return RETERR(-EDQUOT); ++ result = reiser4_insert_flow(coord, lh, flow); ++ if (flow->length) ++ DQUOT_FREE_SPACE_NODIRTY(inode, flow->length); ++ ++ uf_info = unix_file_inode_data(inode); ++ ++ /* ++ * first item insertion is only possible when writing to empty ++ * file or performing tail conversion ++ */ ++ assert("", (uf_info->container == UF_CONTAINER_EMPTY || ++ (reiser4_inode_get_flag(inode, ++ REISER4_PART_MIXED) && ++ reiser4_inode_get_flag(inode, ++ REISER4_PART_IN_CONV)))); ++ /* if file was empty - update its state */ ++ if (result == 0 && uf_info->container == UF_CONTAINER_EMPTY) ++ uf_info->container = UF_CONTAINER_TAILS; ++ return result; ++ } ++ ++ /* check quota before appending data */ ++ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, flow->length)) ++ return RETERR(-EDQUOT); ++ ++ to_write = flow->length; ++ result = reiser4_insert_flow(coord, lh, flow); ++ if (flow->length) ++ DQUOT_FREE_SPACE_NODIRTY(inode, flow->length); ++ return (to_write - flow->length) ? (to_write - flow->length) : result; ++} ++ ++/** ++ * append_tail ++ * @inode: ++ * @flow: ++ * @coord: ++ * @lh: ++ * ++ * Returns number of bytes written or error code. 
++ */ ++static ssize_t append_tail(struct inode *inode, ++ flow_t *flow, coord_t *coord, lock_handle *lh) ++{ ++ int result; ++ reiser4_key append_key; ++ loff_t to_write; ++ ++ if (!keyeq(&flow->key, append_key_tail(coord, &append_key))) { ++ flow->data = NULL; ++ flow->length = get_key_offset(&flow->key) - get_key_offset(&append_key); ++ set_key_offset(&flow->key, get_key_offset(&append_key)); ++ /* ++ * holes in files built of tails are stored just like if there ++ * were real data which are all zeros. Therefore we have to ++ * allocate quota here as well ++ */ ++ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, flow->length)) ++ return RETERR(-EDQUOT); ++ result = reiser4_insert_flow(coord, lh, flow); ++ if (flow->length) ++ DQUOT_FREE_SPACE_NODIRTY(inode, flow->length); ++ return result; ++ } ++ ++ /* check quota before appending data */ ++ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, flow->length)) ++ return RETERR(-EDQUOT); ++ ++ to_write = flow->length; ++ result = reiser4_insert_flow(coord, lh, flow); ++ if (flow->length) ++ DQUOT_FREE_SPACE_NODIRTY(inode, flow->length); ++ return (to_write - flow->length) ? (to_write - flow->length) : result; ++} ++ ++/** ++ * write_tail_reserve_space - reserve space for tail write operation ++ * @inode: ++ * ++ * Estimates and reserves space which may be required for writing one flow to a ++ * file ++ */ ++static int write_extent_reserve_space(struct inode *inode) ++{ ++ __u64 count; ++ reiser4_tree *tree; ++ ++ /* ++ * to write one flow to a file by tails we have to reserve disk space for: ++ ++ * 1. find_file_item may have to insert empty node to the tree (empty ++ * leaf node between two extent items). This requires 1 block and ++ * number of blocks which are necessary to perform insertion of an ++ * internal item into twig level. ++ * ++ * 2. flow insertion ++ * ++ * 3. 
stat data update ++ */ ++ tree = reiser4_tree_by_inode(inode); ++ count = estimate_one_insert_item(tree) + ++ estimate_insert_flow(tree->height) + ++ estimate_one_insert_item(tree); ++ grab_space_enable(); ++ return reiser4_grab_space(count, 0 /* flags */); ++} ++ ++#define PAGE_PER_FLOW 4 ++ ++static loff_t faultin_user_pages(const char __user *buf, size_t count) ++{ ++ loff_t faulted; ++ int to_fault; ++ ++ if (count > PAGE_PER_FLOW * PAGE_CACHE_SIZE) ++ count = PAGE_PER_FLOW * PAGE_CACHE_SIZE; ++ faulted = 0; ++ while (count > 0) { ++ to_fault = PAGE_CACHE_SIZE; ++ if (count < to_fault) ++ to_fault = count; ++ fault_in_pages_readable(buf + faulted, to_fault); ++ count -= to_fault; ++ faulted += to_fault; ++ } ++ return faulted; ++} ++ ++/** ++ * reiser4_write_extent - write method of tail item plugin ++ * @file: file to write to ++ * @buf: address of user-space buffer ++ * @count: number of bytes to write ++ * @pos: position in file to write to ++ * ++ * Returns number of written bytes or error code. 
++ */ ++ssize_t reiser4_write_tail(struct file *file, const char __user *buf, ++ size_t count, loff_t *pos) ++{ ++ struct inode *inode; ++ struct hint hint; ++ int result; ++ flow_t flow; ++ coord_t *coord; ++ lock_handle *lh; ++ znode *loaded; ++ ++ inode = file->f_dentry->d_inode; ++ ++ if (write_extent_reserve_space(inode)) ++ return RETERR(-ENOSPC); ++ ++ result = load_file_hint(file, &hint); ++ BUG_ON(result != 0); ++ ++ flow.length = faultin_user_pages(buf, count); ++ flow.user = 1; ++ memcpy(&flow.data, &buf, sizeof(buf)); ++ flow.op = WRITE_OP; ++ key_by_inode_and_offset_common(inode, *pos, &flow.key); ++ ++ result = find_file_item(&hint, &flow.key, ZNODE_WRITE_LOCK, inode); ++ if (IS_CBKERR(result)) ++ return result; ++ ++ coord = &hint.ext_coord.coord; ++ lh = hint.ext_coord.lh; ++ ++ result = zload(coord->node); ++ BUG_ON(result != 0); ++ loaded = coord->node; ++ ++ if (coord->between == AFTER_UNIT) { ++ /* append with data or hole */ ++ result = append_tail(inode, &flow, coord, lh); ++ } else if (coord->between == AT_UNIT) { ++ /* overwrite */ ++ result = overwrite_tail(&flow, coord); ++ } else { ++ /* no items of this file yet. 
insert data or hole */ ++ result = insert_first_tail(inode, &flow, coord, lh); ++ } ++ zrelse(loaded); ++ if (result < 0) { ++ done_lh(lh); ++ return result; ++ } ++ ++ /* seal and unlock znode */ ++ hint.ext_coord.valid = 0; ++ if (hint.ext_coord.valid) ++ reiser4_set_hint(&hint, &flow.key, ZNODE_WRITE_LOCK); ++ else ++ reiser4_unset_hint(&hint); ++ ++ save_file_hint(file, &hint); ++ return result; ++} ++ ++#if REISER4_DEBUG ++ ++static int ++coord_matches_key_tail(const coord_t * coord, const reiser4_key * key) ++{ ++ reiser4_key item_key; ++ ++ assert("vs-1356", coord_is_existing_unit(coord)); ++ assert("vs-1354", keylt(key, append_key_tail(coord, &item_key))); ++ assert("vs-1355", keyge(key, item_key_by_coord(coord, &item_key))); ++ return get_key_offset(key) == ++ get_key_offset(&item_key) + coord->unit_pos; ++ ++} ++ ++#endif ++ ++/* plugin->u.item.s.file.read */ ++int reiser4_read_tail(struct file *file UNUSED_ARG, flow_t *f, hint_t *hint) ++{ ++ unsigned count; ++ int item_length; ++ coord_t *coord; ++ uf_coord_t *uf_coord; ++ ++ uf_coord = &hint->ext_coord; ++ coord = &uf_coord->coord; ++ ++ assert("vs-571", f->user == 1); ++ assert("vs-571", f->data); ++ assert("vs-967", coord && coord->node); ++ assert("vs-1117", znode_is_rlocked(coord->node)); ++ assert("vs-1118", znode_is_loaded(coord->node)); ++ ++ assert("nikita-3037", reiser4_schedulable()); ++ assert("vs-1357", coord_matches_key_tail(coord, &f->key)); ++ ++ /* calculate number of bytes to read off the item */ ++ item_length = item_length_by_coord(coord); ++ count = item_length_by_coord(coord) - coord->unit_pos; ++ if (count > f->length) ++ count = f->length; ++ ++ /* user page has to be brought in so that major page fault does not ++ * occur here when longtem lock is held */ ++ if (__copy_to_user((char __user *)f->data, ++ ((char *)item_body_by_coord(coord) + coord->unit_pos), ++ count)) ++ return RETERR(-EFAULT); ++ ++ /* probably mark_page_accessed() should only be called if ++ * coord->unit_pos 
is zero. */ ++ mark_page_accessed(znode_page(coord->node)); ++ move_flow_forward(f, count); ++ ++ coord->unit_pos += count; ++ if (item_length == coord->unit_pos) { ++ coord->unit_pos--; ++ coord->between = AFTER_UNIT; ++ } ++ ++ return 0; ++} ++ ++/* ++ plugin->u.item.s.file.append_key ++ key of first byte which is the next to last byte by addressed by this item ++*/ ++reiser4_key *append_key_tail(const coord_t * coord, reiser4_key * key) ++{ ++ item_key_by_coord(coord, key); ++ set_key_offset(key, get_key_offset(key) + item_length_by_coord(coord)); ++ return key; ++} ++ ++/* plugin->u.item.s.file.init_coord_extension */ ++void init_coord_extension_tail(uf_coord_t * uf_coord, loff_t lookuped) ++{ ++ uf_coord->valid = 1; ++} ++ ++/* ++ plugin->u.item.s.file.get_block ++*/ ++int ++get_block_address_tail(const coord_t * coord, sector_t lblock, sector_t * block) ++{ ++ assert("nikita-3252", znode_get_level(coord->node) == LEAF_LEVEL); ++ ++ if (reiser4_blocknr_is_fake(znode_get_block(coord->node))) ++ /* if node has'nt obtainet its block number yet, return 0. 
++ * Lets avoid upsetting users with some cosmic numbers beyond ++ * the device capacity.*/ ++ *block = 0; ++ else ++ *block = *znode_get_block(coord->node); ++ return 0; ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * scroll-step: 1 ++ * End: ++ */ +diff --git a/fs/reiser4/plugin/item/tail.h b/fs/reiser4/plugin/item/tail.h +new file mode 100644 +index 0000000..459fa27 +--- /dev/null ++++ b/fs/reiser4/plugin/item/tail.h +@@ -0,0 +1,58 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#if !defined( __REISER4_TAIL_H__ ) ++#define __REISER4_TAIL_H__ ++ ++typedef struct { ++ int not_used; ++} tail_coord_extension_t; ++ ++struct cut_list; ++ ++/* plugin->u.item.b.* */ ++reiser4_key *max_key_inside_tail(const coord_t *, reiser4_key *); ++int can_contain_key_tail(const coord_t * coord, const reiser4_key * key, ++ const reiser4_item_data *); ++int mergeable_tail(const coord_t * p1, const coord_t * p2); ++pos_in_node_t nr_units_tail(const coord_t *); ++lookup_result lookup_tail(const reiser4_key *, lookup_bias, coord_t *); ++int paste_tail(coord_t *, reiser4_item_data *, carry_plugin_info *); ++int can_shift_tail(unsigned free_space, coord_t * source, ++ znode * target, shift_direction, unsigned *size, ++ unsigned want); ++void copy_units_tail(coord_t * target, coord_t * source, unsigned from, ++ unsigned count, shift_direction, unsigned free_space); ++int kill_hook_tail(const coord_t *, pos_in_node_t from, pos_in_node_t count, ++ struct carry_kill_data *); ++int cut_units_tail(coord_t *, pos_in_node_t from, pos_in_node_t to, ++ struct carry_cut_data *, reiser4_key * smallest_removed, ++ reiser4_key * new_first); ++int kill_units_tail(coord_t *, pos_in_node_t from, pos_in_node_t to, ++ struct carry_kill_data *, reiser4_key * smallest_removed, ++ reiser4_key * new_first); ++reiser4_key *unit_key_tail(const coord_t *, reiser4_key 
*); ++ ++/* plugin->u.item.s.* */ ++ssize_t reiser4_write_tail(struct file *file, const char __user *buf, ++ size_t count, loff_t *pos); ++int reiser4_read_tail(struct file *, flow_t *, hint_t *); ++int readpage_tail(void *vp, struct page *page); ++reiser4_key *append_key_tail(const coord_t *, reiser4_key *); ++void init_coord_extension_tail(uf_coord_t *, loff_t offset); ++int get_block_address_tail(const coord_t *, sector_t, sector_t *); ++int item_balance_dirty_pages(struct address_space *, const flow_t *, ++ hint_t *, int back_to_dirty, int set_hint); ++ ++/* __REISER4_TAIL_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/node/Makefile b/fs/reiser4/plugin/node/Makefile +new file mode 100644 +index 0000000..9400627 +--- /dev/null ++++ b/fs/reiser4/plugin/node/Makefile +@@ -0,0 +1,5 @@ ++obj-$(CONFIG_REISER4_FS) += node_plugins.o ++ ++node_plugins-objs := \ ++ node.o \ ++ node40.o +diff --git a/fs/reiser4/plugin/node/node.c b/fs/reiser4/plugin/node/node.c +new file mode 100644 +index 0000000..179a4a7 +--- /dev/null ++++ b/fs/reiser4/plugin/node/node.c +@@ -0,0 +1,131 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Node plugin interface. ++ ++ Description: The tree provides the abstraction of flows, which it ++ internally fragments into items which it stores in nodes. ++ ++ A key_atom is a piece of data bound to a single key. ++ ++ For reasonable space efficiency to be achieved it is often ++ necessary to store key_atoms in the nodes in the form of items, where ++ an item is a sequence of key_atoms of the same or similar type. 
It is ++ more space-efficient, because the item can implement (very) ++ efficient compression of key_atom's bodies using internal knowledge ++ about their semantics, and it can often avoid having a key for each ++ key_atom. Each type of item has specific operations implemented by its ++ item handler (see balance.c). ++ ++ Rationale: the rest of the code (specifically balancing routines) ++ accesses leaf level nodes through this interface. This way we can ++ implement various block layouts and even combine various layouts ++ within the same tree. Balancing/allocating algorithms should not ++ care about peculiarities of splitting/merging specific item types, ++ but rather should leave that to the item's item handler. ++ ++ Items, including those that provide the abstraction of flows, have ++ the property that if you move them in part or in whole to another ++ node, the balancing code invokes their is_left_mergeable() ++ item_operation to determine if they are mergeable with their new ++ neighbor in the node you have moved them to. For some items the ++ is_left_mergeable() function always returns null. ++ ++ When moving the bodies of items from one node to another: ++ ++ if a partial item is shifted to another node the balancing code invokes ++ an item handler method to handle the item splitting. ++ ++ if the balancing code needs to merge with an item in the node it ++ is shifting to, it will invoke an item handler method to handle ++ the item merging. ++ ++ if it needs to move whole item bodies unchanged, the balancing code uses xmemcpy() ++ adjusting the item headers after the move is done using the node handler. 
++*/ ++ ++#include "../../forward.h" ++#include "../../debug.h" ++#include "../../key.h" ++#include "../../coord.h" ++#include "../plugin_header.h" ++#include "../item/item.h" ++#include "node.h" ++#include "../plugin.h" ++#include "../../znode.h" ++#include "../../tree.h" ++#include "../../super.h" ++#include "../../reiser4.h" ++ ++/** ++ * leftmost_key_in_node - get the smallest key in node ++ * @node: ++ * @key: store result here ++ * ++ * Stores the leftmost key of @node in @key. ++ */ ++reiser4_key *leftmost_key_in_node(const znode *node, reiser4_key *key) ++{ ++ assert("nikita-1634", node != NULL); ++ assert("nikita-1635", key != NULL); ++ ++ if (!node_is_empty(node)) { ++ coord_t first_item; ++ ++ coord_init_first_unit(&first_item, (znode *) node); ++ item_key_by_coord(&first_item, key); ++ } else ++ *key = *reiser4_max_key(); ++ return key; ++} ++ ++node_plugin node_plugins[LAST_NODE_ID] = { ++ [NODE40_ID] = { ++ .h = { ++ .type_id = REISER4_NODE_PLUGIN_TYPE, ++ .id = NODE40_ID, ++ .pops = NULL, ++ .label = "unified", ++ .desc = "unified node layout", ++ .linkage = {NULL, NULL} ++ }, ++ .item_overhead = item_overhead_node40, ++ .free_space = free_space_node40, ++ .lookup = lookup_node40, ++ .num_of_items = num_of_items_node40, ++ .item_by_coord = item_by_coord_node40, ++ .length_by_coord = length_by_coord_node40, ++ .plugin_by_coord = plugin_by_coord_node40, ++ .key_at = key_at_node40, ++ .estimate = estimate_node40, ++ .check = check_node40, ++ .parse = parse_node40, ++ .init = init_node40, ++#ifdef GUESS_EXISTS ++ .guess = guess_node40, ++#endif ++ .change_item_size = change_item_size_node40, ++ .create_item = create_item_node40, ++ .update_item_key = update_item_key_node40, ++ .cut_and_kill = kill_node40, ++ .cut = cut_node40, ++ .shift = shift_node40, ++ .shrink_item = shrink_item_node40, ++ .fast_insert = fast_insert_node40, ++ .fast_paste = fast_paste_node40, ++ .fast_cut = fast_cut_node40, ++ .max_item_size = max_item_size_node40, ++ .prepare_removal 
= prepare_removal_node40, ++ .set_item_plugin = set_item_plugin_node40 ++ } ++}; ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/node/node.h b/fs/reiser4/plugin/node/node.h +new file mode 100644 +index 0000000..af0c641 +--- /dev/null ++++ b/fs/reiser4/plugin/node/node.h +@@ -0,0 +1,272 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* We need a definition of the default node layout here. */ ++ ++/* Generally speaking, it is best to have free space in the middle of the ++ node so that two sets of things can grow towards it, and to have the ++ item bodies on the left so that the last one of them grows into free ++ space. We optimize for the case where we append new items to the end ++ of the node, or grow the last item, because it hurts nothing to so ++ optimize and it is a common special case to do massive insertions in ++ increasing key order (and one of cases more likely to have a real user ++ notice the delay time for). ++ ++ formatted leaf default layout: (leaf1) ++ ++ |node header:item bodies:free space:key + pluginid + item offset| ++ ++ We grow towards the middle, optimizing layout for the case where we ++ append new items to the end of the node. The node header is fixed ++ length. Keys, and item offsets plus pluginids for the items ++ corresponding to them are in increasing key order, and are fixed ++ length. Item offsets are relative to start of node (16 bits creating ++ a node size limit of 64k, 12 bits might be a better choice....). Item ++ bodies are in decreasing key order. Item bodies have a variable size. ++ There is a one to one to one mapping of keys to item offsets to item ++ bodies. Item offsets consist of pointers to the zeroth byte of the ++ item body. 
Item length equals the start of the next item minus the ++ start of this item, except the zeroth item whose length equals the end ++ of the node minus the start of that item (plus a byte). In other ++ words, the item length is not recorded anywhere, and it does not need ++ to be since it is computable. ++ ++ Leaf variable length items and keys layout : (lvar) ++ ++ |node header:key offset + item offset + pluginid triplets:free space:key bodies:item bodies| ++ ++ We grow towards the middle, optimizing layout for the case where we ++ append new items to the end of the node. The node header is fixed ++ length. Keys and item offsets for the items corresponding to them are ++ in increasing key order, and keys are variable length. Item offsets ++ are relative to start of node (16 bits). Item bodies are in ++ decreasing key order. Item bodies have a variable size. There is a ++ one to one to one mapping of keys to item offsets to item bodies. ++ Item offsets consist of pointers to the zeroth byte of the item body. ++ Item length equals the start of the next item's key minus the start of ++ this item, except the zeroth item whose length equals the end of the ++ node minus the start of that item (plus a byte). ++ ++ leaf compressed keys layout: (lcomp) ++ ++ |node header:key offset + key inherit + item offset pairs:free space:key bodies:item bodies| ++ ++ We grow towards the middle, optimizing layout for the case where we ++ append new items to the end of the node. The node header is fixed ++ length. Keys and item offsets for the items corresponding to them are ++ in increasing key order, and keys are variable length. The "key ++ inherit" field indicates how much of the key prefix is identical to ++ the previous key (stem compression as described in "Managing ++ Gigabytes" is used). key_inherit is a one byte integer. 
The ++ intra-node searches performed through this layout are linear searches, ++ and this is theorized to not hurt performance much due to the high ++ cost of processor stalls on modern CPUs, and the small number of keys ++ in a single node. Item offsets are relative to start of node (16 ++ bits). Item bodies are in decreasing key order. Item bodies have a ++ variable size. There is a one to one to one mapping of keys to item ++ offsets to item bodies. Item offsets consist of pointers to the ++ zeroth byte of the item body. Item length equals the start of the ++ next item minus the start of this item, except the zeroth item whose ++ length equals the end of the node minus the start of that item (plus a ++ byte). In other words, item length and key length is not recorded ++ anywhere, and it does not need to be since it is computable. ++ ++ internal node default layout: (idef1) ++ ++ just like ldef1 except that item bodies are either blocknrs of ++ children or extents, and moving them may require updating parent ++ pointers in the nodes that they point to. ++*/ ++ ++/* There is an inherent 3-way tradeoff between optimizing and ++ exchanging disks between different architectures and code ++ complexity. This is optimal and simple and inexchangeable. ++ Someone else can do the code for exchanging disks and make it ++ complex. It would not be that hard. Using other than the PAGE_SIZE ++ might be suboptimal. 
++*/ ++ ++#if !defined( __REISER4_NODE_H__ ) ++#define __REISER4_NODE_H__ ++ ++#define LEAF40_NODE_SIZE PAGE_CACHE_SIZE ++ ++#include "../../dformat.h" ++#include "../plugin_header.h" ++ ++#include ++ ++typedef enum { ++ NS_FOUND = 0, ++ NS_NOT_FOUND = -ENOENT ++} node_search_result; ++ ++/* Maximal possible space overhead for creation of new item in a node */ ++#define REISER4_NODE_MAX_OVERHEAD ( sizeof( reiser4_key ) + 32 ) ++ ++typedef enum { ++ REISER4_NODE_DKEYS = (1 << 0), ++ REISER4_NODE_TREE_STABLE = (1 << 1) ++} reiser4_node_check_flag; ++ ++/* cut and cut_and_kill have too long list of parameters. This structure is just to safe some space on stack */ ++struct cut_list { ++ coord_t *from; ++ coord_t *to; ++ const reiser4_key *from_key; ++ const reiser4_key *to_key; ++ reiser4_key *smallest_removed; ++ carry_plugin_info *info; ++ __u32 flags; ++ struct inode *inode; /* this is to pass list of eflushed jnodes down to extent_kill_hook */ ++ lock_handle *left; ++ lock_handle *right; ++}; ++ ++struct carry_cut_data; ++struct carry_kill_data; ++ ++/* The responsibility of the node plugin is to store and give access ++ to the sequence of items within the node. */ ++typedef struct node_plugin { ++ /* generic plugin fields */ ++ plugin_header h; ++ ++ /* calculates the amount of space that will be required to store an ++ item which is in addition to the space consumed by the item body. ++ (the space consumed by the item body can be gotten by calling ++ item->estimate) */ ++ size_t(*item_overhead) (const znode * node, flow_t * f); ++ ++ /* returns free space by looking into node (i.e., without using ++ znode->free_space). 
*/ ++ size_t(*free_space) (znode * node); ++ /* search within the node for the one item which might ++ contain the key, invoking item->search_within to search within ++ that item to see if it is in there */ ++ node_search_result(*lookup) (znode * node, const reiser4_key * key, ++ lookup_bias bias, coord_t * coord); ++ /* number of items in node */ ++ int (*num_of_items) (const znode * node); ++ ++ /* store information about item in @coord in @data */ ++ /* break into several node ops, don't add any more uses of this before doing so */ ++ /*int ( *item_at )( const coord_t *coord, reiser4_item_data *data ); */ ++ char *(*item_by_coord) (const coord_t * coord); ++ int (*length_by_coord) (const coord_t * coord); ++ item_plugin *(*plugin_by_coord) (const coord_t * coord); ++ ++ /* store item key in @key */ ++ reiser4_key *(*key_at) (const coord_t * coord, reiser4_key * key); ++ /* conservatively estimate whether unit of what size can fit ++ into node. This estimation should be performed without ++ actually looking into the node's content (free space is saved in ++ znode). */ ++ size_t(*estimate) (znode * node); ++ ++ /* performs every consistency check the node plugin author could ++ imagine. Optional. */ ++ int (*check) (const znode * node, __u32 flags, const char **error); ++ ++ /* Called when node is read into memory and node plugin is ++ already detected. This should read some data into znode (like free ++ space counter) and, optionally, check data consistency. ++ */ ++ int (*parse) (znode * node); ++ /* This method is called on a new node to initialise plugin specific ++ data (header, etc.) */ ++ int (*init) (znode * node); ++ /* Check whether @node content conforms to this plugin format. ++ Probably only useful after support for old V3.x formats is added. ++ Uncomment after 4.0 only. 
++ */ ++ /* int ( *guess )( const znode *node ); */ ++#if REISER4_DEBUG ++ void (*print) (const char *prefix, const znode * node, __u32 flags); ++#endif ++ /* change size of @item by @by bytes. @item->node has enough free ++ space. When @by > 0 - free space is appended to end of item. When ++ @by < 0 - item is truncated - it is assumed that last @by bytes if ++ the item are freed already */ ++ void (*change_item_size) (coord_t * item, int by); ++ ++ /* create new item @length bytes long in coord @target */ ++ int (*create_item) (coord_t * target, const reiser4_key * key, ++ reiser4_item_data * data, carry_plugin_info * info); ++ ++ /* update key of item. */ ++ void (*update_item_key) (coord_t * target, const reiser4_key * key, ++ carry_plugin_info * info); ++ ++ int (*cut_and_kill) (struct carry_kill_data *, carry_plugin_info *); ++ int (*cut) (struct carry_cut_data *, carry_plugin_info *); ++ ++ /* ++ * shrink item pointed to by @coord by @delta bytes. ++ */ ++ int (*shrink_item) (coord_t * coord, int delta); ++ ++ /* copy as much as possible but not more than up to @stop from ++ @stop->node to @target. If (pend == append) then data from beginning of ++ @stop->node are copied to the end of @target. If (pend == prepend) then ++ data from the end of @stop->node are copied to the beginning of ++ @target. Copied data are removed from @stop->node. Information ++ about what to do on upper level is stored in @todo */ ++ int (*shift) (coord_t * stop, znode * target, shift_direction pend, ++ int delete_node, int including_insert_coord, ++ carry_plugin_info * info); ++ /* return true if this node allows skip carry() in some situations ++ (see fs/reiser4/tree.c:insert_by_coord()). Reiser3.x format ++ emulation doesn't. ++ ++ This will speedup insertions that doesn't require updates to the ++ parent, by bypassing initialisation of carry() structures. It's ++ believed that majority of insertions will fit there. 
++ ++ */ ++ int (*fast_insert) (const coord_t * coord); ++ int (*fast_paste) (const coord_t * coord); ++ int (*fast_cut) (const coord_t * coord); ++ /* this limits max size of item which can be inserted into a node and ++ number of bytes item in a node may be appended with */ ++ int (*max_item_size) (void); ++ int (*prepare_removal) (znode * empty, carry_plugin_info * info); ++ /* change plugin id of items which are in a node already. Currently it is Used in tail conversion for regular ++ * files */ ++ int (*set_item_plugin) (coord_t * coord, item_id); ++} node_plugin; ++ ++typedef enum { ++ /* standard unified node layout used for both leaf and internal ++ nodes */ ++ NODE40_ID, ++ LAST_NODE_ID ++} reiser4_node_id; ++ ++extern reiser4_key *leftmost_key_in_node(const znode * node, reiser4_key * key); ++#if REISER4_DEBUG ++extern void print_node_content(const char *prefix, const znode * node, ++ __u32 flags); ++#endif ++ ++extern void indent_znode(const znode * node); ++ ++typedef struct common_node_header { ++ /* ++ * identifier of node plugin. Must be located at the very beginning of ++ * a node. 
++ */ ++ __le16 plugin_id; ++} common_node_header; ++ ++/* __REISER4_NODE_H__ */ ++#endif ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * scroll-step: 1 ++ * End: ++ */ +diff --git a/fs/reiser4/plugin/node/node40.c b/fs/reiser4/plugin/node/node40.c +new file mode 100644 +index 0000000..6a9cc73 +--- /dev/null ++++ b/fs/reiser4/plugin/node/node40.c +@@ -0,0 +1,2924 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "../../debug.h" ++#include "../../key.h" ++#include "../../coord.h" ++#include "../plugin_header.h" ++#include "../item/item.h" ++#include "node.h" ++#include "node40.h" ++#include "../plugin.h" ++#include "../../jnode.h" ++#include "../../znode.h" ++#include "../../pool.h" ++#include "../../carry.h" ++#include "../../tap.h" ++#include "../../tree.h" ++#include "../../super.h" ++#include "../../reiser4.h" ++ ++#include ++#include ++#include ++ ++/* leaf 40 format: ++ ++ [node header | item 0, item 1, .., item N-1 | free space | item_head N-1, .. item_head 1, item head 0 ] ++ plugin_id (16) key ++ free_space (16) pluginid (16) ++ free_space_start (16) offset (16) ++ level (8) ++ num_items (16) ++ magic (32) ++ flush_time (32) ++*/ ++/* NIKITA-FIXME-HANS: I told you guys not less than 10 times to not call it r4fs. Change to "ReIs". 
*/ ++/* magic number that is stored in ->magic field of node header */ ++static const __u32 REISER4_NODE_MAGIC = 0x52344653; /* (*(__u32 *)"R4FS"); */ ++ ++static int prepare_for_update(znode * left, znode * right, ++ carry_plugin_info * info); ++ ++/* header of node of reiser40 format is at the beginning of node */ ++static inline node40_header *node40_node_header(const znode * node /* node to ++ * query */ ) ++{ ++ assert("nikita-567", node != NULL); ++ assert("nikita-568", znode_page(node) != NULL); ++ assert("nikita-569", zdata(node) != NULL); ++ return (node40_header *) zdata(node); ++} ++ ++/* functions to get/set fields of node40_header */ ++#define nh40_get_magic(nh) le32_to_cpu(get_unaligned(&(nh)->magic)) ++#define nh40_get_free_space(nh) le16_to_cpu(get_unaligned(&(nh)->free_space)) ++#define nh40_get_free_space_start(nh) le16_to_cpu(get_unaligned(&(nh)->free_space_start)) ++#define nh40_get_level(nh) get_unaligned(&(nh)->level) ++#define nh40_get_num_items(nh) le16_to_cpu(get_unaligned(&(nh)->nr_items)) ++#define nh40_get_flush_id(nh) le64_to_cpu(get_unaligned(&(nh)->flush_id)) ++ ++#define nh40_set_magic(nh, value) put_unaligned(cpu_to_le32(value), &(nh)->magic) ++#define nh40_set_free_space(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->free_space) ++#define nh40_set_free_space_start(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->free_space_start) ++#define nh40_set_level(nh, value) put_unaligned(value, &(nh)->level) ++#define nh40_set_num_items(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->nr_items) ++#define nh40_set_mkfs_id(nh, value) put_unaligned(cpu_to_le32(value), &(nh)->mkfs_id) ++ ++/* plugin field of node header should be read/set by ++ plugin_by_disk_id/save_disk_plugin */ ++ ++/* array of item headers is at the end of node */ ++static inline item_header40 *node40_ih_at(const znode * node, unsigned pos) ++{ ++ return (item_header40 *) (zdata(node) + znode_size(node)) - pos - 1; ++} ++ ++/* ( page_address( node -> pg ) + 
PAGE_CACHE_SIZE ) - pos - 1 ++ */ ++static inline item_header40 *node40_ih_at_coord(const coord_t * coord) ++{ ++ return (item_header40 *) (zdata(coord->node) + ++ znode_size(coord->node)) - (coord->item_pos) - ++ 1; ++} ++ ++/* functions to get/set fields of item_header40 */ ++#define ih40_get_offset(ih) le16_to_cpu(get_unaligned(&(ih)->offset)) ++ ++#define ih40_set_offset(ih, value) put_unaligned(cpu_to_le16(value), &(ih)->offset) ++ ++/* plugin field of item header should be read/set by ++ plugin_by_disk_id/save_disk_plugin */ ++ ++/* plugin methods */ ++ ++/* plugin->u.node.item_overhead ++ look for description of this method in plugin/node/node.h */ ++size_t ++item_overhead_node40(const znode * node UNUSED_ARG, flow_t * f UNUSED_ARG) ++{ ++ return sizeof(item_header40); ++} ++ ++/* plugin->u.node.free_space ++ look for description of this method in plugin/node/node.h */ ++size_t free_space_node40(znode * node) ++{ ++ assert("nikita-577", node != NULL); ++ assert("nikita-578", znode_is_loaded(node)); ++ assert("nikita-579", zdata(node) != NULL); ++ ++ return nh40_get_free_space(node40_node_header(node)); ++} ++ ++/* private inline version of node40_num_of_items() for use in this file. This ++ is necessary, because address of node40_num_of_items() is taken and it is ++ never inlined as a result. 
*/ ++static inline short node40_num_of_items_internal(const znode * node) ++{ ++ return nh40_get_num_items(node40_node_header(node)); ++} ++ ++#if REISER4_DEBUG ++static inline void check_num_items(const znode * node) ++{ ++ assert("nikita-2749", ++ node40_num_of_items_internal(node) == node->nr_items); ++ assert("nikita-2746", znode_is_write_locked(node)); ++} ++#else ++#define check_num_items(node) noop ++#endif ++ ++/* plugin->u.node.num_of_items ++ look for description of this method in plugin/node/node.h */ ++int num_of_items_node40(const znode * node) ++{ ++ return node40_num_of_items_internal(node); ++} ++ ++static void ++node40_set_num_items(znode * node, node40_header * nh, unsigned value) ++{ ++ assert("nikita-2751", node != NULL); ++ assert("nikita-2750", nh == node40_node_header(node)); ++ ++ check_num_items(node); ++ nh40_set_num_items(nh, value); ++ node->nr_items = value; ++ check_num_items(node); ++} ++ ++/* plugin->u.node.item_by_coord ++ look for description of this method in plugin/node/node.h */ ++char *item_by_coord_node40(const coord_t * coord) ++{ ++ item_header40 *ih; ++ char *p; ++ ++ /* @coord is set to existing item */ ++ assert("nikita-596", coord != NULL); ++ assert("vs-255", coord_is_existing_item(coord)); ++ ++ ih = node40_ih_at_coord(coord); ++ p = zdata(coord->node) + ih40_get_offset(ih); ++ return p; ++} ++ ++/* plugin->u.node.length_by_coord ++ look for description of this method in plugin/node/node.h */ ++int length_by_coord_node40(const coord_t * coord) ++{ ++ item_header40 *ih; ++ int result; ++ ++ /* @coord is set to existing item */ ++ assert("vs-256", coord != NULL); ++ assert("vs-257", coord_is_existing_item(coord)); ++ ++ ih = node40_ih_at_coord(coord); ++ if ((int)coord->item_pos == ++ node40_num_of_items_internal(coord->node) - 1) ++ result = ++ nh40_get_free_space_start(node40_node_header(coord->node)) - ++ ih40_get_offset(ih); ++ else ++ result = ih40_get_offset(ih - 1) - ih40_get_offset(ih); ++ ++ return result; ++} 
++ ++static pos_in_node_t ++node40_item_length(const znode * node, pos_in_node_t item_pos) ++{ ++ item_header40 *ih; ++ pos_in_node_t result; ++ ++ /* @coord is set to existing item */ ++ assert("vs-256", node != NULL); ++ assert("vs-257", node40_num_of_items_internal(node) > item_pos); ++ ++ ih = node40_ih_at(node, item_pos); ++ if (item_pos == node40_num_of_items_internal(node) - 1) ++ result = ++ nh40_get_free_space_start(node40_node_header(node)) - ++ ih40_get_offset(ih); ++ else ++ result = ih40_get_offset(ih - 1) - ih40_get_offset(ih); ++ ++ return result; ++} ++ ++/* plugin->u.node.plugin_by_coord ++ look for description of this method in plugin/node/node.h */ ++item_plugin *plugin_by_coord_node40(const coord_t * coord) ++{ ++ item_header40 *ih; ++ item_plugin *result; ++ ++ /* @coord is set to existing item */ ++ assert("vs-258", coord != NULL); ++ assert("vs-259", coord_is_existing_item(coord)); ++ ++ ih = node40_ih_at_coord(coord); ++ /* pass NULL in stead of current tree. This is time critical call. 
*/ ++ result = item_plugin_by_disk_id(NULL, &ih->plugin_id); ++ return result; ++} ++ ++/* plugin->u.node.key_at ++ look for description of this method in plugin/node/node.h */ ++reiser4_key *key_at_node40(const coord_t * coord, reiser4_key * key) ++{ ++ item_header40 *ih; ++ ++ assert("nikita-1765", coord_is_existing_item(coord)); ++ ++ /* @coord is set to existing item */ ++ ih = node40_ih_at_coord(coord); ++ memcpy(key, &ih->key, sizeof(reiser4_key)); ++ return key; ++} ++ ++/* VS-FIXME-HANS: please review whether the below are properly disabled when debugging is disabled */ ++ ++#define NODE_INCSTAT(n, counter) \ ++ reiser4_stat_inc_at_level(znode_get_level(n), node.lookup.counter) ++ ++#define NODE_ADDSTAT(n, counter, val) \ ++ reiser4_stat_add_at_level(znode_get_level(n), node.lookup.counter, val) ++ ++/* plugin->u.node.lookup ++ look for description of this method in plugin/node/node.h */ ++node_search_result lookup_node40(znode * node /* node to query */ , ++ const reiser4_key * key /* key to look for */ , ++ lookup_bias bias /* search bias */ , ++ coord_t * coord /* resulting coord */ ) ++{ ++ int left; ++ int right; ++ int found; ++ int items; ++ ++ item_header40 *lefth; ++ item_header40 *righth; ++ ++ item_plugin *iplug; ++ item_header40 *bstop; ++ item_header40 *ih; ++ cmp_t order; ++ ++ assert("nikita-583", node != NULL); ++ assert("nikita-584", key != NULL); ++ assert("nikita-585", coord != NULL); ++ assert("nikita-2693", znode_is_any_locked(node)); ++ cassert(REISER4_SEQ_SEARCH_BREAK > 2); ++ ++ items = node_num_items(node); ++ ++ if (unlikely(items == 0)) { ++ coord_init_first_unit(coord, node); ++ return NS_NOT_FOUND; ++ } ++ ++ /* binary search for item that can contain given key */ ++ left = 0; ++ right = items - 1; ++ coord->node = node; ++ coord_clear_iplug(coord); ++ found = 0; ++ ++ lefth = node40_ih_at(node, left); ++ righth = node40_ih_at(node, right); ++ ++ /* It is known that for small arrays sequential search is on average ++ more 
efficient than binary. This is because sequential search is ++ coded as tight loop that can be better optimized by compilers and ++ for small array size gain from this optimization makes sequential ++ search the winner. Another, maybe more important, reason for this, ++ is that sequential array is more CPU cache friendly, whereas binary ++ search effectively destroys CPU caching. ++ ++ Critical here is the notion of "smallness". Reasonable value of ++ REISER4_SEQ_SEARCH_BREAK can be found by playing with code in ++ fs/reiser4/ulevel/ulevel.c:test_search(). ++ ++ Don't try to further optimize sequential search by scanning from ++ right to left in attempt to use more efficient loop termination ++ condition (comparison with 0). This doesn't work. ++ ++ */ ++ ++ while (right - left >= REISER4_SEQ_SEARCH_BREAK) { ++ int median; ++ item_header40 *medianh; ++ ++ median = (left + right) / 2; ++ medianh = node40_ih_at(node, median); ++ ++ assert("nikita-1084", median >= 0); ++ assert("nikita-1085", median < items); ++ switch (keycmp(key, &medianh->key)) { ++ case LESS_THAN: ++ right = median; ++ righth = medianh; ++ break; ++ default: ++ wrong_return_value("nikita-586", "keycmp"); ++ case GREATER_THAN: ++ left = median; ++ lefth = medianh; ++ break; ++ case EQUAL_TO: ++ do { ++ --median; ++ /* headers are ordered from right to left */ ++ ++medianh; ++ } while (median >= 0 && keyeq(key, &medianh->key)); ++ right = left = median + 1; ++ ih = lefth = righth = medianh - 1; ++ found = 1; ++ break; ++ } ++ } ++ /* sequential scan. Item headers, and, therefore, keys are stored at ++ the rightmost part of a node from right to left. We are trying to ++ access memory from left to right, and hence, scan in _descending_ ++ order of item numbers. 
++ */ ++ if (!found) { ++ for (left = right, ih = righth; left >= 0; ++ih, --left) { ++ cmp_t comparison; ++ ++ prefetchkey(&(ih + 1)->key); ++ comparison = keycmp(&ih->key, key); ++ if (comparison == GREATER_THAN) ++ continue; ++ if (comparison == EQUAL_TO) { ++ found = 1; ++ do { ++ --left; ++ ++ih; ++ } while (left >= 0 && keyeq(&ih->key, key)); ++ ++left; ++ --ih; ++ } else { ++ assert("nikita-1256", comparison == LESS_THAN); ++ } ++ break; ++ } ++ if (unlikely(left < 0)) ++ left = 0; ++ } ++ ++ assert("nikita-3212", right >= left); ++ assert("nikita-3214", ++ equi(found, keyeq(&node40_ih_at(node, left)->key, key))); ++ ++ coord_set_item_pos(coord, left); ++ coord->unit_pos = 0; ++ coord->between = AT_UNIT; ++ ++ /* key < leftmost key in a mode or node is corrupted and keys ++ are not sorted */ ++ bstop = node40_ih_at(node, (unsigned)left); ++ order = keycmp(&bstop->key, key); ++ if (unlikely(order == GREATER_THAN)) { ++ if (unlikely(left != 0)) { ++ /* screw up */ ++ warning("nikita-587", "Key less than %i key in a node", ++ left); ++ reiser4_print_key("key", key); ++ reiser4_print_key("min", &bstop->key); ++ print_coord_content("coord", coord); ++ return RETERR(-EIO); ++ } else { ++ coord->between = BEFORE_UNIT; ++ return NS_NOT_FOUND; ++ } ++ } ++ /* left <= key, ok */ ++ iplug = item_plugin_by_disk_id(znode_get_tree(node), &bstop->plugin_id); ++ ++ if (unlikely(iplug == NULL)) { ++ warning("nikita-588", "Unknown plugin %i", ++ le16_to_cpu(get_unaligned(&bstop->plugin_id))); ++ reiser4_print_key("key", key); ++ print_coord_content("coord", coord); ++ return RETERR(-EIO); ++ } ++ ++ coord_set_iplug(coord, iplug); ++ ++ /* if exact key from item header was found by binary search, no ++ further checks are necessary. 
*/ ++ if (found) { ++ assert("nikita-1259", order == EQUAL_TO); ++ return NS_FOUND; ++ } ++ if (iplug->b.max_key_inside != NULL) { ++ reiser4_key max_item_key; ++ ++ /* key > max_item_key --- outside of an item */ ++ if (keygt(key, iplug->b.max_key_inside(coord, &max_item_key))) { ++ coord->unit_pos = 0; ++ coord->between = AFTER_ITEM; ++ /* FIXME-VS: key we are looking for does not fit into ++ found item. Return NS_NOT_FOUND then. Without that ++ the following case does not work: there is extent of ++ file 10000, 10001. File 10000, 10002 has been just ++ created. When writing to position 0 in that file - ++ traverse_tree will stop here on twig level. When we ++ want it to go down to leaf level ++ */ ++ return NS_NOT_FOUND; ++ } ++ } ++ ++ if (iplug->b.lookup != NULL) { ++ return iplug->b.lookup(key, bias, coord); ++ } else { ++ assert("nikita-1260", order == LESS_THAN); ++ coord->between = AFTER_UNIT; ++ return (bias == FIND_EXACT) ? NS_NOT_FOUND : NS_FOUND; ++ } ++} ++ ++#undef NODE_ADDSTAT ++#undef NODE_INCSTAT ++ ++/* plugin->u.node.estimate ++ look for description of this method in plugin/node/node.h */ ++size_t estimate_node40(znode * node) ++{ ++ size_t result; ++ ++ assert("nikita-597", node != NULL); ++ ++ result = free_space_node40(node) - sizeof(item_header40); ++ ++ return (result > 0) ? 
result : 0; ++} ++ ++/* plugin->u.node.check ++ look for description of this method in plugin/node/node.h */ ++int check_node40(const znode * node /* node to check */ , ++ __u32 flags /* check flags */ , ++ const char **error /* where to store error message */ ) ++{ ++ int nr_items; ++ int i; ++ reiser4_key prev; ++ unsigned old_offset; ++ tree_level level; ++ coord_t coord; ++ int result; ++ ++ assert("nikita-580", node != NULL); ++ assert("nikita-581", error != NULL); ++ assert("nikita-2948", znode_is_loaded(node)); ++ ++ if (ZF_ISSET(node, JNODE_HEARD_BANSHEE)) ++ return 0; ++ ++ assert("nikita-582", zdata(node) != NULL); ++ ++ nr_items = node40_num_of_items_internal(node); ++ if (nr_items < 0) { ++ *error = "Negative number of items"; ++ return -1; ++ } ++ ++ if (flags & REISER4_NODE_DKEYS) ++ prev = *znode_get_ld_key((znode *) node); ++ else ++ prev = *reiser4_min_key(); ++ ++ old_offset = 0; ++ coord_init_zero(&coord); ++ coord.node = (znode *) node; ++ coord.unit_pos = 0; ++ coord.between = AT_UNIT; ++ level = znode_get_level(node); ++ for (i = 0; i < nr_items; i++) { ++ item_header40 *ih; ++ reiser4_key unit_key; ++ unsigned j; ++ ++ ih = node40_ih_at(node, (unsigned)i); ++ coord_set_item_pos(&coord, i); ++ if ((ih40_get_offset(ih) >= ++ znode_size(node) - nr_items * sizeof(item_header40)) || ++ (ih40_get_offset(ih) < sizeof(node40_header))) { ++ *error = "Offset is out of bounds"; ++ return -1; ++ } ++ if (ih40_get_offset(ih) <= old_offset) { ++ *error = "Offsets are in wrong order"; ++ return -1; ++ } ++ if ((i == 0) && (ih40_get_offset(ih) != sizeof(node40_header))) { ++ *error = "Wrong offset of first item"; ++ return -1; ++ } ++ old_offset = ih40_get_offset(ih); ++ ++ if (keygt(&prev, &ih->key)) { ++ *error = "Keys are in wrong order"; ++ return -1; ++ } ++ if (!keyeq(&ih->key, unit_key_by_coord(&coord, &unit_key))) { ++ *error = "Wrong key of first unit"; ++ return -1; ++ } ++ prev = ih->key; ++ for (j = 0; j < coord_num_units(&coord); ++j) { ++ 
coord.unit_pos = j; ++ unit_key_by_coord(&coord, &unit_key); ++ if (keygt(&prev, &unit_key)) { ++ *error = "Unit keys are in wrong order"; ++ return -1; ++ } ++ prev = unit_key; ++ } ++ coord.unit_pos = 0; ++ if (level != TWIG_LEVEL && item_is_extent(&coord)) { ++ *error = "extent on the wrong level"; ++ return -1; ++ } ++ if (level == LEAF_LEVEL && item_is_internal(&coord)) { ++ *error = "internal item on the wrong level"; ++ return -1; ++ } ++ if (level != LEAF_LEVEL && ++ !item_is_internal(&coord) && !item_is_extent(&coord)) { ++ *error = "wrong item on the internal level"; ++ return -1; ++ } ++ if (level > TWIG_LEVEL && !item_is_internal(&coord)) { ++ *error = "non-internal item on the internal level"; ++ return -1; ++ } ++#if REISER4_DEBUG ++ if (item_plugin_by_coord(&coord)->b.check ++ && item_plugin_by_coord(&coord)->b.check(&coord, error)) ++ return -1; ++#endif ++ if (i) { ++ coord_t prev_coord; ++ /* two neighboring items can not be mergeable */ ++ coord_dup(&prev_coord, &coord); ++ coord_prev_item(&prev_coord); ++ if (are_items_mergeable(&prev_coord, &coord)) { ++ *error = "mergeable items in one node"; ++ return -1; ++ } ++ ++ } ++ } ++ ++ if ((flags & REISER4_NODE_DKEYS) && !node_is_empty(node)) { ++ coord_t coord; ++ item_plugin *iplug; ++ ++ coord_init_last_unit(&coord, node); ++ iplug = item_plugin_by_coord(&coord); ++ if ((item_is_extent(&coord) || item_is_tail(&coord)) && ++ iplug->s.file.append_key != NULL) { ++ reiser4_key mkey; ++ ++ iplug->s.file.append_key(&coord, &mkey); ++ set_key_offset(&mkey, get_key_offset(&mkey) - 1); ++ read_lock_dk(current_tree); ++ result = keygt(&mkey, znode_get_rd_key((znode *) node)); ++ read_unlock_dk(current_tree); ++ if (result) { ++ *error = "key of rightmost item is too large"; ++ return -1; ++ } ++ } ++ } ++ if (flags & REISER4_NODE_DKEYS) { ++ read_lock_tree(current_tree); ++ read_lock_dk(current_tree); ++ ++ flags |= REISER4_NODE_TREE_STABLE; ++ ++ if (keygt(&prev, znode_get_rd_key((znode *) node))) { ++ 
if (flags & REISER4_NODE_TREE_STABLE) { ++ *error = "Last key is greater than rdkey"; ++ read_unlock_dk(current_tree); ++ read_unlock_tree(current_tree); ++ return -1; ++ } ++ } ++ if (keygt ++ (znode_get_ld_key((znode *) node), ++ znode_get_rd_key((znode *) node))) { ++ *error = "ldkey is greater than rdkey"; ++ read_unlock_dk(current_tree); ++ read_unlock_tree(current_tree); ++ return -1; ++ } ++ if (ZF_ISSET(node, JNODE_LEFT_CONNECTED) && ++ (node->left != NULL) && ++ !ZF_ISSET(node->left, JNODE_HEARD_BANSHEE) && ++ ergo(flags & REISER4_NODE_TREE_STABLE, ++ !keyeq(znode_get_rd_key(node->left), ++ znode_get_ld_key((znode *) node))) ++ && ergo(!(flags & REISER4_NODE_TREE_STABLE), ++ keygt(znode_get_rd_key(node->left), ++ znode_get_ld_key((znode *) node)))) { ++ *error = "left rdkey or ldkey is wrong"; ++ read_unlock_dk(current_tree); ++ read_unlock_tree(current_tree); ++ return -1; ++ } ++ if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && ++ (node->right != NULL) && ++ !ZF_ISSET(node->right, JNODE_HEARD_BANSHEE) && ++ ergo(flags & REISER4_NODE_TREE_STABLE, ++ !keyeq(znode_get_rd_key((znode *) node), ++ znode_get_ld_key(node->right))) ++ && ergo(!(flags & REISER4_NODE_TREE_STABLE), ++ keygt(znode_get_rd_key((znode *) node), ++ znode_get_ld_key(node->right)))) { ++ *error = "rdkey or right ldkey is wrong"; ++ read_unlock_dk(current_tree); ++ read_unlock_tree(current_tree); ++ return -1; ++ } ++ ++ read_unlock_dk(current_tree); ++ read_unlock_tree(current_tree); ++ } ++ ++ return 0; ++} ++ ++/* plugin->u.node.parse ++ look for description of this method in plugin/node/node.h */ ++int parse_node40(znode * node /* node to parse */ ) ++{ ++ node40_header *header; ++ int result; ++ d8 level; ++ ++ header = node40_node_header((znode *) node); ++ result = -EIO; ++ level = nh40_get_level(header); ++ if (unlikely(((__u8) znode_get_level(node)) != level)) ++ warning("nikita-494", "Wrong level found in node: %i != %i", ++ znode_get_level(node), level); ++ else if 
(unlikely(nh40_get_magic(header) != REISER4_NODE_MAGIC)) ++ warning("nikita-495", ++ "Wrong magic in tree node: want %x, got %x", ++ REISER4_NODE_MAGIC, nh40_get_magic(header)); ++ else { ++ node->nr_items = node40_num_of_items_internal(node); ++ result = 0; ++ } ++ return RETERR(result); ++} ++ ++/* plugin->u.node.init ++ look for description of this method in plugin/node/node.h */ ++int init_node40(znode * node /* node to initialise */ ) ++{ ++ node40_header *header; ++ ++ assert("nikita-570", node != NULL); ++ assert("nikita-572", zdata(node) != NULL); ++ ++ header = node40_node_header(node); ++ memset(header, 0, sizeof(node40_header)); ++ nh40_set_free_space(header, znode_size(node) - sizeof(node40_header)); ++ nh40_set_free_space_start(header, sizeof(node40_header)); ++ /* sane hypothesis: 0 in CPU format is 0 in disk format */ ++ /* items: 0 */ ++ save_plugin_id(node_plugin_to_plugin(node->nplug), ++ &header->common_header.plugin_id); ++ nh40_set_level(header, znode_get_level(node)); ++ nh40_set_magic(header, REISER4_NODE_MAGIC); ++ node->nr_items = 0; ++ nh40_set_mkfs_id(header, reiser4_mkfs_id(reiser4_get_current_sb())); ++ ++ /* flags: 0 */ ++ return 0; ++} ++ ++#ifdef GUESS_EXISTS ++int guess_node40(const znode * node /* node to guess plugin of */ ) ++{ ++ node40_header *nethack; ++ ++ assert("nikita-1058", node != NULL); ++ nethack = node40_node_header(node); ++ return ++ (nh40_get_magic(nethack) == REISER4_NODE_MAGIC) && ++ (plugin_by_disk_id(znode_get_tree(node), ++ REISER4_NODE_PLUGIN_TYPE, ++ &nethack->common_header.plugin_id)->h.id == ++ NODE40_ID); ++} ++#endif ++ ++/* plugin->u.node.chage_item_size ++ look for description of this method in plugin/node/node.h */ ++void change_item_size_node40(coord_t * coord, int by) ++{ ++ node40_header *nh; ++ item_header40 *ih; ++ char *item_data; ++ int item_length; ++ unsigned i; ++ ++ /* make sure that @item is coord of existing item */ ++ assert("vs-210", coord_is_existing_item(coord)); ++ ++ nh = 
node40_node_header(coord->node); ++ ++ item_data = item_by_coord_node40(coord); ++ item_length = length_by_coord_node40(coord); ++ ++ /* move item bodies */ ++ ih = node40_ih_at_coord(coord); ++ memmove(item_data + item_length + by, item_data + item_length, ++ nh40_get_free_space_start(node40_node_header(coord->node)) - ++ (ih40_get_offset(ih) + item_length)); ++ ++ /* update offsets of moved items */ ++ for (i = coord->item_pos + 1; i < nh40_get_num_items(nh); i++) { ++ ih = node40_ih_at(coord->node, i); ++ ih40_set_offset(ih, ih40_get_offset(ih) + by); ++ } ++ ++ /* update node header */ ++ nh40_set_free_space(nh, nh40_get_free_space(nh) - by); ++ nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) + by); ++} ++ ++static int should_notify_parent(const znode * node) ++{ ++ /* FIXME_JMACD This looks equivalent to znode_is_root(), right? -josh */ ++ return !disk_addr_eq(znode_get_block(node), ++ &znode_get_tree(node)->root_block); ++} ++ ++/* plugin->u.node.create_item ++ look for description of this method in plugin/node/node.h */ ++int ++create_item_node40(coord_t *target, const reiser4_key *key, ++ reiser4_item_data *data, carry_plugin_info *info) ++{ ++ node40_header *nh; ++ item_header40 *ih; ++ unsigned offset; ++ unsigned i; ++ ++ nh = node40_node_header(target->node); ++ ++ assert("vs-212", coord_is_between_items(target)); ++ /* node must have enough free space */ ++ assert("vs-254", ++ free_space_node40(target->node) >= ++ data->length + sizeof(item_header40)); ++ assert("vs-1410", data->length >= 0); ++ ++ if (coord_set_to_right(target)) ++ /* there are not items to the right of @target, so, new item ++ will be inserted after last one */ ++ coord_set_item_pos(target, nh40_get_num_items(nh)); ++ ++ if (target->item_pos < nh40_get_num_items(nh)) { ++ /* there are items to be moved to prepare space for new ++ item */ ++ ih = node40_ih_at_coord(target); ++ /* new item will start at this offset */ ++ offset = ih40_get_offset(ih); ++ ++ 
memmove(zdata(target->node) + offset + data->length, ++ zdata(target->node) + offset, ++ nh40_get_free_space_start(nh) - offset); ++ /* update headers of moved items */ ++ for (i = target->item_pos; i < nh40_get_num_items(nh); i++) { ++ ih = node40_ih_at(target->node, i); ++ ih40_set_offset(ih, ih40_get_offset(ih) + data->length); ++ } ++ ++ /* @ih is set to item header of the last item, move item headers */ ++ memmove(ih - 1, ih, ++ sizeof(item_header40) * (nh40_get_num_items(nh) - ++ target->item_pos)); ++ } else { ++ /* new item will start at this offset */ ++ offset = nh40_get_free_space_start(nh); ++ } ++ ++ /* make item header for the new item */ ++ ih = node40_ih_at_coord(target); ++ memcpy(&ih->key, key, sizeof(reiser4_key)); ++ ih40_set_offset(ih, offset); ++ save_plugin_id(item_plugin_to_plugin(data->iplug), &ih->plugin_id); ++ ++ /* update node header */ ++ nh40_set_free_space(nh, ++ nh40_get_free_space(nh) - data->length - ++ sizeof(item_header40)); ++ nh40_set_free_space_start(nh, ++ nh40_get_free_space_start(nh) + data->length); ++ node40_set_num_items(target->node, nh, nh40_get_num_items(nh) + 1); ++ ++ /* FIXME: check how does create_item work when between is set to BEFORE_UNIT */ ++ target->unit_pos = 0; ++ target->between = AT_UNIT; ++ coord_clear_iplug(target); ++ ++ /* initialize item */ ++ if (data->iplug->b.init != NULL) { ++ data->iplug->b.init(target, NULL, data); ++ } ++ /* copy item body */ ++ if (data->iplug->b.paste != NULL) { ++ data->iplug->b.paste(target, data, info); ++ } else if (data->data != NULL) { ++ if (data->user) { ++ /* AUDIT: Are we really should not check that pointer ++ from userspace was valid and data bytes were ++ available? How will we return -EFAULT of some kind ++ without this check? 
*/ ++ assert("nikita-3038", reiser4_schedulable()); ++ /* copy data from user space */ ++ __copy_from_user(zdata(target->node) + offset, ++ (const char __user *)data->data, ++ (unsigned)data->length); ++ } else ++ /* copy from kernel space */ ++ memcpy(zdata(target->node) + offset, data->data, ++ (unsigned)data->length); ++ } ++ ++ if (target->item_pos == 0) { ++ /* left delimiting key has to be updated */ ++ prepare_for_update(NULL, target->node, info); ++ } ++ ++ if (item_plugin_by_coord(target)->b.create_hook != NULL) { ++ item_plugin_by_coord(target)->b.create_hook(target, data->arg); ++ } ++ ++ return 0; ++} ++ ++/* plugin->u.node.update_item_key ++ look for description of this method in plugin/node/node.h */ ++void ++update_item_key_node40(coord_t * target, const reiser4_key * key, ++ carry_plugin_info * info) ++{ ++ item_header40 *ih; ++ ++ ih = node40_ih_at_coord(target); ++ memcpy(&ih->key, key, sizeof(reiser4_key)); ++ ++ if (target->item_pos == 0) { ++ prepare_for_update(NULL, target->node, info); ++ } ++} ++ ++/* this bits encode cut mode */ ++#define CMODE_TAIL 1 ++#define CMODE_WHOLE 2 ++#define CMODE_HEAD 4 ++ ++struct cut40_info { ++ int mode; ++ pos_in_node_t tail_removed; /* position of item which gets tail removed */ ++ pos_in_node_t first_removed; /* position of first the leftmost item among items removed completely */ ++ pos_in_node_t removed_count; /* number of items removed completely */ ++ pos_in_node_t head_removed; /* position of item which gets head removed */ ++ ++ pos_in_node_t freed_space_start; ++ pos_in_node_t freed_space_end; ++ pos_in_node_t first_moved; ++ pos_in_node_t head_removed_location; ++}; ++ ++static void init_cinfo(struct cut40_info *cinfo) ++{ ++ cinfo->mode = 0; ++ cinfo->tail_removed = MAX_POS_IN_NODE; ++ cinfo->first_removed = MAX_POS_IN_NODE; ++ cinfo->removed_count = MAX_POS_IN_NODE; ++ cinfo->head_removed = MAX_POS_IN_NODE; ++ cinfo->freed_space_start = MAX_POS_IN_NODE; ++ cinfo->freed_space_end = MAX_POS_IN_NODE; 
++ cinfo->first_moved = MAX_POS_IN_NODE; ++ cinfo->head_removed_location = MAX_POS_IN_NODE; ++} ++ ++/* complete cut_node40/kill_node40 content by removing the gap created by */ ++static void compact(znode * node, struct cut40_info *cinfo) ++{ ++ node40_header *nh; ++ item_header40 *ih; ++ pos_in_node_t freed; ++ pos_in_node_t pos, nr_items; ++ ++ assert("vs-1526", (cinfo->freed_space_start != MAX_POS_IN_NODE && ++ cinfo->freed_space_end != MAX_POS_IN_NODE && ++ cinfo->first_moved != MAX_POS_IN_NODE)); ++ assert("vs-1523", cinfo->freed_space_end >= cinfo->freed_space_start); ++ ++ nh = node40_node_header(node); ++ nr_items = nh40_get_num_items(nh); ++ ++ /* remove gap made up by removal */ ++ memmove(zdata(node) + cinfo->freed_space_start, ++ zdata(node) + cinfo->freed_space_end, ++ nh40_get_free_space_start(nh) - cinfo->freed_space_end); ++ ++ /* update item headers of moved items - change their locations */ ++ pos = cinfo->first_moved; ++ ih = node40_ih_at(node, pos); ++ if (cinfo->head_removed_location != MAX_POS_IN_NODE) { ++ assert("vs-1580", pos == cinfo->head_removed); ++ ih40_set_offset(ih, cinfo->head_removed_location); ++ pos++; ++ ih--; ++ } ++ ++ freed = cinfo->freed_space_end - cinfo->freed_space_start; ++ for (; pos < nr_items; pos++, ih--) { ++ assert("vs-1581", ih == node40_ih_at(node, pos)); ++ ih40_set_offset(ih, ih40_get_offset(ih) - freed); ++ } ++ ++ /* free space start moved to right */ ++ nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) - freed); ++ ++ if (cinfo->removed_count != MAX_POS_IN_NODE) { ++ /* number of items changed. 
Remove item headers of those items */ ++ ih = node40_ih_at(node, nr_items - 1); ++ memmove(ih + cinfo->removed_count, ih, ++ sizeof(item_header40) * (nr_items - ++ cinfo->removed_count - ++ cinfo->first_removed)); ++ freed += sizeof(item_header40) * cinfo->removed_count; ++ node40_set_num_items(node, nh, nr_items - cinfo->removed_count); ++ } ++ ++ /* total amount of free space increased */ ++ nh40_set_free_space(nh, nh40_get_free_space(nh) + freed); ++} ++ ++int shrink_item_node40(coord_t * coord, int delta) ++{ ++ node40_header *nh; ++ item_header40 *ih; ++ pos_in_node_t pos; ++ pos_in_node_t nr_items; ++ char *end; ++ znode *node; ++ int off; ++ ++ assert("nikita-3487", coord != NULL); ++ assert("nikita-3488", delta >= 0); ++ ++ node = coord->node; ++ nh = node40_node_header(node); ++ nr_items = nh40_get_num_items(nh); ++ ++ ih = node40_ih_at_coord(coord); ++ assert("nikita-3489", delta <= length_by_coord_node40(coord)); ++ off = ih40_get_offset(ih) + length_by_coord_node40(coord); ++ end = zdata(node) + off; ++ ++ /* remove gap made up by removal */ ++ memmove(end - delta, end, nh40_get_free_space_start(nh) - off); ++ ++ /* update item headers of moved items - change their locations */ ++ pos = coord->item_pos + 1; ++ ih = node40_ih_at(node, pos); ++ for (; pos < nr_items; pos++, ih--) { ++ assert("nikita-3490", ih == node40_ih_at(node, pos)); ++ ih40_set_offset(ih, ih40_get_offset(ih) - delta); ++ } ++ ++ /* free space start moved to left */ ++ nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) - delta); ++ /* total amount of free space increased */ ++ nh40_set_free_space(nh, nh40_get_free_space(nh) + delta); ++ /* ++ * This method does _not_ changes number of items. Hence, it cannot ++ * make node empty. Also it doesn't remove items at all, which means ++ * that no keys have to be updated either. ++ */ ++ return 0; ++} ++ ++/* this is used by cut_node40 and kill_node40. It analyses input parameters and calculates cut mode. 
There are 2 types ++ of cut. First is when a unit is removed from the middle of an item. In this case this function returns 1. All the ++ rest fits into second case: 0 or 1 of items getting tail cut, 0 or more items removed completely and 0 or 1 item ++ getting head cut. Function returns 0 in this case */ ++static int ++parse_cut(struct cut40_info *cinfo, const struct cut_kill_params *params) ++{ ++ reiser4_key left_key, right_key; ++ reiser4_key min_from_key, max_to_key; ++ const reiser4_key *from_key, *to_key; ++ ++ init_cinfo(cinfo); ++ ++ /* calculate minimal key stored in first item of items to be cut (params->from) */ ++ item_key_by_coord(params->from, &min_from_key); ++ /* and max key stored in last item of items to be cut (params->to) */ ++ max_item_key_by_coord(params->to, &max_to_key); ++ ++ /* if cut key range is not defined in input parameters - define it using cut coord range */ ++ if (params->from_key == NULL) { ++ assert("vs-1513", params->to_key == NULL); ++ unit_key_by_coord(params->from, &left_key); ++ from_key = &left_key; ++ max_unit_key_by_coord(params->to, &right_key); ++ to_key = &right_key; ++ } else { ++ from_key = params->from_key; ++ to_key = params->to_key; ++ } ++ ++ if (params->from->item_pos == params->to->item_pos) { ++ if (keylt(&min_from_key, from_key) ++ && keylt(to_key, &max_to_key)) ++ return 1; ++ ++ if (keygt(from_key, &min_from_key)) { ++ /* tail of item is to be cut cut */ ++ cinfo->tail_removed = params->from->item_pos; ++ cinfo->mode |= CMODE_TAIL; ++ } else if (keylt(to_key, &max_to_key)) { ++ /* head of item is to be cut */ ++ cinfo->head_removed = params->from->item_pos; ++ cinfo->mode |= CMODE_HEAD; ++ } else { ++ /* item is removed completely */ ++ cinfo->first_removed = params->from->item_pos; ++ cinfo->removed_count = 1; ++ cinfo->mode |= CMODE_WHOLE; ++ } ++ } else { ++ cinfo->first_removed = params->from->item_pos + 1; ++ cinfo->removed_count = ++ params->to->item_pos - params->from->item_pos - 1; ++ ++ if 
(keygt(from_key, &min_from_key)) { ++ /* first item is not cut completely */ ++ cinfo->tail_removed = params->from->item_pos; ++ cinfo->mode |= CMODE_TAIL; ++ } else { ++ cinfo->first_removed--; ++ cinfo->removed_count++; ++ } ++ if (keylt(to_key, &max_to_key)) { ++ /* last item is not cut completely */ ++ cinfo->head_removed = params->to->item_pos; ++ cinfo->mode |= CMODE_HEAD; ++ } else { ++ cinfo->removed_count++; ++ } ++ if (cinfo->removed_count) ++ cinfo->mode |= CMODE_WHOLE; ++ } ++ ++ return 0; ++} ++ ++static void ++call_kill_hooks(znode * node, pos_in_node_t from, pos_in_node_t count, ++ carry_kill_data * kdata) ++{ ++ coord_t coord; ++ item_plugin *iplug; ++ pos_in_node_t pos; ++ ++ coord.node = node; ++ coord.unit_pos = 0; ++ coord.between = AT_UNIT; ++ for (pos = 0; pos < count; pos++) { ++ coord_set_item_pos(&coord, from + pos); ++ coord.unit_pos = 0; ++ coord.between = AT_UNIT; ++ iplug = item_plugin_by_coord(&coord); ++ if (iplug->b.kill_hook) { ++ iplug->b.kill_hook(&coord, 0, coord_num_units(&coord), ++ kdata); ++ } ++ } ++} ++ ++/* this is used to kill item partially */ ++static pos_in_node_t ++kill_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to, void *data, ++ reiser4_key * smallest_removed, reiser4_key * new_first_key) ++{ ++ struct carry_kill_data *kdata; ++ item_plugin *iplug; ++ ++ kdata = data; ++ iplug = item_plugin_by_coord(coord); ++ ++ assert("vs-1524", iplug->b.kill_units); ++ return iplug->b.kill_units(coord, from, to, kdata, smallest_removed, ++ new_first_key); ++} ++ ++/* call item plugin to cut tail of file */ ++static pos_in_node_t ++kill_tail(coord_t * coord, void *data, reiser4_key * smallest_removed) ++{ ++ struct carry_kill_data *kdata; ++ pos_in_node_t to; ++ ++ kdata = data; ++ to = coord_last_unit_pos(coord); ++ return kill_units(coord, coord->unit_pos, to, kdata, smallest_removed, ++ NULL); ++} ++ ++/* call item plugin to cut head of item */ ++static pos_in_node_t ++kill_head(coord_t * coord, void *data, 
reiser4_key * smallest_removed, ++ reiser4_key * new_first_key) ++{ ++ return kill_units(coord, 0, coord->unit_pos, data, smallest_removed, ++ new_first_key); ++} ++ ++/* this is used to cut item partially */ ++static pos_in_node_t ++cut_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to, void *data, ++ reiser4_key * smallest_removed, reiser4_key * new_first_key) ++{ ++ carry_cut_data *cdata; ++ item_plugin *iplug; ++ ++ cdata = data; ++ iplug = item_plugin_by_coord(coord); ++ assert("vs-302", iplug->b.cut_units); ++ return iplug->b.cut_units(coord, from, to, cdata, smallest_removed, ++ new_first_key); ++} ++ ++/* call item plugin to cut tail of file */ ++static pos_in_node_t ++cut_tail(coord_t * coord, void *data, reiser4_key * smallest_removed) ++{ ++ carry_cut_data *cdata; ++ pos_in_node_t to; ++ ++ cdata = data; ++ to = coord_last_unit_pos(cdata->params.from); ++ return cut_units(coord, coord->unit_pos, to, data, smallest_removed, NULL); ++} ++ ++/* call item plugin to cut head of item */ ++static pos_in_node_t ++cut_head(coord_t * coord, void *data, reiser4_key * smallest_removed, ++ reiser4_key * new_first_key) ++{ ++ return cut_units(coord, 0, coord->unit_pos, data, smallest_removed, ++ new_first_key); ++} ++ ++/* this returns 1 of key of first item changed, 0 - if it did not */ ++static int ++prepare_for_compact(struct cut40_info *cinfo, ++ const struct cut_kill_params *params, int is_cut, ++ void *data, carry_plugin_info * info) ++{ ++ znode *node; ++ item_header40 *ih; ++ pos_in_node_t freed; ++ pos_in_node_t item_pos; ++ coord_t coord; ++ reiser4_key new_first_key; ++ pos_in_node_t(*kill_units_f) (coord_t *, pos_in_node_t, pos_in_node_t, ++ void *, reiser4_key *, reiser4_key *); ++ pos_in_node_t(*kill_tail_f) (coord_t *, void *, reiser4_key *); ++ pos_in_node_t(*kill_head_f) (coord_t *, void *, reiser4_key *, ++ reiser4_key *); ++ int retval; ++ ++ retval = 0; ++ ++ node = params->from->node; ++ ++ assert("vs-184", node == params->to->node); ++ 
assert("vs-312", !node_is_empty(node)); ++ assert("vs-297", ++ coord_compare(params->from, params->to) != COORD_CMP_ON_RIGHT); ++ ++ if (is_cut) { ++ kill_units_f = cut_units; ++ kill_tail_f = cut_tail; ++ kill_head_f = cut_head; ++ } else { ++ kill_units_f = kill_units; ++ kill_tail_f = kill_tail; ++ kill_head_f = kill_head; ++ } ++ ++ if (parse_cut(cinfo, params) == 1) { ++ /* cut from the middle of item */ ++ freed = ++ kill_units_f(params->from, params->from->unit_pos, ++ params->to->unit_pos, data, ++ params->smallest_removed, NULL); ++ ++ item_pos = params->from->item_pos; ++ ih = node40_ih_at(node, item_pos); ++ cinfo->freed_space_start = ++ ih40_get_offset(ih) + node40_item_length(node, ++ item_pos) - freed; ++ cinfo->freed_space_end = cinfo->freed_space_start + freed; ++ cinfo->first_moved = item_pos + 1; ++ } else { ++ assert("vs-1521", (cinfo->tail_removed != MAX_POS_IN_NODE || ++ cinfo->first_removed != MAX_POS_IN_NODE || ++ cinfo->head_removed != MAX_POS_IN_NODE)); ++ ++ switch (cinfo->mode) { ++ case CMODE_TAIL: ++ /* one item gets cut partially from its end */ ++ assert("vs-1562", ++ cinfo->tail_removed == params->from->item_pos); ++ ++ freed = ++ kill_tail_f(params->from, data, ++ params->smallest_removed); ++ ++ item_pos = cinfo->tail_removed; ++ ih = node40_ih_at(node, item_pos); ++ cinfo->freed_space_start = ++ ih40_get_offset(ih) + node40_item_length(node, ++ item_pos) - ++ freed; ++ cinfo->freed_space_end = ++ cinfo->freed_space_start + freed; ++ cinfo->first_moved = cinfo->tail_removed + 1; ++ break; ++ ++ case CMODE_WHOLE: ++ /* one or more items get removed completely */ ++ assert("vs-1563", ++ cinfo->first_removed == params->from->item_pos); ++ assert("vs-1564", cinfo->removed_count > 0 ++ && cinfo->removed_count != MAX_POS_IN_NODE); ++ ++ /* call kill hook for all items removed completely */ ++ if (is_cut == 0) ++ call_kill_hooks(node, cinfo->first_removed, ++ cinfo->removed_count, data); ++ ++ item_pos = cinfo->first_removed; ++ ih = 
node40_ih_at(node, item_pos); ++ ++ if (params->smallest_removed) ++ memcpy(params->smallest_removed, &ih->key, ++ sizeof(reiser4_key)); ++ ++ cinfo->freed_space_start = ih40_get_offset(ih); ++ ++ item_pos += (cinfo->removed_count - 1); ++ ih -= (cinfo->removed_count - 1); ++ cinfo->freed_space_end = ++ ih40_get_offset(ih) + node40_item_length(node, ++ item_pos); ++ cinfo->first_moved = item_pos + 1; ++ if (cinfo->first_removed == 0) ++ /* key of first item of the node changes */ ++ retval = 1; ++ break; ++ ++ case CMODE_HEAD: ++ /* one item gets cut partially from its head */ ++ assert("vs-1565", ++ cinfo->head_removed == params->from->item_pos); ++ ++ freed = ++ kill_head_f(params->to, data, ++ params->smallest_removed, ++ &new_first_key); ++ ++ item_pos = cinfo->head_removed; ++ ih = node40_ih_at(node, item_pos); ++ cinfo->freed_space_start = ih40_get_offset(ih); ++ cinfo->freed_space_end = ih40_get_offset(ih) + freed; ++ cinfo->first_moved = cinfo->head_removed + 1; ++ ++ /* item head is removed, therefore, item key changed */ ++ coord.node = node; ++ coord_set_item_pos(&coord, item_pos); ++ coord.unit_pos = 0; ++ coord.between = AT_UNIT; ++ update_item_key_node40(&coord, &new_first_key, NULL); ++ if (item_pos == 0) ++ /* key of first item of the node changes */ ++ retval = 1; ++ break; ++ ++ case CMODE_TAIL | CMODE_WHOLE: ++ /* one item gets cut from its end and one or more items get removed completely */ ++ assert("vs-1566", ++ cinfo->tail_removed == params->from->item_pos); ++ assert("vs-1567", ++ cinfo->first_removed == cinfo->tail_removed + 1); ++ assert("vs-1564", cinfo->removed_count > 0 ++ && cinfo->removed_count != MAX_POS_IN_NODE); ++ ++ freed = ++ kill_tail_f(params->from, data, ++ params->smallest_removed); ++ ++ item_pos = cinfo->tail_removed; ++ ih = node40_ih_at(node, item_pos); ++ cinfo->freed_space_start = ++ ih40_get_offset(ih) + node40_item_length(node, ++ item_pos) - ++ freed; ++ ++ /* call kill hook for all items removed completely */ ++ if 
(is_cut == 0) ++ call_kill_hooks(node, cinfo->first_removed, ++ cinfo->removed_count, data); ++ ++ item_pos += cinfo->removed_count; ++ ih -= cinfo->removed_count; ++ cinfo->freed_space_end = ++ ih40_get_offset(ih) + node40_item_length(node, ++ item_pos); ++ cinfo->first_moved = item_pos + 1; ++ break; ++ ++ case CMODE_WHOLE | CMODE_HEAD: ++ /* one or more items get removed completely and one item gets cut partially from its head */ ++ assert("vs-1568", ++ cinfo->first_removed == params->from->item_pos); ++ assert("vs-1564", cinfo->removed_count > 0 ++ && cinfo->removed_count != MAX_POS_IN_NODE); ++ assert("vs-1569", ++ cinfo->head_removed == ++ cinfo->first_removed + cinfo->removed_count); ++ ++ /* call kill hook for all items removed completely */ ++ if (is_cut == 0) ++ call_kill_hooks(node, cinfo->first_removed, ++ cinfo->removed_count, data); ++ ++ item_pos = cinfo->first_removed; ++ ih = node40_ih_at(node, item_pos); ++ ++ if (params->smallest_removed) ++ memcpy(params->smallest_removed, &ih->key, ++ sizeof(reiser4_key)); ++ ++ freed = ++ kill_head_f(params->to, data, NULL, &new_first_key); ++ ++ cinfo->freed_space_start = ih40_get_offset(ih); ++ ++ ih = node40_ih_at(node, cinfo->head_removed); ++ /* this is the most complex case. Item which got head removed and items which are to be moved ++ intact change their location differently. 
*/ ++ cinfo->freed_space_end = ih40_get_offset(ih) + freed; ++ cinfo->first_moved = cinfo->head_removed; ++ cinfo->head_removed_location = cinfo->freed_space_start; ++ ++ /* item head is removed, therefore, item key changed */ ++ coord.node = node; ++ coord_set_item_pos(&coord, cinfo->head_removed); ++ coord.unit_pos = 0; ++ coord.between = AT_UNIT; ++ update_item_key_node40(&coord, &new_first_key, NULL); ++ ++ assert("vs-1579", cinfo->first_removed == 0); ++ /* key of first item of the node changes */ ++ retval = 1; ++ break; ++ ++ case CMODE_TAIL | CMODE_HEAD: ++ /* one item get cut from its end and its neighbor gets cut from its tail */ ++ impossible("vs-1576", "this can not happen currently"); ++ break; ++ ++ case CMODE_TAIL | CMODE_WHOLE | CMODE_HEAD: ++ impossible("vs-1577", "this can not happen currently"); ++ break; ++ default: ++ impossible("vs-1578", "unexpected cut mode"); ++ break; ++ } ++ } ++ return retval; ++} ++ ++/* plugin->u.node.kill ++ return value is number of items removed completely */ ++int kill_node40(struct carry_kill_data *kdata, carry_plugin_info * info) ++{ ++ znode *node; ++ struct cut40_info cinfo; ++ int first_key_changed; ++ ++ node = kdata->params.from->node; ++ ++ first_key_changed = ++ prepare_for_compact(&cinfo, &kdata->params, 0 /* not cut */ , kdata, ++ info); ++ compact(node, &cinfo); ++ ++ if (info) { ++ /* it is not called by node40_shift, so we have to take care ++ of changes on upper levels */ ++ if (node_is_empty(node) ++ && !(kdata->flags & DELETE_RETAIN_EMPTY)) ++ /* all contents of node is deleted */ ++ prepare_removal_node40(node, info); ++ else if (first_key_changed) { ++ prepare_for_update(NULL, node, info); ++ } ++ } ++ ++ coord_clear_iplug(kdata->params.from); ++ coord_clear_iplug(kdata->params.to); ++ ++ znode_make_dirty(node); ++ return cinfo.removed_count == MAX_POS_IN_NODE ? 
0 : cinfo.removed_count; ++} ++ ++/* plugin->u.node.cut ++ return value is number of items removed completely */ ++int cut_node40(struct carry_cut_data *cdata, carry_plugin_info * info) ++{ ++ znode *node; ++ struct cut40_info cinfo; ++ int first_key_changed; ++ ++ node = cdata->params.from->node; ++ ++ first_key_changed = ++ prepare_for_compact(&cinfo, &cdata->params, 1 /* not cut */ , cdata, ++ info); ++ compact(node, &cinfo); ++ ++ if (info) { ++ /* it is not called by node40_shift, so we have to take care ++ of changes on upper levels */ ++ if (node_is_empty(node)) ++ /* all contents of node is deleted */ ++ prepare_removal_node40(node, info); ++ else if (first_key_changed) { ++ prepare_for_update(NULL, node, info); ++ } ++ } ++ ++ coord_clear_iplug(cdata->params.from); ++ coord_clear_iplug(cdata->params.to); ++ ++ znode_make_dirty(node); ++ return cinfo.removed_count == MAX_POS_IN_NODE ? 0 : cinfo.removed_count; ++} ++ ++/* this structure is used by shift method of node40 plugin */ ++struct shift_params { ++ shift_direction pend; /* when @pend == append - we are shifting to ++ left, when @pend == prepend - to right */ ++ coord_t wish_stop; /* when shifting to left this is last unit we ++ want shifted, when shifting to right - this ++ is set to unit we want to start shifting ++ from */ ++ znode *target; ++ int everything; /* it is set to 1 if everything we have to shift is ++ shifted, 0 - otherwise */ ++ ++ /* FIXME-VS: get rid of read_stop */ ++ ++ /* these are set by estimate_shift */ ++ coord_t real_stop; /* this will be set to last unit which will be ++ really shifted */ ++ ++ /* coordinate in source node before operation of unit which becomes ++ first after shift to left of last after shift to right */ ++ union { ++ coord_t future_first; ++ coord_t future_last; ++ } u; ++ ++ unsigned merging_units; /* number of units of first item which have to ++ be merged with last item of target node */ ++ unsigned merging_bytes; /* number of bytes in those units */ ++ 
++ unsigned entire; /* items shifted in their entirety */ ++ unsigned entire_bytes; /* number of bytes in those items */ ++ ++ unsigned part_units; /* number of units of partially copied item */ ++ unsigned part_bytes; /* number of bytes in those units */ ++ ++ unsigned shift_bytes; /* total number of bytes in items shifted (item ++ headers not included) */ ++ ++}; ++ ++static int item_creation_overhead(coord_t *item) ++{ ++ return node_plugin_by_coord(item)->item_overhead(item->node, NULL); ++} ++ ++/* how many units are there in @source starting from source->unit_pos ++ but not further than @stop_coord */ ++static int ++wanted_units(coord_t *source, coord_t *stop_coord, shift_direction pend) ++{ ++ if (pend == SHIFT_LEFT) { ++ assert("vs-181", source->unit_pos == 0); ++ } else { ++ assert("vs-182", ++ source->unit_pos == coord_last_unit_pos(source)); ++ } ++ ++ if (source->item_pos != stop_coord->item_pos) { ++ /* @source and @stop_coord are different items */ ++ return coord_last_unit_pos(source) + 1; ++ } ++ ++ if (pend == SHIFT_LEFT) { ++ return stop_coord->unit_pos + 1; ++ } else { ++ return source->unit_pos - stop_coord->unit_pos + 1; ++ } ++} ++ ++/* this calculates what can be copied from @shift->wish_stop.node to ++ @shift->target */ ++static void ++estimate_shift(struct shift_params *shift, const reiser4_context * ctx) ++{ ++ unsigned target_free_space, size; ++ pos_in_node_t stop_item; /* item which estimating should not consider */ ++ unsigned want; /* number of units of item we want shifted */ ++ coord_t source; /* item being estimated */ ++ item_plugin *iplug; ++ ++ /* shifting to left/right starts from first/last units of ++ @shift->wish_stop.node */ ++ if (shift->pend == SHIFT_LEFT) { ++ coord_init_first_unit(&source, shift->wish_stop.node); ++ } else { ++ coord_init_last_unit(&source, shift->wish_stop.node); ++ } ++ shift->real_stop = source; ++ ++ /* free space in target node and number of items in source */ ++ target_free_space = 
znode_free_space(shift->target); ++ ++ shift->everything = 0; ++ if (!node_is_empty(shift->target)) { ++ /* target node is not empty, check for boundary items ++ mergeability */ ++ coord_t to; ++ ++ /* item we try to merge @source with */ ++ if (shift->pend == SHIFT_LEFT) { ++ coord_init_last_unit(&to, shift->target); ++ } else { ++ coord_init_first_unit(&to, shift->target); ++ } ++ ++ if ((shift->pend == SHIFT_LEFT) ? are_items_mergeable(&to, ++ &source) : ++ are_items_mergeable(&source, &to)) { ++ /* how many units of @source do we want to merge to ++ item @to */ ++ want = ++ wanted_units(&source, &shift->wish_stop, ++ shift->pend); ++ ++ /* how many units of @source we can merge to item ++ @to */ ++ iplug = item_plugin_by_coord(&source); ++ if (iplug->b.can_shift != NULL) ++ shift->merging_units = ++ iplug->b.can_shift(target_free_space, ++ &source, shift->target, ++ shift->pend, &size, ++ want); ++ else { ++ shift->merging_units = 0; ++ size = 0; ++ } ++ shift->merging_bytes = size; ++ shift->shift_bytes += size; ++ /* update stop coord to be set to last unit of @source ++ we can merge to @target */ ++ if (shift->merging_units) ++ /* at least one unit can be shifted */ ++ shift->real_stop.unit_pos = ++ (shift->merging_units - source.unit_pos - ++ 1) * shift->pend; ++ else { ++ /* nothing can be shifted */ ++ if (shift->pend == SHIFT_LEFT) ++ coord_init_before_first_item(&shift-> ++ real_stop, ++ source. 
++ node); ++ else ++ coord_init_after_last_item(&shift-> ++ real_stop, ++ source.node); ++ } ++ assert("nikita-2081", shift->real_stop.unit_pos + 1); ++ ++ if (shift->merging_units != want) { ++ /* we could not copy as many as we want, so, ++ there is no reason for estimating any ++ longer */ ++ return; ++ } ++ ++ target_free_space -= size; ++ coord_add_item_pos(&source, shift->pend); ++ } ++ } ++ ++ /* number of item nothing of which we want to shift */ ++ stop_item = shift->wish_stop.item_pos + shift->pend; ++ ++ /* calculate how many items can be copied into given free ++ space as whole */ ++ for (; source.item_pos != stop_item; ++ coord_add_item_pos(&source, shift->pend)) { ++ if (shift->pend == SHIFT_RIGHT) ++ source.unit_pos = coord_last_unit_pos(&source); ++ ++ /* how many units of @source do we want to copy */ ++ want = wanted_units(&source, &shift->wish_stop, shift->pend); ++ ++ if (want == coord_last_unit_pos(&source) + 1) { ++ /* we want this item to be copied entirely */ ++ size = ++ item_length_by_coord(&source) + ++ item_creation_overhead(&source); ++ if (size <= target_free_space) { ++ /* item fits into target node as whole */ ++ target_free_space -= size; ++ shift->shift_bytes += ++ size - item_creation_overhead(&source); ++ shift->entire_bytes += ++ size - item_creation_overhead(&source); ++ shift->entire++; ++ ++ /* update shift->real_stop coord to be set to ++ last unit of @source we can merge to ++ @target */ ++ shift->real_stop = source; ++ if (shift->pend == SHIFT_LEFT) ++ shift->real_stop.unit_pos = ++ coord_last_unit_pos(&shift-> ++ real_stop); ++ else ++ shift->real_stop.unit_pos = 0; ++ continue; ++ } ++ } ++ ++ /* we reach here only for an item which does not fit into ++ target node in its entirety. This item may be either ++ partially shifted, or not shifted at all. We will have to ++ create new item in target node, so decrease amout of free ++ space by an item creation overhead. 
We can reach here also ++ if stop coord is in this item */ ++ if (target_free_space >= ++ (unsigned)item_creation_overhead(&source)) { ++ target_free_space -= item_creation_overhead(&source); ++ iplug = item_plugin_by_coord(&source); ++ if (iplug->b.can_shift) { ++ shift->part_units = iplug->b.can_shift(target_free_space, ++ &source, ++ NULL, /* target */ ++ shift->pend, ++ &size, ++ want); ++ } else { ++ target_free_space = 0; ++ shift->part_units = 0; ++ size = 0; ++ } ++ } else { ++ target_free_space = 0; ++ shift->part_units = 0; ++ size = 0; ++ } ++ shift->part_bytes = size; ++ shift->shift_bytes += size; ++ ++ /* set @shift->real_stop to last unit of @source we can merge ++ to @shift->target */ ++ if (shift->part_units) { ++ shift->real_stop = source; ++ shift->real_stop.unit_pos = ++ (shift->part_units - source.unit_pos - ++ 1) * shift->pend; ++ assert("nikita-2082", shift->real_stop.unit_pos + 1); ++ } ++ ++ if (want != shift->part_units) ++ /* not everything wanted were shifted */ ++ return; ++ break; ++ } ++ ++ shift->everything = 1; ++} ++ ++static void ++copy_units(coord_t * target, coord_t * source, unsigned from, unsigned count, ++ shift_direction dir, unsigned free_space) ++{ ++ item_plugin *iplug; ++ ++ assert("nikita-1463", target != NULL); ++ assert("nikita-1464", source != NULL); ++ assert("nikita-1465", from + count <= coord_num_units(source)); ++ ++ iplug = item_plugin_by_coord(source); ++ assert("nikita-1468", iplug == item_plugin_by_coord(target)); ++ iplug->b.copy_units(target, source, from, count, dir, free_space); ++ ++ if (dir == SHIFT_RIGHT) { ++ /* FIXME-VS: this looks not necessary. 
update_item_key was ++ called already by copy_units method */ ++ reiser4_key split_key; ++ ++ assert("nikita-1469", target->unit_pos == 0); ++ ++ unit_key_by_coord(target, &split_key); ++ node_plugin_by_coord(target)->update_item_key(target, ++ &split_key, NULL); ++ } ++} ++ ++/* copy part of @shift->real_stop.node starting either from its beginning or ++ from its end and ending at @shift->real_stop to either the end or the ++ beginning of @shift->target */ ++static void copy(struct shift_params *shift) ++{ ++ node40_header *nh; ++ coord_t from; ++ coord_t to; ++ item_header40 *from_ih, *to_ih; ++ int free_space_start; ++ int new_items; ++ unsigned old_items; ++ int old_offset; ++ unsigned i; ++ ++ nh = node40_node_header(shift->target); ++ free_space_start = nh40_get_free_space_start(nh); ++ old_items = nh40_get_num_items(nh); ++ new_items = shift->entire + (shift->part_units ? 1 : 0); ++ assert("vs-185", ++ shift->shift_bytes == ++ shift->merging_bytes + shift->entire_bytes + shift->part_bytes); ++ ++ from = shift->wish_stop; ++ ++ coord_init_first_unit(&to, shift->target); ++ ++ /* NOTE:NIKITA->VS not sure what I am doing: shift->target is empty, ++ hence to.between is set to EMPTY_NODE above. Looks like we want it ++ to be AT_UNIT. ++ ++ Oh, wonders of ->betweeness... 
++ ++ */ ++ to.between = AT_UNIT; ++ ++ if (shift->pend == SHIFT_LEFT) { ++ /* copying to left */ ++ ++ coord_set_item_pos(&from, 0); ++ from_ih = node40_ih_at(from.node, 0); ++ ++ coord_set_item_pos(&to, ++ node40_num_of_items_internal(to.node) - 1); ++ if (shift->merging_units) { ++ /* expand last item, so that plugin methods will see ++ correct data */ ++ free_space_start += shift->merging_bytes; ++ nh40_set_free_space_start(nh, ++ (unsigned)free_space_start); ++ nh40_set_free_space(nh, ++ nh40_get_free_space(nh) - ++ shift->merging_bytes); ++ ++ /* appending last item of @target */ ++ copy_units(&to, &from, 0, /* starting from 0-th unit */ ++ shift->merging_units, SHIFT_LEFT, ++ shift->merging_bytes); ++ coord_inc_item_pos(&from); ++ from_ih--; ++ coord_inc_item_pos(&to); ++ } ++ ++ to_ih = node40_ih_at(shift->target, old_items); ++ if (shift->entire) { ++ /* copy @entire items entirely */ ++ ++ /* copy item headers */ ++ memcpy(to_ih - shift->entire + 1, ++ from_ih - shift->entire + 1, ++ shift->entire * sizeof(item_header40)); ++ /* update item header offset */ ++ old_offset = ih40_get_offset(from_ih); ++ /* AUDIT: Looks like if we calculate old_offset + free_space_start here instead of just old_offset, we can perform one "add" operation less per each iteration */ ++ for (i = 0; i < shift->entire; i++, to_ih--, from_ih--) ++ ih40_set_offset(to_ih, ++ ih40_get_offset(from_ih) - ++ old_offset + free_space_start); ++ ++ /* copy item bodies */ ++ memcpy(zdata(shift->target) + free_space_start, zdata(from.node) + old_offset, /*ih40_get_offset (from_ih), */ ++ shift->entire_bytes); ++ ++ coord_add_item_pos(&from, (int)shift->entire); ++ coord_add_item_pos(&to, (int)shift->entire); ++ } ++ ++ nh40_set_free_space_start(nh, ++ free_space_start + ++ shift->shift_bytes - ++ shift->merging_bytes); ++ nh40_set_free_space(nh, ++ nh40_get_free_space(nh) - ++ (shift->shift_bytes - shift->merging_bytes + ++ sizeof(item_header40) * new_items)); ++ ++ /* update node header */ 
++ node40_set_num_items(shift->target, nh, old_items + new_items); ++ assert("vs-170", ++ nh40_get_free_space(nh) < znode_size(shift->target)); ++ ++ if (shift->part_units) { ++ /* copy heading part (@part units) of @source item as ++ a new item into @target->node */ ++ ++ /* copy item header of partially copied item */ ++ coord_set_item_pos(&to, ++ node40_num_of_items_internal(to.node) ++ - 1); ++ memcpy(to_ih, from_ih, sizeof(item_header40)); ++ ih40_set_offset(to_ih, ++ nh40_get_free_space_start(nh) - ++ shift->part_bytes); ++ if (item_plugin_by_coord(&to)->b.init) ++ item_plugin_by_coord(&to)->b.init(&to, &from, ++ NULL); ++ copy_units(&to, &from, 0, shift->part_units, SHIFT_LEFT, ++ shift->part_bytes); ++ } ++ ++ } else { ++ /* copying to right */ ++ ++ coord_set_item_pos(&from, ++ node40_num_of_items_internal(from.node) - 1); ++ from_ih = node40_ih_at_coord(&from); ++ ++ coord_set_item_pos(&to, 0); ++ ++ /* prepare space for new items */ ++ memmove(zdata(to.node) + sizeof(node40_header) + ++ shift->shift_bytes, ++ zdata(to.node) + sizeof(node40_header), ++ free_space_start - sizeof(node40_header)); ++ /* update item headers of moved items */ ++ to_ih = node40_ih_at(to.node, 0); ++ /* first item gets @merging_bytes longer. 
free space appears ++ at its beginning */ ++ if (!node_is_empty(to.node)) ++ ih40_set_offset(to_ih, ++ ih40_get_offset(to_ih) + ++ shift->shift_bytes - ++ shift->merging_bytes); ++ ++ for (i = 1; i < old_items; i++) ++ ih40_set_offset(to_ih - i, ++ ih40_get_offset(to_ih - i) + ++ shift->shift_bytes); ++ ++ /* move item headers to make space for new items */ ++ memmove(to_ih - old_items + 1 - new_items, ++ to_ih - old_items + 1, ++ sizeof(item_header40) * old_items); ++ to_ih -= (new_items - 1); ++ ++ nh40_set_free_space_start(nh, ++ free_space_start + ++ shift->shift_bytes); ++ nh40_set_free_space(nh, ++ nh40_get_free_space(nh) - ++ (shift->shift_bytes + ++ sizeof(item_header40) * new_items)); ++ ++ /* update node header */ ++ node40_set_num_items(shift->target, nh, old_items + new_items); ++ assert("vs-170", ++ nh40_get_free_space(nh) < znode_size(shift->target)); ++ ++ if (shift->merging_units) { ++ coord_add_item_pos(&to, new_items); ++ to.unit_pos = 0; ++ to.between = AT_UNIT; ++ /* prepend first item of @to */ ++ copy_units(&to, &from, ++ coord_last_unit_pos(&from) - ++ shift->merging_units + 1, ++ shift->merging_units, SHIFT_RIGHT, ++ shift->merging_bytes); ++ coord_dec_item_pos(&from); ++ from_ih++; ++ } ++ ++ if (shift->entire) { ++ /* copy @entire items entirely */ ++ ++ /* copy item headers */ ++ memcpy(to_ih, from_ih, ++ shift->entire * sizeof(item_header40)); ++ ++ /* update item header offset */ ++ old_offset = ++ ih40_get_offset(from_ih + shift->entire - 1); ++ /* AUDIT: old_offset + sizeof (node40_header) + shift->part_bytes calculation can be taken off the loop. 
*/ ++ for (i = 0; i < shift->entire; i++, to_ih++, from_ih++) ++ ih40_set_offset(to_ih, ++ ih40_get_offset(from_ih) - ++ old_offset + ++ sizeof(node40_header) + ++ shift->part_bytes); ++ /* copy item bodies */ ++ coord_add_item_pos(&from, -(int)(shift->entire - 1)); ++ memcpy(zdata(to.node) + sizeof(node40_header) + ++ shift->part_bytes, item_by_coord_node40(&from), ++ shift->entire_bytes); ++ coord_dec_item_pos(&from); ++ } ++ ++ if (shift->part_units) { ++ coord_set_item_pos(&to, 0); ++ to.unit_pos = 0; ++ to.between = AT_UNIT; ++ /* copy heading part (@part units) of @source item as ++ a new item into @target->node */ ++ ++ /* copy item header of partially copied item */ ++ memcpy(to_ih, from_ih, sizeof(item_header40)); ++ ih40_set_offset(to_ih, sizeof(node40_header)); ++ if (item_plugin_by_coord(&to)->b.init) ++ item_plugin_by_coord(&to)->b.init(&to, &from, ++ NULL); ++ copy_units(&to, &from, ++ coord_last_unit_pos(&from) - ++ shift->part_units + 1, shift->part_units, ++ SHIFT_RIGHT, shift->part_bytes); ++ } ++ } ++} ++ ++/* remove everything either before or after @fact_stop. 
Number of items ++ removed completely is returned */ ++static int delete_copied(struct shift_params *shift) ++{ ++ coord_t from; ++ coord_t to; ++ struct carry_cut_data cdata; ++ ++ if (shift->pend == SHIFT_LEFT) { ++ /* we were shifting to left, remove everything from the ++ beginning of @shift->wish_stop->node upto ++ @shift->wish_stop */ ++ coord_init_first_unit(&from, shift->real_stop.node); ++ to = shift->real_stop; ++ ++ /* store old coordinate of unit which will be first after ++ shift to left */ ++ shift->u.future_first = to; ++ coord_next_unit(&shift->u.future_first); ++ } else { ++ /* we were shifting to right, remove everything from ++ @shift->stop_coord upto to end of ++ @shift->stop_coord->node */ ++ from = shift->real_stop; ++ coord_init_last_unit(&to, from.node); ++ ++ /* store old coordinate of unit which will be last after ++ shift to right */ ++ shift->u.future_last = from; ++ coord_prev_unit(&shift->u.future_last); ++ } ++ ++ cdata.params.from = &from; ++ cdata.params.to = &to; ++ cdata.params.from_key = NULL; ++ cdata.params.to_key = NULL; ++ cdata.params.smallest_removed = NULL; ++ return cut_node40(&cdata, NULL); ++} ++ ++/* something was moved between @left and @right. Add carry operation to @info ++ list to have carry to update delimiting key between them */ ++static int ++prepare_for_update(znode * left, znode * right, carry_plugin_info * info) ++{ ++ carry_op *op; ++ carry_node *cn; ++ ++ if (info == NULL) ++ /* nowhere to send operation to. */ ++ return 0; ++ ++ if (!should_notify_parent(right)) ++ return 0; ++ ++ op = node_post_carry(info, COP_UPDATE, right, 1); ++ if (IS_ERR(op) || op == NULL) ++ return op ? 
PTR_ERR(op) : -EIO; ++ ++ if (left != NULL) { ++ carry_node *reference; ++ ++ if (info->doing) ++ reference = insert_carry_node(info->doing, ++ info->todo, left); ++ else ++ reference = op->node; ++ assert("nikita-2992", reference != NULL); ++ cn = reiser4_add_carry(info->todo, POOLO_BEFORE, reference); ++ if (IS_ERR(cn)) ++ return PTR_ERR(cn); ++ cn->parent = 1; ++ cn->node = left; ++ if (ZF_ISSET(left, JNODE_ORPHAN)) ++ cn->left_before = 1; ++ op->u.update.left = cn; ++ } else ++ op->u.update.left = NULL; ++ return 0; ++} ++ ++/* plugin->u.node.prepare_removal ++ to delete a pointer to @empty from the tree add corresponding carry ++ operation (delete) to @info list */ ++int prepare_removal_node40(znode * empty, carry_plugin_info * info) ++{ ++ carry_op *op; ++ reiser4_tree *tree; ++ ++ if (!should_notify_parent(empty)) ++ return 0; ++ /* already on a road to Styx */ ++ if (ZF_ISSET(empty, JNODE_HEARD_BANSHEE)) ++ return 0; ++ op = node_post_carry(info, COP_DELETE, empty, 1); ++ if (IS_ERR(op) || op == NULL) ++ return RETERR(op ? 
PTR_ERR(op) : -EIO); ++ ++ op->u.delete.child = NULL; ++ op->u.delete.flags = 0; ++ ++ /* fare thee well */ ++ tree = znode_get_tree(empty); ++ read_lock_tree(tree); ++ write_lock_dk(tree); ++ znode_set_ld_key(empty, znode_get_rd_key(empty)); ++ if (znode_is_left_connected(empty) && empty->left) ++ znode_set_rd_key(empty->left, znode_get_rd_key(empty)); ++ write_unlock_dk(tree); ++ read_unlock_tree(tree); ++ ++ ZF_SET(empty, JNODE_HEARD_BANSHEE); ++ return 0; ++} ++ ++/* something were shifted from @insert_coord->node to @shift->target, update ++ @insert_coord correspondingly */ ++static void ++adjust_coord(coord_t * insert_coord, struct shift_params *shift, int removed, ++ int including_insert_coord) ++{ ++ /* item plugin was invalidated by shifting */ ++ coord_clear_iplug(insert_coord); ++ ++ if (node_is_empty(shift->wish_stop.node)) { ++ assert("vs-242", shift->everything); ++ if (including_insert_coord) { ++ if (shift->pend == SHIFT_RIGHT) { ++ /* set @insert_coord before first unit of ++ @shift->target node */ ++ coord_init_before_first_item(insert_coord, ++ shift->target); ++ } else { ++ /* set @insert_coord after last in target node */ ++ coord_init_after_last_item(insert_coord, ++ shift->target); ++ } ++ } else { ++ /* set @insert_coord inside of empty node. There is ++ only one possible coord within an empty ++ node. 
init_first_unit will set that coord */ ++ coord_init_first_unit(insert_coord, ++ shift->wish_stop.node); ++ } ++ return; ++ } ++ ++ if (shift->pend == SHIFT_RIGHT) { ++ /* there was shifting to right */ ++ if (shift->everything) { ++ /* everything wanted was shifted */ ++ if (including_insert_coord) { ++ /* @insert_coord is set before first unit of ++ @to node */ ++ coord_init_before_first_item(insert_coord, ++ shift->target); ++ insert_coord->between = BEFORE_UNIT; ++ } else { ++ /* @insert_coord is set after last unit of ++ @insert->node */ ++ coord_init_last_unit(insert_coord, ++ shift->wish_stop.node); ++ insert_coord->between = AFTER_UNIT; ++ } ++ } ++ return; ++ } ++ ++ /* there was shifting to left */ ++ if (shift->everything) { ++ /* everything wanted was shifted */ ++ if (including_insert_coord) { ++ /* @insert_coord is set after last unit in @to node */ ++ coord_init_after_last_item(insert_coord, shift->target); ++ } else { ++ /* @insert_coord is set before first unit in the same ++ node */ ++ coord_init_before_first_item(insert_coord, ++ shift->wish_stop.node); ++ } ++ return; ++ } ++ ++ /* FIXME-VS: the code below is complicated because with between == ++ AFTER_ITEM unit_pos is set to 0 */ ++ ++ if (!removed) { ++ /* no items were shifted entirely */ ++ assert("vs-195", shift->merging_units == 0 ++ || shift->part_units == 0); ++ ++ if (shift->real_stop.item_pos == insert_coord->item_pos) { ++ if (shift->merging_units) { ++ if (insert_coord->between == AFTER_UNIT) { ++ assert("nikita-1441", ++ insert_coord->unit_pos >= ++ shift->merging_units); ++ insert_coord->unit_pos -= ++ shift->merging_units; ++ } else if (insert_coord->between == BEFORE_UNIT) { ++ assert("nikita-2090", ++ insert_coord->unit_pos > ++ shift->merging_units); ++ insert_coord->unit_pos -= ++ shift->merging_units; ++ } ++ ++ assert("nikita-2083", ++ insert_coord->unit_pos + 1); ++ } else { ++ if (insert_coord->between == AFTER_UNIT) { ++ assert("nikita-1442", ++ insert_coord->unit_pos >= 
++ shift->part_units); ++ insert_coord->unit_pos -= ++ shift->part_units; ++ } else if (insert_coord->between == BEFORE_UNIT) { ++ assert("nikita-2089", ++ insert_coord->unit_pos > ++ shift->part_units); ++ insert_coord->unit_pos -= ++ shift->part_units; ++ } ++ ++ assert("nikita-2084", ++ insert_coord->unit_pos + 1); ++ } ++ } ++ return; ++ } ++ ++ /* we shifted to left and there was no enough space for everything */ ++ switch (insert_coord->between) { ++ case AFTER_UNIT: ++ case BEFORE_UNIT: ++ if (shift->real_stop.item_pos == insert_coord->item_pos) ++ insert_coord->unit_pos -= shift->part_units; ++ case AFTER_ITEM: ++ coord_add_item_pos(insert_coord, -removed); ++ break; ++ default: ++ impossible("nikita-2087", "not ready"); ++ } ++ assert("nikita-2085", insert_coord->unit_pos + 1); ++} ++ ++static int call_shift_hooks(struct shift_params *shift) ++{ ++ unsigned i, shifted; ++ coord_t coord; ++ item_plugin *iplug; ++ ++ assert("vs-275", !node_is_empty(shift->target)); ++ ++ /* number of items shift touches */ ++ shifted = ++ shift->entire + (shift->merging_units ? 1 : 0) + ++ (shift->part_units ? 
1 : 0); ++ ++ if (shift->pend == SHIFT_LEFT) { ++ /* moved items are at the end */ ++ coord_init_last_unit(&coord, shift->target); ++ coord.unit_pos = 0; ++ ++ assert("vs-279", shift->pend == 1); ++ for (i = 0; i < shifted; i++) { ++ unsigned from, count; ++ ++ iplug = item_plugin_by_coord(&coord); ++ if (i == 0 && shift->part_units) { ++ assert("vs-277", ++ coord_num_units(&coord) == ++ shift->part_units); ++ count = shift->part_units; ++ from = 0; ++ } else if (i == shifted - 1 && shift->merging_units) { ++ count = shift->merging_units; ++ from = coord_num_units(&coord) - count; ++ } else { ++ count = coord_num_units(&coord); ++ from = 0; ++ } ++ ++ if (iplug->b.shift_hook) { ++ iplug->b.shift_hook(&coord, from, count, ++ shift->wish_stop.node); ++ } ++ coord_add_item_pos(&coord, -shift->pend); ++ } ++ } else { ++ /* moved items are at the beginning */ ++ coord_init_first_unit(&coord, shift->target); ++ ++ assert("vs-278", shift->pend == -1); ++ for (i = 0; i < shifted; i++) { ++ unsigned from, count; ++ ++ iplug = item_plugin_by_coord(&coord); ++ if (i == 0 && shift->part_units) { ++ assert("vs-277", ++ coord_num_units(&coord) == ++ shift->part_units); ++ count = coord_num_units(&coord); ++ from = 0; ++ } else if (i == shifted - 1 && shift->merging_units) { ++ count = shift->merging_units; ++ from = 0; ++ } else { ++ count = coord_num_units(&coord); ++ from = 0; ++ } ++ ++ if (iplug->b.shift_hook) { ++ iplug->b.shift_hook(&coord, from, count, ++ shift->wish_stop.node); ++ } ++ coord_add_item_pos(&coord, -shift->pend); ++ } ++ } ++ ++ return 0; ++} ++ ++/* shift to left is completed. 
Return 1 if unit @old was moved to left neighbor */ ++static int ++unit_moved_left(const struct shift_params *shift, const coord_t * old) ++{ ++ assert("vs-944", shift->real_stop.node == old->node); ++ ++ if (shift->real_stop.item_pos < old->item_pos) ++ return 0; ++ if (shift->real_stop.item_pos == old->item_pos) { ++ if (shift->real_stop.unit_pos < old->unit_pos) ++ return 0; ++ } ++ return 1; ++} ++ ++/* shift to right is completed. Return 1 if unit @old was moved to right ++ neighbor */ ++static int ++unit_moved_right(const struct shift_params *shift, const coord_t * old) ++{ ++ assert("vs-944", shift->real_stop.node == old->node); ++ ++ if (shift->real_stop.item_pos > old->item_pos) ++ return 0; ++ if (shift->real_stop.item_pos == old->item_pos) { ++ if (shift->real_stop.unit_pos > old->unit_pos) ++ return 0; ++ } ++ return 1; ++} ++ ++/* coord @old was set in node from which shift was performed. What was shifted ++ is stored in @shift. Update @old correspondingly to performed shift */ ++static coord_t *adjust_coord2(const struct shift_params *shift, ++ const coord_t * old, coord_t * new) ++{ ++ coord_clear_iplug(new); ++ new->between = old->between; ++ ++ coord_clear_iplug(new); ++ if (old->node == shift->target) { ++ if (shift->pend == SHIFT_LEFT) { ++ /* coord which is set inside of left neighbor does not ++ change during shift to left */ ++ coord_dup(new, old); ++ return new; ++ } ++ new->node = old->node; ++ coord_set_item_pos(new, ++ old->item_pos + shift->entire + ++ (shift->part_units ? 1 : 0)); ++ new->unit_pos = old->unit_pos; ++ if (old->item_pos == 0 && shift->merging_units) ++ new->unit_pos += shift->merging_units; ++ return new; ++ } ++ ++ assert("vs-977", old->node == shift->wish_stop.node); ++ if (shift->pend == SHIFT_LEFT) { ++ if (unit_moved_left(shift, old)) { ++ /* unit @old moved to left neighbor. 
Calculate its ++ coordinate there */ ++ new->node = shift->target; ++ coord_set_item_pos(new, ++ node_num_items(shift->target) - ++ shift->entire - ++ (shift->part_units ? 1 : 0) + ++ old->item_pos); ++ ++ new->unit_pos = old->unit_pos; ++ if (shift->merging_units) { ++ coord_dec_item_pos(new); ++ if (old->item_pos == 0) { ++ /* unit_pos only changes if item got ++ merged */ ++ new->unit_pos = ++ coord_num_units(new) - ++ (shift->merging_units - ++ old->unit_pos); ++ } ++ } ++ } else { ++ /* unit @old did not move to left neighbor. ++ ++ Use _nocheck, because @old is outside of its node. ++ */ ++ coord_dup_nocheck(new, old); ++ coord_add_item_pos(new, ++ -shift->u.future_first.item_pos); ++ if (new->item_pos == 0) ++ new->unit_pos -= shift->u.future_first.unit_pos; ++ } ++ } else { ++ if (unit_moved_right(shift, old)) { ++ /* unit @old moved to right neighbor */ ++ new->node = shift->target; ++ coord_set_item_pos(new, ++ old->item_pos - ++ shift->real_stop.item_pos); ++ if (new->item_pos == 0) { ++ /* unit @old might change unit pos */ ++ coord_set_item_pos(new, ++ old->unit_pos - ++ shift->real_stop.unit_pos); ++ } ++ } else { ++ /* unit @old did not move to right neighbor, therefore ++ it did not change */ ++ coord_dup(new, old); ++ } ++ } ++ coord_set_iplug(new, item_plugin_by_coord(new)); ++ return new; ++} ++ ++/* this is called when shift is completed (something of source node is copied ++ to target and deleted in source) to update all taps set in current ++ context */ ++static void update_taps(const struct shift_params *shift) ++{ ++ tap_t *tap; ++ coord_t new; ++ ++ for_all_taps(tap) { ++ /* update only taps set to nodes participating in shift */ ++ if (tap->coord->node == shift->wish_stop.node ++ || tap->coord->node == shift->target) ++ tap_to_coord(tap, ++ adjust_coord2(shift, tap->coord, &new)); ++ } ++} ++ ++#if REISER4_DEBUG ++ ++struct shift_check { ++ reiser4_key key; ++ __u16 plugin_id; ++ union { ++ __u64 bytes; ++ __u64 entries; ++ void *unused; 
++ } u; ++}; ++ ++void *shift_check_prepare(const znode * left, const znode * right) ++{ ++ pos_in_node_t i, nr_items; ++ int mergeable; ++ struct shift_check *data; ++ item_header40 *ih; ++ ++ if (node_is_empty(left) || node_is_empty(right)) ++ mergeable = 0; ++ else { ++ coord_t l, r; ++ ++ coord_init_last_unit(&l, left); ++ coord_init_first_unit(&r, right); ++ mergeable = are_items_mergeable(&l, &r); ++ } ++ nr_items = ++ node40_num_of_items_internal(left) + ++ node40_num_of_items_internal(right) - (mergeable ? 1 : 0); ++ data = ++ kmalloc(sizeof(struct shift_check) * nr_items, ++ reiser4_ctx_gfp_mask_get()); ++ if (data != NULL) { ++ coord_t coord; ++ pos_in_node_t item_pos; ++ ++ coord_init_first_unit(&coord, left); ++ i = 0; ++ ++ for (item_pos = 0; ++ item_pos < node40_num_of_items_internal(left); ++ item_pos++) { ++ ++ coord_set_item_pos(&coord, item_pos); ++ ih = node40_ih_at_coord(&coord); ++ ++ data[i].key = ih->key; ++ data[i].plugin_id = le16_to_cpu(get_unaligned(&ih->plugin_id)); ++ switch (data[i].plugin_id) { ++ case CTAIL_ID: ++ case FORMATTING_ID: ++ data[i].u.bytes = coord_num_units(&coord); ++ break; ++ case EXTENT_POINTER_ID: ++ data[i].u.bytes = ++ reiser4_extent_size(&coord, ++ coord_num_units(&coord)); ++ break; ++ case COMPOUND_DIR_ID: ++ data[i].u.entries = coord_num_units(&coord); ++ break; ++ default: ++ data[i].u.unused = NULL; ++ break; ++ } ++ i++; ++ } ++ ++ coord_init_first_unit(&coord, right); ++ ++ if (mergeable) { ++ assert("vs-1609", i != 0); ++ ++ ih = node40_ih_at_coord(&coord); ++ ++ assert("vs-1589", ++ data[i - 1].plugin_id == ++ le16_to_cpu(get_unaligned(&ih->plugin_id))); ++ switch (data[i - 1].plugin_id) { ++ case CTAIL_ID: ++ case FORMATTING_ID: ++ data[i - 1].u.bytes += coord_num_units(&coord); ++ break; ++ case EXTENT_POINTER_ID: ++ data[i - 1].u.bytes += ++ reiser4_extent_size(&coord, ++ coord_num_units(&coord)); ++ break; ++ case COMPOUND_DIR_ID: ++ data[i - 1].u.entries += ++ coord_num_units(&coord); ++ break; ++ 
default: ++ impossible("vs-1605", "wrong mergeable item"); ++ break; ++ } ++ item_pos = 1; ++ } else ++ item_pos = 0; ++ for (; item_pos < node40_num_of_items_internal(right); ++ item_pos++) { ++ ++ assert("vs-1604", i < nr_items); ++ coord_set_item_pos(&coord, item_pos); ++ ih = node40_ih_at_coord(&coord); ++ ++ data[i].key = ih->key; ++ data[i].plugin_id = le16_to_cpu(get_unaligned(&ih->plugin_id)); ++ switch (data[i].plugin_id) { ++ case CTAIL_ID: ++ case FORMATTING_ID: ++ data[i].u.bytes = coord_num_units(&coord); ++ break; ++ case EXTENT_POINTER_ID: ++ data[i].u.bytes = ++ reiser4_extent_size(&coord, ++ coord_num_units(&coord)); ++ break; ++ case COMPOUND_DIR_ID: ++ data[i].u.entries = coord_num_units(&coord); ++ break; ++ default: ++ data[i].u.unused = NULL; ++ break; ++ } ++ i++; ++ } ++ assert("vs-1606", i == nr_items); ++ } ++ return data; ++} ++ ++void shift_check(void *vp, const znode * left, const znode * right) ++{ ++ pos_in_node_t i, nr_items; ++ coord_t coord; ++ __u64 last_bytes; ++ int mergeable; ++ item_header40 *ih; ++ pos_in_node_t item_pos; ++ struct shift_check *data; ++ ++ data = (struct shift_check *)vp; ++ ++ if (data == NULL) ++ return; ++ ++ if (node_is_empty(left) || node_is_empty(right)) ++ mergeable = 0; ++ else { ++ coord_t l, r; ++ ++ coord_init_last_unit(&l, left); ++ coord_init_first_unit(&r, right); ++ mergeable = are_items_mergeable(&l, &r); ++ } ++ ++ nr_items = ++ node40_num_of_items_internal(left) + ++ node40_num_of_items_internal(right) - (mergeable ? 
1 : 0); ++ ++ i = 0; ++ last_bytes = 0; ++ ++ coord_init_first_unit(&coord, left); ++ ++ for (item_pos = 0; item_pos < node40_num_of_items_internal(left); ++ item_pos++) { ++ ++ coord_set_item_pos(&coord, item_pos); ++ ih = node40_ih_at_coord(&coord); ++ ++ assert("vs-1611", i == item_pos); ++ assert("vs-1590", keyeq(&ih->key, &data[i].key)); ++ assert("vs-1591", ++ le16_to_cpu(get_unaligned(&ih->plugin_id)) == data[i].plugin_id); ++ if ((i < (node40_num_of_items_internal(left) - 1)) ++ || !mergeable) { ++ switch (data[i].plugin_id) { ++ case CTAIL_ID: ++ case FORMATTING_ID: ++ assert("vs-1592", ++ data[i].u.bytes == ++ coord_num_units(&coord)); ++ break; ++ case EXTENT_POINTER_ID: ++ assert("vs-1593", ++ data[i].u.bytes == ++ reiser4_extent_size(&coord, ++ coord_num_units ++ (&coord))); ++ break; ++ case COMPOUND_DIR_ID: ++ assert("vs-1594", ++ data[i].u.entries == ++ coord_num_units(&coord)); ++ break; ++ default: ++ break; ++ } ++ } ++ if (item_pos == (node40_num_of_items_internal(left) - 1) ++ && mergeable) { ++ switch (data[i].plugin_id) { ++ case CTAIL_ID: ++ case FORMATTING_ID: ++ last_bytes = coord_num_units(&coord); ++ break; ++ case EXTENT_POINTER_ID: ++ last_bytes = ++ reiser4_extent_size(&coord, ++ coord_num_units(&coord)); ++ break; ++ case COMPOUND_DIR_ID: ++ last_bytes = coord_num_units(&coord); ++ break; ++ default: ++ impossible("vs-1595", "wrong mergeable item"); ++ break; ++ } ++ } ++ i++; ++ } ++ ++ coord_init_first_unit(&coord, right); ++ if (mergeable) { ++ ih = node40_ih_at_coord(&coord); ++ ++ assert("vs-1589", ++ data[i - 1].plugin_id == le16_to_cpu(get_unaligned(&ih->plugin_id))); ++ assert("vs-1608", last_bytes != 0); ++ switch (data[i - 1].plugin_id) { ++ case CTAIL_ID: ++ case FORMATTING_ID: ++ assert("vs-1596", ++ data[i - 1].u.bytes == ++ last_bytes + coord_num_units(&coord)); ++ break; ++ ++ case EXTENT_POINTER_ID: ++ assert("vs-1597", ++ data[i - 1].u.bytes == ++ last_bytes + reiser4_extent_size(&coord, ++ coord_num_units ++ 
(&coord))); ++ break; ++ ++ case COMPOUND_DIR_ID: ++ assert("vs-1598", ++ data[i - 1].u.bytes == ++ last_bytes + coord_num_units(&coord)); ++ break; ++ default: ++ impossible("vs-1599", "wrong mergeable item"); ++ break; ++ } ++ item_pos = 1; ++ } else ++ item_pos = 0; ++ ++ for (; item_pos < node40_num_of_items_internal(right); item_pos++) { ++ ++ coord_set_item_pos(&coord, item_pos); ++ ih = node40_ih_at_coord(&coord); ++ ++ assert("vs-1612", keyeq(&ih->key, &data[i].key)); ++ assert("vs-1613", ++ le16_to_cpu(get_unaligned(&ih->plugin_id)) == data[i].plugin_id); ++ switch (data[i].plugin_id) { ++ case CTAIL_ID: ++ case FORMATTING_ID: ++ assert("vs-1600", ++ data[i].u.bytes == coord_num_units(&coord)); ++ break; ++ case EXTENT_POINTER_ID: ++ assert("vs-1601", ++ data[i].u.bytes == ++ reiser4_extent_size(&coord, ++ coord_num_units ++ (&coord))); ++ break; ++ case COMPOUND_DIR_ID: ++ assert("vs-1602", ++ data[i].u.entries == coord_num_units(&coord)); ++ break; ++ default: ++ break; ++ } ++ i++; ++ } ++ ++ assert("vs-1603", i == nr_items); ++ kfree(data); ++} ++ ++#endif ++ ++/* plugin->u.node.shift ++ look for description of this method in plugin/node/node.h */ ++int shift_node40(coord_t * from, znode * to, shift_direction pend, int delete_child, /* if @from->node becomes empty - it will be ++ deleted from the tree if this is set to 1 */ ++ int including_stop_coord, carry_plugin_info * info) ++{ ++ struct shift_params shift; ++ int result; ++ znode *left, *right; ++ znode *source; ++ int target_empty; ++ ++ assert("nikita-2161", coord_check(from)); ++ ++ memset(&shift, 0, sizeof(shift)); ++ shift.pend = pend; ++ shift.wish_stop = *from; ++ shift.target = to; ++ ++ assert("nikita-1473", znode_is_write_locked(from->node)); ++ assert("nikita-1474", znode_is_write_locked(to)); ++ ++ source = from->node; ++ ++ /* set @shift.wish_stop to rightmost/leftmost unit among units we want ++ shifted */ ++ if (pend == SHIFT_LEFT) { ++ result = coord_set_to_left(&shift.wish_stop); 
++ left = to; ++ right = from->node; ++ } else { ++ result = coord_set_to_right(&shift.wish_stop); ++ left = from->node; ++ right = to; ++ } ++ ++ if (result) { ++ /* move insertion coord even if there is nothing to move */ ++ if (including_stop_coord) { ++ /* move insertion coord (@from) */ ++ if (pend == SHIFT_LEFT) { ++ /* after last item in target node */ ++ coord_init_after_last_item(from, to); ++ } else { ++ /* before first item in target node */ ++ coord_init_before_first_item(from, to); ++ } ++ } ++ ++ if (delete_child && node_is_empty(shift.wish_stop.node)) ++ result = ++ prepare_removal_node40(shift.wish_stop.node, info); ++ else ++ result = 0; ++ /* there is nothing to shift */ ++ assert("nikita-2078", coord_check(from)); ++ return result; ++ } ++ ++ target_empty = node_is_empty(to); ++ ++ /* when first node plugin with item body compression is implemented, ++ this must be changed to call node specific plugin */ ++ ++ /* shift->stop_coord is updated to last unit which really will be ++ shifted */ ++ estimate_shift(&shift, get_current_context()); ++ if (!shift.shift_bytes) { ++ /* we could not shift anything */ ++ assert("nikita-2079", coord_check(from)); ++ return 0; ++ } ++ ++ copy(&shift); ++ ++ /* result value of this is important. It is used by adjust_coord below */ ++ result = delete_copied(&shift); ++ ++ assert("vs-1610", result >= 0); ++ assert("vs-1471", ++ ((reiser4_context *) current->journal_info)->magic == ++ context_magic); ++ ++ /* item which has been moved from one node to another might want to do ++ something on that event. 
This can be done by item's shift_hook ++ method, which will be now called for every moved items */ ++ call_shift_hooks(&shift); ++ ++ assert("vs-1472", ++ ((reiser4_context *) current->journal_info)->magic == ++ context_magic); ++ ++ update_taps(&shift); ++ ++ assert("vs-1473", ++ ((reiser4_context *) current->journal_info)->magic == ++ context_magic); ++ ++ /* adjust @from pointer in accordance with @including_stop_coord flag ++ and amount of data which was really shifted */ ++ adjust_coord(from, &shift, result, including_stop_coord); ++ ++ if (target_empty) ++ /* ++ * items were shifted into empty node. Update delimiting key. ++ */ ++ result = prepare_for_update(NULL, left, info); ++ ++ /* add update operation to @info, which is the list of operations to ++ be performed on a higher level */ ++ result = prepare_for_update(left, right, info); ++ if (!result && node_is_empty(source) && delete_child) { ++ /* all contents of @from->node is moved to @to and @from->node ++ has to be removed from the tree, so, on higher level we ++ will be removing the pointer to node @from->node */ ++ result = prepare_removal_node40(source, info); ++ } ++ assert("nikita-2080", coord_check(from)); ++ return result ? 
result : (int)shift.shift_bytes; ++} ++ ++/* plugin->u.node.fast_insert() ++ look for description of this method in plugin/node/node.h */ ++int fast_insert_node40(const coord_t * coord UNUSED_ARG /* node to query */ ) ++{ ++ return 1; ++} ++ ++/* plugin->u.node.fast_paste() ++ look for description of this method in plugin/node/node.h */ ++int fast_paste_node40(const coord_t * coord UNUSED_ARG /* node to query */ ) ++{ ++ return 1; ++} ++ ++/* plugin->u.node.fast_cut() ++ look for description of this method in plugin/node/node.h */ ++int fast_cut_node40(const coord_t * coord UNUSED_ARG /* node to query */ ) ++{ ++ return 1; ++} ++ ++/* plugin->u.node.modify - not defined */ ++ ++/* plugin->u.node.max_item_size */ ++int max_item_size_node40(void) ++{ ++ return reiser4_get_current_sb()->s_blocksize - sizeof(node40_header) - ++ sizeof(item_header40); ++} ++ ++/* plugin->u.node.set_item_plugin */ ++int set_item_plugin_node40(coord_t *coord, item_id id) ++{ ++ item_header40 *ih; ++ ++ ih = node40_ih_at_coord(coord); ++ put_unaligned(cpu_to_le16(id), &ih->plugin_id); ++ coord->iplugid = id; ++ return 0; ++} ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/node/node40.h b/fs/reiser4/plugin/node/node40.h +new file mode 100644 +index 0000000..8ae375b +--- /dev/null ++++ b/fs/reiser4/plugin/node/node40.h +@@ -0,0 +1,125 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#if !defined( __REISER4_NODE40_H__ ) ++#define __REISER4_NODE40_H__ ++ ++#include "../../forward.h" ++#include "../../dformat.h" ++#include "node.h" ++ ++#include ++ ++/* format of node header for 40 node layouts. Keep bloat out of this struct. */ ++typedef struct node40_header { ++ /* identifier of node plugin. Must be located at the very beginning ++ of a node. 
*/ ++ common_node_header common_header; /* this is 16 bits */ ++ /* number of items. Should be first element in the node header, ++ because we haven't yet finally decided whether it shouldn't go into ++ common_header. ++ */ ++/* NIKITA-FIXME-HANS: Create a macro such that if there is only one ++ * node format at compile time, and it is this one, accesses do not function dereference when ++ * accessing these fields (and otherwise they do). Probably 80% of users will only have one node format at a time throughout the life of reiser4. */ ++ d16 nr_items; ++ /* free space in node measured in bytes */ ++ d16 free_space; ++ /* offset to start of free space in node */ ++ d16 free_space_start; ++ /* for reiser4_fsck. When information about what is a free ++ block is corrupted, and we try to recover everything even ++ if marked as freed, then old versions of data may ++ duplicate newer versions, and this field allows us to ++ restore the newer version. Also useful for when users ++ who don't have the new trashcan installed on their linux distro ++ delete the wrong files and send us desperate emails ++ offering $25 for them back. */ ++ ++ /* magic field we need to tell formatted nodes NIKITA-FIXME-HANS: improve this comment */ ++ d32 magic; ++ /* flushstamp is made of mk_id and write_counter. mk_id is an ++ id generated randomly at mkreiserfs time. So we can just ++ skip all nodes with different mk_id. write_counter is d64 ++ incrementing counter of writes on disk. It is used for ++ choosing the newest data at fsck time. NIKITA-FIXME-HANS: why was field name changed but not comment? */ ++ ++ d32 mkfs_id; ++ d64 flush_id; ++ /* node flags to be used by fsck (reiser4ck or reiser4fsck?) 
++ and repacker NIKITA-FIXME-HANS: say more or reference elsewhere that says more */ ++ d16 flags; ++ ++ /* 1 is leaf level, 2 is twig level, root is the numerically ++ largest level */ ++ d8 level; ++ ++ d8 pad; ++} PACKED node40_header; ++ ++/* item headers are not standard across all node layouts, pass ++ pos_in_node to functions instead */ ++typedef struct item_header40 { ++ /* key of item */ ++ /* 0 */ reiser4_key key; ++ /* offset from start of a node measured in 8-byte chunks */ ++ /* 24 */ d16 offset; ++ /* 26 */ d16 flags; ++ /* 28 */ d16 plugin_id; ++} PACKED item_header40; ++ ++size_t item_overhead_node40(const znode * node, flow_t * aflow); ++size_t free_space_node40(znode * node); ++node_search_result lookup_node40(znode * node, const reiser4_key * key, ++ lookup_bias bias, coord_t * coord); ++int num_of_items_node40(const znode * node); ++char *item_by_coord_node40(const coord_t * coord); ++int length_by_coord_node40(const coord_t * coord); ++item_plugin *plugin_by_coord_node40(const coord_t * coord); ++reiser4_key *key_at_node40(const coord_t * coord, reiser4_key * key); ++size_t estimate_node40(znode * node); ++int check_node40(const znode * node, __u32 flags, const char **error); ++int parse_node40(znode * node); ++int init_node40(znode * node); ++#ifdef GUESS_EXISTS ++int guess_node40(const znode * node); ++#endif ++void change_item_size_node40(coord_t * coord, int by); ++int create_item_node40(coord_t * target, const reiser4_key * key, ++ reiser4_item_data * data, carry_plugin_info * info); ++void update_item_key_node40(coord_t * target, const reiser4_key * key, ++ carry_plugin_info * info); ++int kill_node40(struct carry_kill_data *, carry_plugin_info *); ++int cut_node40(struct carry_cut_data *, carry_plugin_info *); ++int shift_node40(coord_t * from, znode * to, shift_direction pend, ++ /* if @from->node becomes ++ empty - it will be deleted from ++ the tree if this is set to 1 ++ */ ++ int delete_child, int including_stop_coord, ++ 
carry_plugin_info * info); ++ ++int fast_insert_node40(const coord_t * coord); ++int fast_paste_node40(const coord_t * coord); ++int fast_cut_node40(const coord_t * coord); ++int max_item_size_node40(void); ++int prepare_removal_node40(znode * empty, carry_plugin_info * info); ++int set_item_plugin_node40(coord_t * coord, item_id id); ++int shrink_item_node40(coord_t * coord, int delta); ++ ++#if REISER4_DEBUG ++void *shift_check_prepare(const znode *left, const znode *right); ++void shift_check(void *vp, const znode *left, const znode *right); ++#endif ++ ++/* __REISER4_NODE40_H__ */ ++#endif ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/object.c b/fs/reiser4/plugin/object.c +new file mode 100644 +index 0000000..ae999e3 +--- /dev/null ++++ b/fs/reiser4/plugin/object.c +@@ -0,0 +1,516 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* ++ * Examples of object plugins: file, directory, symlink, special file. ++ * ++ * Plugins associated with inode: ++ * ++ * Plugin of inode is plugin referenced by plugin-id field of on-disk ++ * stat-data. How we store this plugin in in-core inode is not ++ * important. Currently pointers are used, another variant is to store offsets ++ * and do array lookup on each access. ++ * ++ * Now, each inode has one selected plugin: object plugin that ++ * determines what type of file this object is: directory, regular etc. ++ * ++ * This main plugin can use other plugins that are thus subordinated to ++ * it. Directory instance of object plugin uses hash; regular file ++ * instance uses tail policy plugin. ++ * ++ * Object plugin is either taken from id in stat-data or guessed from ++ * i_mode bits. Once it is established we ask it to install its ++ * subordinate plugins, by looking again in stat-data or inheriting them ++ * from parent. 
++ * ++ * How new inode is initialized during ->read_inode(): ++ * 1 read stat-data and initialize inode fields: i_size, i_mode, ++ * i_generation, capabilities etc. ++ * 2 read plugin id from stat data or try to guess plugin id ++ * from inode->i_mode bits if plugin id is missing. ++ * 3 Call ->init_inode() method of stat-data plugin to initialise inode fields. ++ * ++ * NIKITA-FIXME-HANS: can you say a little about 1 being done before 3? What ++ * if stat data does contain i_size, etc., due to it being an unusual plugin? ++ * ++ * 4 Call ->activate() method of object's plugin. Plugin is either read from ++ * from stat-data or guessed from mode bits ++ * 5 Call ->inherit() method of object plugin to inherit as yet un initialized ++ * plugins from parent. ++ * ++ * Easy induction proves that on last step all plugins of inode would be ++ * initialized. ++ * ++ * When creating new object: ++ * 1 obtain object plugin id (see next period) ++ * NIKITA-FIXME-HANS: period? ++ * 2 ->install() this plugin ++ * 3 ->inherit() the rest from the parent ++ * ++ * We need some examples of creating an object with default and non-default ++ * plugin ids. Nikita, please create them. ++ */ ++ ++#include "../inode.h" ++ ++static int _bugop(void) ++{ ++ BUG_ON(1); ++ return 0; ++} ++ ++#define bugop ((void *)_bugop) ++ ++static int _dummyop(void) ++{ ++ return 0; ++} ++ ++#define dummyop ((void *)_dummyop) ++ ++static int change_file(struct inode *inode, ++ reiser4_plugin * plugin, ++ pset_member memb) ++{ ++ /* cannot change object plugin of already existing object */ ++ if (memb == PSET_FILE) ++ return RETERR(-EINVAL); ++ ++ /* Change PSET_CREATE */ ++ return aset_set_unsafe(&reiser4_inode_data(inode)->pset, memb, plugin); ++} ++ ++static reiser4_plugin_ops file_plugin_ops = { ++ .change = change_file ++}; ++ ++/* ++ * Definitions of object plugins. 
++ */ ++ ++file_plugin file_plugins[LAST_FILE_PLUGIN_ID] = { ++ [UNIX_FILE_PLUGIN_ID] = { ++ .h = { ++ .type_id = REISER4_FILE_PLUGIN_TYPE, ++ .id = UNIX_FILE_PLUGIN_ID, ++ .groups = (1 << REISER4_REGULAR_FILE), ++ .pops = &file_plugin_ops, ++ .label = "reg", ++ .desc = "regular file", ++ .linkage = {NULL, NULL}, ++ }, ++ .inode_ops = { ++ .permission = reiser4_permission_common, ++ .setattr = setattr_unix_file, ++ .getattr = reiser4_getattr_common ++ }, ++ .file_ops = { ++ .llseek = generic_file_llseek, ++ .read = read_unix_file, ++ .write = write_unix_file, ++ .aio_read = generic_file_aio_read, ++ .ioctl = ioctl_unix_file, ++ .mmap = mmap_unix_file, ++ .open = open_unix_file, ++ .release = release_unix_file, ++ .fsync = sync_unix_file, ++ .sendfile = sendfile_unix_file ++ }, ++ .as_ops = { ++ .writepage = reiser4_writepage, ++ .readpage = readpage_unix_file, ++ .sync_page = block_sync_page, ++ .writepages = writepages_unix_file, ++ .set_page_dirty = reiser4_set_page_dirty, ++ .readpages = readpages_unix_file, ++ .prepare_write = prepare_write_unix_file, ++ .commit_write = commit_write_unix_file, ++ .bmap = bmap_unix_file, ++ .invalidatepage = reiser4_invalidatepage, ++ .releasepage = reiser4_releasepage ++ }, ++ .write_sd_by_inode = write_sd_by_inode_common, ++ .flow_by_inode = flow_by_inode_unix_file, ++ .key_by_inode = key_by_inode_and_offset_common, ++ .set_plug_in_inode = set_plug_in_inode_common, ++ .adjust_to_parent = adjust_to_parent_common, ++ .create_object = reiser4_create_object_common, ++ .delete_object = delete_object_unix_file, ++ .add_link = reiser4_add_link_common, ++ .rem_link = reiser4_rem_link_common, ++ .owns_item = owns_item_unix_file, ++ .can_add_link = can_add_link_common, ++ .detach = dummyop, ++ .bind = dummyop, ++ .safelink = safelink_common, ++ .estimate = { ++ .create = estimate_create_common, ++ .update = estimate_update_common, ++ .unlink = estimate_unlink_common ++ }, ++ .init_inode_data = init_inode_data_unix_file, ++ 
.cut_tree_worker = cut_tree_worker_common, ++ .wire = { ++ .write = wire_write_common, ++ .read = wire_read_common, ++ .get = wire_get_common, ++ .size = wire_size_common, ++ .done = wire_done_common ++ } ++ }, ++ [DIRECTORY_FILE_PLUGIN_ID] = { ++ .h = { ++ .type_id = REISER4_FILE_PLUGIN_TYPE, ++ .id = DIRECTORY_FILE_PLUGIN_ID, ++ .groups = (1 << REISER4_DIRECTORY_FILE), ++ .pops = &file_plugin_ops, ++ .label = "dir", ++ .desc = "directory", ++ .linkage = {NULL, NULL} ++ }, ++ .inode_ops = {.create = NULL}, ++ .file_ops = {.owner = NULL}, ++ .as_ops = {.writepage = NULL}, ++ ++ .write_sd_by_inode = write_sd_by_inode_common, ++ .flow_by_inode = bugop, ++ .key_by_inode = bugop, ++ .set_plug_in_inode = set_plug_in_inode_common, ++ .adjust_to_parent = adjust_to_parent_common_dir, ++ .create_object = reiser4_create_object_common, ++ .delete_object = reiser4_delete_dir_common, ++ .add_link = reiser4_add_link_common, ++ .rem_link = rem_link_common_dir, ++ .owns_item = owns_item_common_dir, ++ .can_add_link = can_add_link_common, ++ .can_rem_link = can_rem_link_common_dir, ++ .detach = reiser4_detach_common_dir, ++ .bind = reiser4_bind_common_dir, ++ .safelink = safelink_common, ++ .estimate = { ++ .create = estimate_create_common_dir, ++ .update = estimate_update_common, ++ .unlink = estimate_unlink_common_dir ++ }, ++ .wire = { ++ .write = wire_write_common, ++ .read = wire_read_common, ++ .get = wire_get_common, ++ .size = wire_size_common, ++ .done = wire_done_common ++ }, ++ .init_inode_data = init_inode_ordering, ++ .cut_tree_worker = cut_tree_worker_common, ++ }, ++ [SYMLINK_FILE_PLUGIN_ID] = { ++ .h = { ++ .type_id = REISER4_FILE_PLUGIN_TYPE, ++ .id = SYMLINK_FILE_PLUGIN_ID, ++ .groups = (1 << REISER4_SYMLINK_FILE), ++ .pops = &file_plugin_ops, ++ .label = "symlink", ++ .desc = "symbolic link", ++ .linkage = {NULL,NULL} ++ }, ++ .inode_ops = { ++ .readlink = generic_readlink, ++ .follow_link = reiser4_follow_link_common, ++ .permission = reiser4_permission_common, 
++ .setattr = reiser4_setattr_common, ++ .getattr = reiser4_getattr_common ++ }, ++ /* inode->i_fop of symlink is initialized by NULL in setup_inode_ops */ ++ .file_ops = {.owner = NULL}, ++ .as_ops = {.writepage = NULL}, ++ ++ .write_sd_by_inode = write_sd_by_inode_common, ++ .set_plug_in_inode = set_plug_in_inode_common, ++ .adjust_to_parent = adjust_to_parent_common, ++ .create_object = reiser4_create_symlink, ++ .delete_object = reiser4_delete_object_common, ++ .add_link = reiser4_add_link_common, ++ .rem_link = reiser4_rem_link_common, ++ .can_add_link = can_add_link_common, ++ .detach = dummyop, ++ .bind = dummyop, ++ .safelink = safelink_common, ++ .estimate = { ++ .create = estimate_create_common, ++ .update = estimate_update_common, ++ .unlink = estimate_unlink_common ++ }, ++ .init_inode_data = init_inode_ordering, ++ .cut_tree_worker = cut_tree_worker_common, ++ .destroy_inode = destroy_inode_symlink, ++ .wire = { ++ .write = wire_write_common, ++ .read = wire_read_common, ++ .get = wire_get_common, ++ .size = wire_size_common, ++ .done = wire_done_common ++ } ++ }, ++ [SPECIAL_FILE_PLUGIN_ID] = { ++ .h = { ++ .type_id = REISER4_FILE_PLUGIN_TYPE, ++ .id = SPECIAL_FILE_PLUGIN_ID, ++ .groups = (1 << REISER4_SPECIAL_FILE), ++ .pops = &file_plugin_ops, ++ .label = "special", ++ .desc = ++ "special: fifo, device or socket", ++ .linkage = {NULL, NULL} ++ }, ++ .inode_ops = { ++ .permission = reiser4_permission_common, ++ .setattr = reiser4_setattr_common, ++ .getattr = reiser4_getattr_common ++ }, ++ /* file_ops of special files (sockets, block, char, fifo) are ++ initialized by init_special_inode. 
*/ ++ .file_ops = {.owner = NULL}, ++ .as_ops = {.writepage = NULL}, ++ ++ .write_sd_by_inode = write_sd_by_inode_common, ++ .set_plug_in_inode = set_plug_in_inode_common, ++ .adjust_to_parent = adjust_to_parent_common, ++ .create_object = reiser4_create_object_common, ++ .delete_object = reiser4_delete_object_common, ++ .add_link = reiser4_add_link_common, ++ .rem_link = reiser4_rem_link_common, ++ .owns_item = owns_item_common, ++ .can_add_link = can_add_link_common, ++ .detach = dummyop, ++ .bind = dummyop, ++ .safelink = safelink_common, ++ .estimate = { ++ .create = estimate_create_common, ++ .update = estimate_update_common, ++ .unlink = estimate_unlink_common ++ }, ++ .init_inode_data = init_inode_ordering, ++ .cut_tree_worker = cut_tree_worker_common, ++ .wire = { ++ .write = wire_write_common, ++ .read = wire_read_common, ++ .get = wire_get_common, ++ .size = wire_size_common, ++ .done = wire_done_common ++ } ++ }, ++ [CRYPTCOMPRESS_FILE_PLUGIN_ID] = { ++ .h = { ++ .type_id = REISER4_FILE_PLUGIN_TYPE, ++ .id = CRYPTCOMPRESS_FILE_PLUGIN_ID, ++ .groups = (1 << REISER4_REGULAR_FILE), ++ .pops = &file_plugin_ops, ++ .label = "cryptcompress", ++ .desc = "cryptcompress file", ++ .linkage = {NULL, NULL} ++ }, ++ .inode_ops = { ++ .permission = reiser4_permission_common, ++ .setattr = prot_setattr_cryptcompress, ++ .getattr = reiser4_getattr_common ++ }, ++ .file_ops = { ++ .llseek = generic_file_llseek, ++ .read = prot_read_cryptcompress, ++ .write = prot_write_cryptcompress, ++ .aio_read = generic_file_aio_read, ++ .mmap = prot_mmap_cryptcompress, ++ .release = prot_release_cryptcompress, ++ .fsync = reiser4_sync_common, ++ .sendfile = prot_sendfile_cryptcompress ++ }, ++ .as_ops = { ++ .writepage = reiser4_writepage, ++ .readpage = readpage_cryptcompress, ++ .sync_page = block_sync_page, ++ .writepages = writepages_cryptcompress, ++ .set_page_dirty = reiser4_set_page_dirty, ++ .readpages = readpages_cryptcompress, ++ .prepare_write = prepare_write_common, ++ 
.invalidatepage = reiser4_invalidatepage, ++ .releasepage = reiser4_releasepage ++ }, ++ .write_sd_by_inode = write_sd_by_inode_common, ++ .flow_by_inode = flow_by_inode_cryptcompress, ++ .key_by_inode = key_by_inode_cryptcompress, ++ .set_plug_in_inode = set_plug_in_inode_common, ++ .adjust_to_parent = adjust_to_parent_cryptcompress, ++ .create_object = create_cryptcompress, ++ .open_object = open_object_cryptcompress, ++ .delete_object = delete_object_cryptcompress, ++ .add_link = reiser4_add_link_common, ++ .rem_link = reiser4_rem_link_common, ++ .owns_item = owns_item_common, ++ .can_add_link = can_add_link_common, ++ .detach = dummyop, ++ .bind = dummyop, ++ .safelink = safelink_common, ++ .estimate = { ++ .create = estimate_create_common, ++ .update = estimate_update_common, ++ .unlink = estimate_unlink_common ++ }, ++ .init_inode_data = init_inode_data_cryptcompress, ++ .cut_tree_worker = cut_tree_worker_cryptcompress, ++ .destroy_inode = destroy_inode_cryptcompress, ++ .wire = { ++ .write = wire_write_common, ++ .read = wire_read_common, ++ .get = wire_get_common, ++ .size = wire_size_common, ++ .done = wire_done_common ++ } ++ } ++}; ++ ++static int change_dir(struct inode *inode, ++ reiser4_plugin * plugin, ++ pset_member memb) ++{ ++ /* cannot change dir plugin of already existing object */ ++ return RETERR(-EINVAL); ++} ++ ++static reiser4_plugin_ops dir_plugin_ops = { ++ .change = change_dir ++}; ++ ++/* ++ * definition of directory plugins ++ */ ++ ++dir_plugin dir_plugins[LAST_DIR_ID] = { ++ /* standard hashed directory plugin */ ++ [HASHED_DIR_PLUGIN_ID] = { ++ .h = { ++ .type_id = REISER4_DIR_PLUGIN_TYPE, ++ .id = HASHED_DIR_PLUGIN_ID, ++ .pops = &dir_plugin_ops, ++ .label = "dir", ++ .desc = "hashed directory", ++ .linkage = {NULL, NULL} ++ }, ++ .inode_ops = { ++ .create = reiser4_create_common, ++ .lookup = reiser4_lookup_common, ++ .link = reiser4_link_common, ++ .unlink = reiser4_unlink_common, ++ .symlink = reiser4_symlink_common, ++ .mkdir = 
reiser4_mkdir_common, ++ .rmdir = reiser4_unlink_common, ++ .mknod = reiser4_mknod_common, ++ .rename = reiser4_rename_common, ++ .permission = reiser4_permission_common, ++ .setattr = reiser4_setattr_common, ++ .getattr = reiser4_getattr_common ++ }, ++ .file_ops = { ++ .llseek = reiser4_llseek_dir_common, ++ .read = generic_read_dir, ++ .readdir = reiser4_readdir_common, ++ .release = reiser4_release_dir_common, ++ .fsync = reiser4_sync_common ++ }, ++ .as_ops = { ++ .writepage = bugop, ++ .sync_page = bugop, ++ .writepages = dummyop, ++ .set_page_dirty = bugop, ++ .readpages = bugop, ++ .prepare_write = bugop, ++ .commit_write = bugop, ++ .bmap = bugop, ++ .invalidatepage = bugop, ++ .releasepage = bugop ++ }, ++ .get_parent = get_parent_common, ++ .is_name_acceptable = is_name_acceptable_common, ++ .build_entry_key = build_entry_key_hashed, ++ .build_readdir_key = build_readdir_key_common, ++ .add_entry = reiser4_add_entry_common, ++ .rem_entry = reiser4_rem_entry_common, ++ .init = reiser4_dir_init_common, ++ .done = reiser4_dir_done_common, ++ .attach = reiser4_attach_common, ++ .detach = reiser4_detach_common, ++ .estimate = { ++ .add_entry = estimate_add_entry_common, ++ .rem_entry = estimate_rem_entry_common, ++ .unlink = dir_estimate_unlink_common ++ } ++ }, ++ /* hashed directory for which seekdir/telldir are guaranteed to ++ * work. Brain-damage. 
*/ ++ [SEEKABLE_HASHED_DIR_PLUGIN_ID] = { ++ .h = { ++ .type_id = REISER4_DIR_PLUGIN_TYPE, ++ .id = SEEKABLE_HASHED_DIR_PLUGIN_ID, ++ .pops = &dir_plugin_ops, ++ .label = "dir32", ++ .desc = "directory hashed with 31 bit hash", ++ .linkage = {NULL, NULL} ++ }, ++ .inode_ops = { ++ .create = reiser4_create_common, ++ .lookup = reiser4_lookup_common, ++ .link = reiser4_link_common, ++ .unlink = reiser4_unlink_common, ++ .symlink = reiser4_symlink_common, ++ .mkdir = reiser4_mkdir_common, ++ .rmdir = reiser4_unlink_common, ++ .mknod = reiser4_mknod_common, ++ .rename = reiser4_rename_common, ++ .permission = reiser4_permission_common, ++ .setattr = reiser4_setattr_common, ++ .getattr = reiser4_getattr_common ++ }, ++ .file_ops = { ++ .llseek = reiser4_llseek_dir_common, ++ .read = generic_read_dir, ++ .readdir = reiser4_readdir_common, ++ .release = reiser4_release_dir_common, ++ .fsync = reiser4_sync_common ++ }, ++ .as_ops = { ++ .writepage = bugop, ++ .sync_page = bugop, ++ .writepages = dummyop, ++ .set_page_dirty = bugop, ++ .readpages = bugop, ++ .prepare_write = bugop, ++ .commit_write = bugop, ++ .bmap = bugop, ++ .invalidatepage = bugop, ++ .releasepage = bugop ++ }, ++ .get_parent = get_parent_common, ++ .is_name_acceptable = is_name_acceptable_common, ++ .build_entry_key = build_entry_key_seekable, ++ .build_readdir_key = build_readdir_key_common, ++ .add_entry = reiser4_add_entry_common, ++ .rem_entry = reiser4_rem_entry_common, ++ .init = reiser4_dir_init_common, ++ .done = reiser4_dir_done_common, ++ .attach = reiser4_attach_common, ++ .detach = reiser4_detach_common, ++ .estimate = { ++ .add_entry = estimate_add_entry_common, ++ .rem_entry = estimate_rem_entry_common, ++ .unlink = dir_estimate_unlink_common ++ } ++ } ++}; ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/object.h b/fs/reiser4/plugin/object.h +new file mode 100644 +index 0000000..440c369 +--- /dev/null ++++ b/fs/reiser4/plugin/object.h +@@ -0,0 +1,121 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Declaration of object plugin functions. */ ++ ++#if !defined( __FS_REISER4_PLUGIN_OBJECT_H__ ) ++#define __FS_REISER4_PLUGIN_OBJECT_H__ ++ ++#include "../type_safe_hash.h" ++ ++/* common implementations of inode operations */ ++int reiser4_create_common(struct inode *parent, struct dentry *dentry, ++ int mode, struct nameidata *); ++struct dentry * reiser4_lookup_common(struct inode *parent, ++ struct dentry *dentry, ++ struct nameidata *nameidata); ++int reiser4_link_common(struct dentry *existing, struct inode *parent, ++ struct dentry *newname); ++int reiser4_unlink_common(struct inode *parent, struct dentry *victim); ++int reiser4_mkdir_common(struct inode *parent, struct dentry *dentry, int mode); ++int reiser4_symlink_common(struct inode *parent, struct dentry *dentry, ++ const char *linkname); ++int reiser4_mknod_common(struct inode *parent, struct dentry *dentry, ++ int mode, dev_t rdev); ++int reiser4_rename_common(struct inode *old_dir, struct dentry *old_name, ++ struct inode *new_dir, struct dentry *new_name); ++void *reiser4_follow_link_common(struct dentry *, struct nameidata *data); ++int reiser4_permission_common(struct inode *, int mask, ++ struct nameidata *nameidata); ++int reiser4_setattr_common(struct dentry *, struct iattr *); ++int reiser4_getattr_common(struct vfsmount *mnt, struct dentry *, ++ struct kstat *); ++ ++/* common implementations of file operations */ ++loff_t reiser4_llseek_dir_common(struct file *, loff_t off, int origin); ++int reiser4_readdir_common(struct file *, void *dirent, filldir_t); ++int 
reiser4_release_dir_common(struct inode *, struct file *); ++int reiser4_sync_common(struct file *, struct dentry *, int datasync); ++ ++/* common implementations of address space operations */ ++int prepare_write_common(struct file *, struct page *, unsigned from, ++ unsigned to); ++ ++/* file plugin operations: common implementations */ ++int write_sd_by_inode_common(struct inode *); ++int key_by_inode_and_offset_common(struct inode *, loff_t, reiser4_key *); ++int set_plug_in_inode_common(struct inode *object, struct inode *parent, ++ reiser4_object_create_data *); ++int adjust_to_parent_common(struct inode *object, struct inode *parent, ++ struct inode *root); ++int adjust_to_parent_common_dir(struct inode *object, struct inode *parent, ++ struct inode *root); ++int adjust_to_parent_cryptcompress(struct inode *object, struct inode *parent, ++ struct inode *root); ++int reiser4_create_object_common(struct inode *object, struct inode *parent, ++ reiser4_object_create_data *); ++int reiser4_delete_object_common(struct inode *); ++int reiser4_delete_dir_common(struct inode *); ++int reiser4_add_link_common(struct inode *object, struct inode *parent); ++int reiser4_rem_link_common(struct inode *object, struct inode *parent); ++int rem_link_common_dir(struct inode *object, struct inode *parent); ++int owns_item_common(const struct inode *, const coord_t *); ++int owns_item_common_dir(const struct inode *, const coord_t *); ++int can_add_link_common(const struct inode *); ++int can_rem_link_common_dir(const struct inode *); ++int reiser4_detach_common_dir(struct inode *child, struct inode *parent); ++int reiser4_bind_common_dir(struct inode *child, struct inode *parent); ++int safelink_common(struct inode *, reiser4_safe_link_t, __u64 value); ++reiser4_block_nr estimate_create_common(const struct inode *); ++reiser4_block_nr estimate_create_common_dir(const struct inode *); ++reiser4_block_nr estimate_update_common(const struct inode *); ++reiser4_block_nr 
estimate_unlink_common(const struct inode *, ++ const struct inode *); ++reiser4_block_nr estimate_unlink_common_dir(const struct inode *, ++ const struct inode *); ++char *wire_write_common(struct inode *, char *start); ++char *wire_read_common(char *addr, reiser4_object_on_wire *); ++struct dentry *wire_get_common(struct super_block *, reiser4_object_on_wire *); ++int wire_size_common(struct inode *); ++void wire_done_common(reiser4_object_on_wire *); ++ ++/* dir plugin operations: common implementations */ ++struct dentry *get_parent_common(struct inode *child); ++int is_name_acceptable_common(const struct inode *, const char *name, int len); ++void build_entry_key_common(const struct inode *, ++ const struct qstr *qname, reiser4_key *); ++int build_readdir_key_common(struct file *dir, reiser4_key *); ++int reiser4_add_entry_common(struct inode *object, struct dentry *where, ++ reiser4_object_create_data *, reiser4_dir_entry_desc *); ++int reiser4_rem_entry_common(struct inode *object, struct dentry *where, ++ reiser4_dir_entry_desc *); ++int reiser4_dir_init_common(struct inode *object, struct inode *parent, ++ reiser4_object_create_data *); ++int reiser4_dir_done_common(struct inode *); ++int reiser4_attach_common(struct inode *child, struct inode *parent); ++int reiser4_detach_common(struct inode *object, struct inode *parent); ++reiser4_block_nr estimate_add_entry_common(const struct inode *); ++reiser4_block_nr estimate_rem_entry_common(const struct inode *); ++reiser4_block_nr dir_estimate_unlink_common(const struct inode *, ++ const struct inode *); ++ ++/* these are essential parts of common implementations, they are to make ++ customized implementations easier */ ++int do_prepare_write(struct file *, struct page *, unsigned from, unsigned to); ++ ++/* merely useful functions */ ++int lookup_sd(struct inode *, znode_lock_mode, coord_t *, lock_handle *, ++ const reiser4_key *, int silent); ++ ++/* __FS_REISER4_PLUGIN_OBJECT_H__ */ ++#endif ++ ++/* Make 
Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/plugin.c b/fs/reiser4/plugin/plugin.c +new file mode 100644 +index 0000000..8261878 +--- /dev/null ++++ b/fs/reiser4/plugin/plugin.c +@@ -0,0 +1,578 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Basic plugin infrastructure, lookup etc. */ ++ ++/* PLUGINS: ++ ++ Plugins are internal Reiser4 "modules" or "objects" used to increase ++ extensibility and allow external users to easily adapt reiser4 to ++ their needs. ++ ++ Plugins are classified into several disjoint "types". Plugins ++ belonging to the particular plugin type are termed "instances" of ++ this type. Currently the following types are present: ++ ++ . object plugin ++ . hash plugin ++ . tail plugin ++ . perm plugin ++ . item plugin ++ . node layout plugin ++ ++NIKITA-FIXME-HANS: update this list, and review this entire comment for currency ++ ++ Object (file) plugin determines how given file-system object serves ++ standard VFS requests for read, write, seek, mmap etc. Instances of ++ file plugins are: regular file, directory, symlink. Another example ++ of file plugin is audit plugin, that optionally records accesses to ++ underlying object and forwards requests to it. ++ ++ Hash plugins compute hashes used by reiser4 to store and locate ++ files within directories. Instances of hash plugin type are: r5, ++ tea, rupasov. ++ ++ Tail plugins (or, more precisely, tail policy plugins) determine ++ when last part of the file should be stored in a formatted item. ++ ++ Perm plugins control permissions granted for a process accessing a file. ++ ++ Scope and lookup: ++ ++ label such that pair ( type_label, plugin_label ) is unique. This ++ pair is a globally persistent and user-visible plugin ++ identifier. 
Internally kernel maintains plugins and plugin types in ++ arrays using an index into those arrays as plugin and plugin type ++ identifiers. File-system in turn, also maintains persistent ++ "dictionary" which is mapping from plugin label to numerical ++ identifier which is stored in file-system objects. That is, we ++ store the offset into the plugin array for that plugin type as the ++ plugin id in the stat data of the filesystem object. ++ ++ plugin_labels have meaning for the user interface that assigns ++ plugins to files, and may someday have meaning for dynamic loading of ++ plugins and for copying of plugins from one fs instance to ++ another by utilities like cp and tar. ++ ++ Internal kernel plugin type identifier (index in plugins[] array) is ++ of type reiser4_plugin_type. Set of available plugin types is ++ currently static, but dynamic loading doesn't seem to pose ++ insurmountable problems. ++ ++ Within each type plugins are addressed by the identifiers of type ++ reiser4_plugin_id (indices in ++ reiser4_plugin_type_data.builtin[]). Such identifiers are only ++ required to be unique within one type, not globally. ++ ++ Thus, plugin in memory is uniquely identified by the pair (type_id, ++ id). ++ ++ Usage: ++ ++ There exists only one instance of each plugin instance, but this ++ single instance can be associated with many entities (file-system ++ objects, items, nodes, transactions, file-descriptors etc.). Entity ++ to which plugin of given type is termed (due to the lack of ++ imagination) "subject" of this plugin type and, by abuse of ++ terminology, subject of particular instance of this type to which ++ it's attached currently. For example, inode is subject of object ++ plugin type. Inode representing directory is subject of directory ++ plugin, hash plugin type and some particular instance of hash plugin ++ type. Inode, representing regular file is subject of "regular file" ++ plugin, tail-policy plugin type etc. 
++ ++ With each subject the plugin possibly stores some state. For example, ++ the state of a directory plugin (instance of object plugin type) is pointer ++ to hash plugin (if directories always use hashing that is). State of ++ audit plugin is file descriptor (struct file) of log file or some ++ magic value to do logging through printk(). ++ ++ Interface: ++ ++ In addition to a scalar identifier, each plugin type and plugin ++ proper has a "label": short string and a "description"---longer ++ descriptive string. Labels and descriptions of plugin types are ++ hard-coded into plugins[] array, declared and defined in ++ plugin.c. Label and description of plugin are stored in .label and ++ .desc fields of reiser4_plugin_header respectively. It's possible to ++ locate plugin by the pair of labels. ++ ++ Features: ++ ++ . user-level plugin manipulations: ++ + reiser4("filename/..file_plugin<='audit'"); ++ + write(open("filename/..file_plugin"), "audit", 8); ++ ++ . user level utilities lsplug and chplug to manipulate plugins. ++ Utilities are not of primary priority. Possibly they will be not ++ working on v4.0 ++ ++NIKITA-FIXME-HANS: this should be a mkreiserfs option not a mount option, do you agree? I don't think that specifying it at mount time, and then changing it with each mount, is a good model for usage. ++ ++ . mount option "plug" to set-up plugins of root-directory. ++ "plug=foo:bar" will set "bar" as default plugin of type "foo". ++ ++ Limitations: ++ ++ . each plugin type has to provide at least one builtin ++ plugin. This is technical limitation and it can be lifted in the ++ future. ++ ++ TODO: ++ ++ New plugin types/plugings: ++ Things we should be able to separately choose to inherit: ++ ++ security plugins ++ ++ stat data ++ ++ file bodies ++ ++ file plugins ++ ++ dir plugins ++ ++ . perm:acl ++ ++ d audi---audit plugin intercepting and possibly logging all ++ accesses to object. 
Requires to put stub functions in file_operations ++ in stead of generic_file_*. ++ ++NIKITA-FIXME-HANS: why make overflows a plugin? ++ . over---handle hash overflows ++ ++ . sqnt---handle different access patterns and instruments read-ahead ++ ++NIKITA-FIXME-HANS: describe the line below in more detail. ++ ++ . hier---handle inheritance of plugins along file-system hierarchy ++ ++ Different kinds of inheritance: on creation vs. on access. ++ Compatible/incompatible plugins. ++ Inheritance for multi-linked files. ++ Layered plugins. ++ Notion of plugin context is abandoned. ++ ++Each file is associated ++ with one plugin and dependant plugins (hash, etc.) are stored as ++ main plugin state. Now, if we have plugins used for regular files ++ but not for directories, how such plugins would be inherited? ++ . always store them with directories also ++ ++NIKTIA-FIXME-HANS: Do the line above. It is not exclusive of doing the line below which is also useful. ++ ++ . use inheritance hierarchy, independent of file-system namespace ++ ++*/ ++ ++#include "../debug.h" ++#include "../dformat.h" ++#include "plugin_header.h" ++#include "item/static_stat.h" ++#include "node/node.h" ++#include "security/perm.h" ++#include "space/space_allocator.h" ++#include "disk_format/disk_format.h" ++#include "plugin.h" ++#include "../reiser4.h" ++#include "../jnode.h" ++#include "../inode.h" ++ ++#include /* for struct super_block */ ++ ++/* public interface */ ++ ++/* initialise plugin sub-system. Just call this once on reiser4 startup. */ ++int init_plugins(void); ++int setup_plugins(struct super_block *super, reiser4_plugin ** area); ++int locate_plugin(struct inode *inode, plugin_locator * loc); ++ ++/** ++ * init_plugins - initialize plugins ++ * ++ * Initializes plugin sub-system. It is part of reiser4 module ++ * initialization. For each plugin of each type init method is called and each ++ * plugin is put into list of plugins. 
++ */ ++int init_plugins(void) ++{ ++ reiser4_plugin_type type_id; ++ ++ for (type_id = 0; type_id < REISER4_PLUGIN_TYPES; ++type_id) { ++ reiser4_plugin_type_data *ptype; ++ int i; ++ ++ ptype = &plugins[type_id]; ++ assert("nikita-3508", ptype->label != NULL); ++ assert("nikita-3509", ptype->type_id == type_id); ++ ++ INIT_LIST_HEAD(&ptype->plugins_list); ++/* NIKITA-FIXME-HANS: change builtin_num to some other name lacking the term builtin. */ ++ for (i = 0; i < ptype->builtin_num; ++i) { ++ reiser4_plugin *plugin; ++ ++ plugin = plugin_at(ptype, i); ++ ++ if (plugin->h.label == NULL) ++ /* uninitialized slot encountered */ ++ continue; ++ assert("nikita-3445", plugin->h.type_id == type_id); ++ plugin->h.id = i; ++ if (plugin->h.pops != NULL && ++ plugin->h.pops->init != NULL) { ++ int result; ++ ++ result = plugin->h.pops->init(plugin); ++ if (result != 0) ++ return result; ++ } ++ INIT_LIST_HEAD(&plugin->h.linkage); ++ list_add_tail(&plugin->h.linkage, &ptype->plugins_list); ++ } ++ } ++ return 0; ++} ++ ++/* true if plugin type id is valid */ ++int is_plugin_type_valid(reiser4_plugin_type type) ++{ ++ /* "type" is unsigned, so no comparison with 0 is ++ necessary */ ++ return (type < REISER4_PLUGIN_TYPES); ++} ++ ++/* true if plugin id is valid */ ++int is_plugin_id_valid(reiser4_plugin_type type, reiser4_plugin_id id) ++{ ++ assert("nikita-1653", is_plugin_type_valid(type)); ++ return id < plugins[type].builtin_num; ++} ++ ++/* return plugin by its @type and @id. ++ ++ Both arguments are checked for validness: this is supposed to be called ++ from user-level. ++ ++NIKITA-FIXME-HANS: Do you instead mean that this checks ids created in ++user space, and passed to the filesystem by use of method files? Your ++comment really confused me on the first reading.... 
++ ++*/ ++reiser4_plugin *plugin_by_unsafe_id(reiser4_plugin_type type /* plugin type ++ * unchecked */, ++ reiser4_plugin_id id /* plugin id, ++ * unchecked */) ++{ ++ if (is_plugin_type_valid(type)) { ++ if (is_plugin_id_valid(type, id)) ++ return plugin_at(&plugins[type], id); ++ else ++ /* id out of bounds */ ++ warning("nikita-2913", ++ "Invalid plugin id: [%i:%i]", type, id); ++ } else ++ /* type_id out of bounds */ ++ warning("nikita-2914", "Invalid type_id: %i", type); ++ return NULL; ++} ++ ++/** ++ * save_plugin_id - store plugin id in disk format ++ * @plugin: plugin to convert ++ * @area: where to store result ++ * ++ * Puts id of @plugin in little endian format to address @area. ++ */ ++int save_plugin_id(reiser4_plugin *plugin /* plugin to convert */ , ++ d16 *area /* where to store result */ ) ++{ ++ assert("nikita-1261", plugin != NULL); ++ assert("nikita-1262", area != NULL); ++ ++ put_unaligned(cpu_to_le16(plugin->h.id), area); ++ return 0; ++} ++ ++/* list of all plugins of given type */ ++struct list_head *get_plugin_list(reiser4_plugin_type type) ++{ ++ assert("nikita-1056", is_plugin_type_valid(type)); ++ return &plugins[type].plugins_list; ++} ++ ++static void update_pset_mask(reiser4_inode * info, pset_member memb) ++{ ++ struct dentry *rootdir; ++ reiser4_inode *root; ++ ++ assert("edward-1443", memb != PSET_FILE); ++ ++ rootdir = inode_by_reiser4_inode(info)->i_sb->s_root; ++ if (rootdir != NULL) { ++ root = reiser4_inode_data(rootdir->d_inode); ++ /* ++ * if inode is different from the default one, or we are ++ * changing plugin of root directory, update plugin_mask ++ */ ++ if (aset_get(info->pset, memb) != ++ aset_get(root->pset, memb) || ++ info == root) ++ info->plugin_mask |= (1 << memb); ++ else ++ info->plugin_mask &= ~(1 << memb); ++ } ++} ++ ++/* Get specified plugin set member from parent, ++ or from fs-defaults (if no parent is given) and ++ install the result to pset of @self */ ++int grab_plugin_pset(struct inode *self, ++ 
struct inode *ancestor,
++		     pset_member memb)
++{
++	reiser4_plugin *plug;
++	reiser4_inode *info;
++	int result = 0;
++
++	/* Do not grab if initialised already. */
++	info = reiser4_inode_data(self);
++	if (aset_get(info->pset, memb) != NULL)
++		return 0;
++	if (ancestor) {
++		reiser4_inode *parent;
++
++		parent = reiser4_inode_data(ancestor);
++		/* heir set overrides plugin set; "a ? : b" is the GNU
++		   conditional-with-omitted-operand extension */
++		plug = aset_get(parent->hset, memb) ? :
++			aset_get(parent->pset, memb);
++	}
++	else
++		plug = get_default_plugin(memb);
++
++	result = set_plugin(&info->pset, memb, plug);
++	if (result == 0) {
++		if (!ancestor || self->i_sb->s_root->d_inode != self)
++			update_pset_mask(info, memb);
++	}
++	return result;
++}
++
++/* Take missing pset members from root inode */
++int finish_pset(struct inode *inode)
++{
++	reiser4_plugin *plug;
++	reiser4_inode *root;
++	reiser4_inode *info;
++	pset_member memb;
++	int result = 0;
++
++	root = reiser4_inode_data(inode->i_sb->s_root->d_inode);
++	info = reiser4_inode_data(inode);
++
++	assert("edward-1455", root != NULL);
++	assert("edward-1456", info != NULL);
++
++	/* file and directory plugins are already initialized. */
++	for (memb = PSET_DIR + 1; memb < PSET_LAST; ++memb) {
++
++		/* Do not grab if initialised already. */
++		if (aset_get(info->pset, memb) != NULL)
++			continue;
++
++		plug = aset_get(root->pset, memb);
++		result = set_plugin(&info->pset, memb, plug);
++		if (result != 0)
++			break;
++	}
++	if (result != 0) {
++		warning("nikita-3447",
++			"Cannot set up plugins for %lli",
++			(unsigned long long)
++			get_inode_oid(inode));
++	}
++	return result;
++}
++
++/* Forcibly set the @memb slot of @self's plugin set to @plug.
++ * Delegates to the plugin's own ->change() hook when one is defined,
++ * otherwise installs the plugin directly.  Refuses to operate on the
++ * root directory (or on a superblock without a root dentry). */
++int force_plugin_pset(struct inode *self, pset_member memb, reiser4_plugin * plug)
++{
++	reiser4_inode *info;
++	int result = 0;
++
++	if (!self->i_sb->s_root || self->i_sb->s_root->d_inode == self) {
++		/* Changing pset in the root object.
*/
++		return RETERR(-EINVAL);
++	}
++
++	info = reiser4_inode_data(self);
++	if (plug->h.pops != NULL && plug->h.pops->change != NULL)
++		result = plug->h.pops->change(self, plug, memb);
++	else
++		result = aset_set_unsafe(&info->pset, memb, plug);
++	if (result == 0) {
++		__u16 oldmask = info->plugin_mask;
++
++		update_pset_mask(info, memb);
++		if (oldmask != info->plugin_mask)
++			/* plugin mask changed: cached stat-data length
++			   is no longer trustworthy */
++			reiser4_inode_clr_flag(self, REISER4_SDLEN_KNOWN);
++	}
++	return result;
++}
++
++/* Table of all plugin types known to the kernel: one descriptor per
++ * type, indexed by reiser4_plugin_type.  Each entry names the type,
++ * points at the static array of built-in plugins of that type, and
++ * records the size of a single plugin instance of that type. */
++reiser4_plugin_type_data plugins[REISER4_PLUGIN_TYPES] = {
++	/* C99 designated initializers */
++	[REISER4_FILE_PLUGIN_TYPE] = {
++		.type_id = REISER4_FILE_PLUGIN_TYPE,
++		.label = "file",
++		.desc = "Object plugins",
++		.builtin_num = sizeof_array(file_plugins),
++		.builtin = file_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(file_plugin)
++	},
++	[REISER4_DIR_PLUGIN_TYPE] = {
++		.type_id = REISER4_DIR_PLUGIN_TYPE,
++		.label = "dir",
++		.desc = "Directory plugins",
++		.builtin_num = sizeof_array(dir_plugins),
++		.builtin = dir_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(dir_plugin)
++	},
++	[REISER4_HASH_PLUGIN_TYPE] = {
++		.type_id = REISER4_HASH_PLUGIN_TYPE,
++		.label = "hash",
++		.desc = "Directory hashes",
++		.builtin_num = sizeof_array(hash_plugins),
++		.builtin = hash_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(hash_plugin)
++	},
++	[REISER4_FIBRATION_PLUGIN_TYPE] = {
++		.type_id =
++		REISER4_FIBRATION_PLUGIN_TYPE,
++		.label = "fibration",
++		.desc = "Directory fibrations",
++		.builtin_num = sizeof_array(fibration_plugins),
++		.builtin = fibration_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(fibration_plugin)
++	},
++	[REISER4_CIPHER_PLUGIN_TYPE] = {
++		.type_id = REISER4_CIPHER_PLUGIN_TYPE,
++		.label = "cipher",
++		.desc = "Cipher plugins",
++		.builtin_num = sizeof_array(cipher_plugins),
++		.builtin = cipher_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(cipher_plugin)
++	},
++	[REISER4_DIGEST_PLUGIN_TYPE] = {
++		.type_id = REISER4_DIGEST_PLUGIN_TYPE,
++		.label = "digest",
++		.desc = "Digest plugins",
++		.builtin_num = sizeof_array(digest_plugins),
++		.builtin = digest_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(digest_plugin)
++	},
++	[REISER4_COMPRESSION_PLUGIN_TYPE] = {
++		.type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
++		.label = "compression",
++		.desc = "Compression plugins",
++		.builtin_num = sizeof_array(compression_plugins),
++		.builtin = compression_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(compression_plugin)
++	},
++	[REISER4_FORMATTING_PLUGIN_TYPE] = {
++		.type_id = REISER4_FORMATTING_PLUGIN_TYPE,
++		.label = "formatting",
++		.desc = "Tail inlining policies",
++		.builtin_num = sizeof_array(formatting_plugins),
++		.builtin = formatting_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(formatting_plugin)
++	},
++	[REISER4_PERM_PLUGIN_TYPE] = {
++		.type_id = REISER4_PERM_PLUGIN_TYPE,
++		.label = "perm",
++		.desc = "Permission checks",
++		.builtin_num = sizeof_array(perm_plugins),
++		.builtin = perm_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(perm_plugin)
++	},
++	[REISER4_ITEM_PLUGIN_TYPE] = {
++		.type_id = REISER4_ITEM_PLUGIN_TYPE,
++		.label = "item",
++		.desc = "Item handlers",
++		.builtin_num = sizeof_array(item_plugins),
++		.builtin = item_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(item_plugin)
++	},
++	[REISER4_NODE_PLUGIN_TYPE] = {
++		.type_id = REISER4_NODE_PLUGIN_TYPE,
++		.label = "node",
++		.desc = "node layout handlers",
++		.builtin_num = sizeof_array(node_plugins),
++		.builtin = node_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(node_plugin)
++	},
++	[REISER4_SD_EXT_PLUGIN_TYPE] = {
++		.type_id = REISER4_SD_EXT_PLUGIN_TYPE,
++		.label = "sd_ext",
++		.desc = "Parts of stat-data",
++		.builtin_num = sizeof_array(sd_ext_plugins),
++		.builtin = sd_ext_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(sd_ext_plugin)
++	},
++	[REISER4_FORMAT_PLUGIN_TYPE] = {
++		.type_id = REISER4_FORMAT_PLUGIN_TYPE,
++		.label = "disk_layout",
++		.desc = "defines filesystem on disk layout",
++		.builtin_num = sizeof_array(format_plugins),
++		.builtin = format_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(disk_format_plugin)
++	},
++	[REISER4_JNODE_PLUGIN_TYPE] = {
++		.type_id = REISER4_JNODE_PLUGIN_TYPE,
++		.label = "jnode",
++		.desc = "defines kind of jnode",
++		.builtin_num = sizeof_array(jnode_plugins),
++		.builtin = jnode_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(jnode_plugin)
++	},
++	[REISER4_COMPRESSION_MODE_PLUGIN_TYPE] = {
++		.type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
++		.label = "compression_mode",
++		.desc = "Defines compression mode",
++		.builtin_num = sizeof_array(compression_mode_plugins),
++		.builtin = compression_mode_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(compression_mode_plugin)
++	},
++	[REISER4_CLUSTER_PLUGIN_TYPE] = {
++		.type_id = REISER4_CLUSTER_PLUGIN_TYPE,
++		.label = "cluster",
++		.desc = "Defines cluster size",
++		.builtin_num = sizeof_array(cluster_plugins),
++		.builtin = cluster_plugins,
++		.plugins_list = {NULL, NULL},
++		.size = sizeof(cluster_plugin)
++	}
++};
++
++/*
++ * Local variables:
++ * c-indentation-style: "K&R"
++ * mode-name: "LC"
++ * c-basic-offset: 8
++ * tab-width: 8
++ * fill-column: 120
++ * End:
++ */
+diff --git a/fs/reiser4/plugin/plugin.h b/fs/reiser4/plugin/plugin.h
+new file mode 100644
+index 0000000..a1d1097
+--- /dev/null
++++ b/fs/reiser4/plugin/plugin.h
+@@ -0,0 +1,920 @@
++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
++
++/* Basic plugin data-types.
++ see fs/reiser4/plugin/plugin.c for details */ ++ ++#if !defined( __FS_REISER4_PLUGIN_TYPES_H__ ) ++#define __FS_REISER4_PLUGIN_TYPES_H__ ++ ++#include "../forward.h" ++#include "../debug.h" ++#include "../dformat.h" ++#include "../key.h" ++#include "compress/compress.h" ++#include "crypto/cipher.h" ++#include "plugin_header.h" ++#include "item/static_stat.h" ++#include "item/internal.h" ++#include "item/sde.h" ++#include "item/cde.h" ++#include "item/item.h" ++#include "node/node.h" ++#include "node/node40.h" ++#include "security/perm.h" ++#include "fibration.h" ++ ++#include "space/bitmap.h" ++#include "space/space_allocator.h" ++ ++#include "disk_format/disk_format40.h" ++#include "disk_format/disk_format.h" ++ ++#include /* for struct super_block, address_space */ ++#include /* for struct page */ ++#include /* for struct buffer_head */ ++#include /* for struct dentry */ ++#include ++#include ++ ++typedef struct reiser4_object_on_wire reiser4_object_on_wire; ++ ++/* ++ * File plugin. Defines the set of methods that file plugins implement, some ++ * of which are optional. ++ * ++ * A file plugin offers to the caller an interface for IO ( writing to and/or ++ * reading from) to what the caller sees as one sequence of bytes. An IO to it ++ * may affect more than one physical sequence of bytes, or no physical sequence ++ * of bytes, it may affect sequences of bytes offered by other file plugins to ++ * the semantic layer, and the file plugin may invoke other plugins and ++ * delegate work to them, but its interface is structured for offering the ++ * caller the ability to read and/or write what the caller sees as being a ++ * single sequence of bytes. 
++ * ++ * The file plugin must present a sequence of bytes to the caller, but it does ++ * not necessarily have to store a sequence of bytes, it does not necessarily ++ * have to support efficient tree traversal to any offset in the sequence of ++ * bytes (tail and extent items, whose keys contain offsets, do however provide ++ * efficient non-sequential lookup of any offset in the sequence of bytes). ++ * ++ * Directory plugins provide methods for selecting file plugins by resolving a ++ * name for them. ++ * ++ * The functionality other filesystems call an attribute, and rigidly tie ++ * together, we decompose into orthogonal selectable features of files. Using ++ * the terminology we will define next, an attribute is a perhaps constrained, ++ * perhaps static length, file whose parent has a uni-count-intra-link to it, ++ * which might be grandparent-major-packed, and whose parent has a deletion ++ * method that deletes it. ++ * ++ * File plugins can implement constraints. ++ * ++ * Files can be of variable length (e.g. regular unix files), or of static ++ * length (e.g. static sized attributes). ++ * ++ * An object may have many sequences of bytes, and many file plugins, but, it ++ * has exactly one objectid. It is usually desirable that an object has a ++ * deletion method which deletes every item with that objectid. Items cannot ++ * in general be found by just their objectids. This means that an object must ++ * have either a method built into its deletion plugin method for knowing what ++ * items need to be deleted, or links stored with the object that provide the ++ * plugin with a method for finding those items. Deleting a file within an ++ * object may or may not have the effect of deleting the entire object, ++ * depending on the file plugin's deletion method. ++ * ++ * LINK TAXONOMY: ++ * ++ * Many objects have a reference count, and when the reference count reaches 0 ++ * the object's deletion method is invoked. 
Some links embody a reference ++ * count increase ("countlinks"), and others do not ("nocountlinks"). ++ * ++ * Some links are bi-directional links ("bilinks"), and some are ++ * uni-directional("unilinks"). ++ * ++ * Some links are between parts of the same object ("intralinks"), and some are ++ * between different objects ("interlinks"). ++ * ++ * PACKING TAXONOMY: ++ * ++ * Some items of an object are stored with a major packing locality based on ++ * their object's objectid (e.g. unix directory items in plan A), and these are ++ * called "self-major-packed". ++ * ++ * Some items of an object are stored with a major packing locality based on ++ * their semantic parent object's objectid (e.g. unix file bodies in plan A), ++ * and these are called "parent-major-packed". ++ * ++ * Some items of an object are stored with a major packing locality based on ++ * their semantic grandparent, and these are called "grandparent-major-packed". ++ * Now carefully notice that we run into trouble with key length if we have to ++ * store a 8 byte major+minor grandparent based packing locality, an 8 byte ++ * parent objectid, an 8 byte attribute objectid, and an 8 byte offset, all in ++ * a 24 byte key. One of these fields must be sacrificed if an item is to be ++ * grandparent-major-packed, and which to sacrifice is left to the item author ++ * choosing to make the item grandparent-major-packed. You cannot make tail ++ * items and extent items grandparent-major-packed, though you could make them ++ * self-major-packed (usually they are parent-major-packed). ++ * ++ * In the case of ACLs (which are composed of fixed length ACEs which consist ++ * of {subject-type, subject, and permission bitmask} triples), it makes sense ++ * to not have an offset field in the ACE item key, and to allow duplicate keys ++ * for ACEs. 
Thus, the set of ACES for a given file is found by looking for a ++ * key consisting of the objectid of the grandparent (thus grouping all ACLs in ++ * a directory together), the minor packing locality of ACE, the objectid of ++ * the file, and 0. ++ * ++ * IO involves moving data from one location to another, which means that two ++ * locations must be specified, source and destination. ++ * ++ * This source and destination can be in the filesystem, or they can be a ++ * pointer in the user process address space plus a byte count. ++ * ++ * If both source and destination are in the filesystem, then at least one of ++ * them must be representable as a pure stream of bytes (which we call a flow, ++ * and define as a struct containing a key, a data pointer, and a length). ++ * This may mean converting one of them into a flow. We provide a generic ++ * cast_into_flow() method, which will work for any plugin supporting ++ * read_flow(), though it is inefficiently implemented in that it temporarily ++ * stores the flow in a buffer (Question: what to do with huge flows that ++ * cannot fit into memory? Answer: we must not convert them all at once. ) ++ * ++ * Performing a write requires resolving the write request into a flow defining ++ * the source, and a method that performs the write, and a key that defines ++ * where in the tree the write is to go. ++ * ++ * Performing a read requires resolving the read request into a flow defining ++ * the target, and a method that performs the read, and a key that defines ++ * where in the tree the read is to come from. ++ * ++ * There will exist file plugins which have no pluginid stored on the disk for ++ * them, and which are only invoked by other plugins. ++ */ ++ ++/* This should be incremented with each new contributed ++ pair (plugin type, plugin id). 
++ NOTE: Make sure there is a release of reiser4progs ++ with the corresponding version number */ ++#define PLUGIN_LIBRARY_VERSION 0 ++ ++ /* enumeration of fields within plugin_set */ ++typedef enum { ++ PSET_FILE, ++ PSET_DIR, /* PSET_FILE and PSET_DIR should be first elements: ++ * inode.c:read_inode() depends on this. */ ++ PSET_PERM, ++ PSET_FORMATTING, ++ PSET_HASH, ++ PSET_FIBRATION, ++ PSET_SD, ++ PSET_DIR_ITEM, ++ PSET_CIPHER, ++ PSET_DIGEST, ++ PSET_COMPRESSION, ++ PSET_COMPRESSION_MODE, ++ PSET_CLUSTER, ++ PSET_CREATE, ++ PSET_LAST ++} pset_member; ++ ++/* builtin file-plugins */ ++typedef enum { ++ /* regular file */ ++ UNIX_FILE_PLUGIN_ID, ++ /* directory */ ++ DIRECTORY_FILE_PLUGIN_ID, ++ /* symlink */ ++ SYMLINK_FILE_PLUGIN_ID, ++ /* for objects completely handled by the VFS: fifos, devices, ++ sockets */ ++ SPECIAL_FILE_PLUGIN_ID, ++ /* regular cryptcompress file */ ++ CRYPTCOMPRESS_FILE_PLUGIN_ID, ++ /* number of file plugins. Used as size of arrays to hold ++ file plugins. */ ++ LAST_FILE_PLUGIN_ID ++} reiser4_file_id; ++ ++typedef struct file_plugin { ++ ++ /* generic fields */ ++ plugin_header h; ++ ++ struct inode_operations inode_ops; ++ struct file_operations file_ops; ++ struct address_space_operations as_ops; ++ ++ /* save inode cached stat-data onto disk. It was called ++ reiserfs_update_sd() in 3.x */ ++ int (*write_sd_by_inode) (struct inode *); ++ ++ /* ++ * private methods: These are optional. If used they will allow you to ++ * minimize the amount of code needed to implement a deviation from ++ * some other method that also uses them. ++ */ ++ ++ /* ++ * Construct flow into @flow according to user-supplied data. ++ * ++ * This is used by read/write methods to construct a flow to ++ * write/read. ->flow_by_inode() is plugin method, rather than single ++ * global implementation, because key in a flow used by plugin may ++ * depend on data in a @buf. 
++ * ++ * NIKITA-FIXME-HANS: please create statistics on what functions are ++ * dereferenced how often for the mongo benchmark. You can supervise ++ * Elena doing this for you if that helps. Email me the list of the ++ * top 10, with their counts, and an estimate of the total number of ++ * CPU cycles spent dereferencing as a percentage of CPU cycles spent ++ * processing (non-idle processing). If the total percent is, say, ++ * less than 1%, it will make our coding discussions much easier, and ++ * keep me from questioning whether functions like the below are too ++ * frequently called to be dereferenced. If the total percent is more ++ * than 1%, perhaps private methods should be listed in a "required" ++ * comment at the top of each plugin (with stern language about how if ++ * the comment is missing it will not be accepted by the maintainer), ++ * and implemented using macros not dereferenced functions. How about ++ * replacing this whole private methods part of the struct with a ++ * thorough documentation of what the standard helper functions are for ++ * use in constructing plugins? I think users have been asking for ++ * that, though not in so many words. ++ */ ++ int (*flow_by_inode) (struct inode *, const char __user *buf, ++ int user, loff_t size, ++ loff_t off, rw_op op, flow_t *); ++ ++ /* ++ * Return the key used to retrieve an offset of a file. It is used by ++ * default implementation of ->flow_by_inode() method ++ * (common_build_flow()) and, among other things, to get to the extent ++ * from jnode of unformatted node. ++ */ ++ int (*key_by_inode) (struct inode *, loff_t off, reiser4_key *); ++ ++ /* NIKITA-FIXME-HANS: this comment is not as clear to others as you think.... */ ++ /* ++ * set the plugin for a file. Called during file creation in creat() ++ * but not reiser4() unless an inode already exists for the file. 
++ */ ++ int (*set_plug_in_inode) (struct inode *inode, struct inode *parent, ++ reiser4_object_create_data *); ++ ++ /* NIKITA-FIXME-HANS: comment and name seem to say different things, ++ * are you setting up the object itself also or just adjusting the ++ * parent?.... */ ++ /* set up plugins for new @object created in @parent. @root is root ++ directory. */ ++ int (*adjust_to_parent) (struct inode *object, struct inode *parent, ++ struct inode *root); ++ /* ++ * this does whatever is necessary to do when object is created. For ++ * instance, for unix files stat data is inserted. It is supposed to be ++ * called by create of struct inode_operations. ++ */ ++ int (*create_object) (struct inode *object, struct inode *parent, ++ reiser4_object_create_data *); ++ ++ /* this does whatever is necessary to do when object is opened */ ++ int (*open_object) (struct inode * inode, struct file * file); ++ /* ++ * this method should check REISER4_NO_SD and set REISER4_NO_SD on ++ * success. Deletion of an object usually includes removal of items ++ * building file body (for directories this is removal of "." and "..") ++ * and removal of stat-data item. ++ */ ++ int (*delete_object) (struct inode *); ++ ++ /* add link from @parent to @object */ ++ int (*add_link) (struct inode *object, struct inode *parent); ++ ++ /* remove link from @parent to @object */ ++ int (*rem_link) (struct inode *object, struct inode *parent); ++ ++ /* ++ * return true if item addressed by @coord belongs to @inode. This is ++ * used by read/write to properly slice flow into items in presence of ++ * multiple key assignment policies, because items of a file are not ++ * necessarily contiguous in a key space, for example, in a plan-b. 
++ */ ++ int (*owns_item) (const struct inode *, const coord_t *); ++ ++ /* checks whether yet another hard links to this object can be ++ added */ ++ int (*can_add_link) (const struct inode *); ++ ++ /* checks whether hard links to this object can be removed */ ++ int (*can_rem_link) (const struct inode *); ++ ++ /* not empty for DIRECTORY_FILE_PLUGIN_ID only currently. It calls ++ detach of directory plugin to remove ".." */ ++ int (*detach) (struct inode * child, struct inode * parent); ++ ++ /* called when @child was just looked up in the @parent. It is not ++ empty for DIRECTORY_FILE_PLUGIN_ID only where it calls attach of ++ directory plugin */ ++ int (*bind) (struct inode * child, struct inode * parent); ++ ++ /* process safe-link during mount */ ++ int (*safelink) (struct inode * object, reiser4_safe_link_t link, ++ __u64 value); ++ ++ /* The couple of estimate methods for all file operations */ ++ struct { ++ reiser4_block_nr(*create) (const struct inode *); ++ reiser4_block_nr(*update) (const struct inode *); ++ reiser4_block_nr(*unlink) (const struct inode *, ++ const struct inode *); ++ } estimate; ++ ++ /* ++ * reiser4 specific part of inode has a union of structures which are ++ * specific to a plugin. This method is called when inode is read ++ * (read_inode) and when file is created (common_create_child) so that ++ * file plugin could initialize its inode data ++ */ ++ void (*init_inode_data) (struct inode *, reiser4_object_create_data *, ++ int); ++ ++ /* ++ * This method performs progressive deletion of items and whole nodes ++ * from right to left. ++ * ++ * @tap: the point deletion process begins from, ++ * @from_key: the beginning of the deleted key range, ++ * @to_key: the end of the deleted key range, ++ * @smallest_removed: the smallest removed key, ++ * ++ * @return: 0 if success, error code otherwise, -E_REPEAT means that long cut_tree ++ * operation was interrupted for allowing atom commit . 
++ */ ++ int (*cut_tree_worker) (tap_t *, const reiser4_key * from_key, ++ const reiser4_key * to_key, ++ reiser4_key * smallest_removed, struct inode *, ++ int, int *); ++ ++ /* called from ->destroy_inode() */ ++ void (*destroy_inode) (struct inode *); ++ ++ /* ++ * methods to serialize object identify. This is used, for example, by ++ * reiser4_{en,de}code_fh(). ++ */ ++ struct { ++ /* store object's identity at @area */ ++ char *(*write) (struct inode * inode, char *area); ++ /* parse object from wire to the @obj */ ++ char *(*read) (char *area, reiser4_object_on_wire * obj); ++ /* given object identity in @obj, find or create its dentry */ ++ struct dentry *(*get) (struct super_block * s, ++ reiser4_object_on_wire * obj); ++ /* how many bytes ->wire.write() consumes */ ++ int (*size) (struct inode * inode); ++ /* finish with object identify */ ++ void (*done) (reiser4_object_on_wire * obj); ++ } wire; ++} file_plugin; ++ ++extern file_plugin file_plugins[LAST_FILE_PLUGIN_ID]; ++ ++struct reiser4_object_on_wire { ++ file_plugin *plugin; ++ union { ++ struct { ++ obj_key_id key_id; ++ } std; ++ void *generic; ++ } u; ++}; ++ ++/* builtin dir-plugins */ ++typedef enum { ++ HASHED_DIR_PLUGIN_ID, ++ SEEKABLE_HASHED_DIR_PLUGIN_ID, ++ LAST_DIR_ID ++} reiser4_dir_id; ++ ++typedef struct dir_plugin { ++ /* generic fields */ ++ plugin_header h; ++ ++ struct inode_operations inode_ops; ++ struct file_operations file_ops; ++ struct address_space_operations as_ops; ++ ++ /* ++ * private methods: These are optional. If used they will allow you to ++ * minimize the amount of code needed to implement a deviation from ++ * some other method that uses them. You could logically argue that ++ * they should be a separate type of plugin. ++ */ ++ ++ struct dentry *(*get_parent) (struct inode * childdir); ++ ++ /* ++ * check whether "name" is acceptable name to be inserted into this ++ * object. Optionally implemented by directory-like objects. 
Can check ++ * for maximal length, reserved symbols etc ++ */ ++ int (*is_name_acceptable) (const struct inode * inode, const char *name, ++ int len); ++ ++ void (*build_entry_key) (const struct inode * dir /* directory where ++ * entry is (or will ++ * be) in.*/ , ++ const struct qstr * name /* name of file ++ * referenced by this ++ * entry */ , ++ reiser4_key * result /* resulting key of ++ * directory entry */ ); ++ int (*build_readdir_key) (struct file * dir, reiser4_key * result); ++ int (*add_entry) (struct inode * object, struct dentry * where, ++ reiser4_object_create_data * data, ++ reiser4_dir_entry_desc * entry); ++ int (*rem_entry) (struct inode * object, struct dentry * where, ++ reiser4_dir_entry_desc * entry); ++ ++ /* ++ * initialize directory structure for newly created object. For normal ++ * unix directories, insert dot and dotdot. ++ */ ++ int (*init) (struct inode * object, struct inode * parent, ++ reiser4_object_create_data * data); ++ ++ /* destroy directory */ ++ int (*done) (struct inode * child); ++ ++ /* called when @subdir was just looked up in the @dir */ ++ int (*attach) (struct inode * subdir, struct inode * dir); ++ int (*detach) (struct inode * subdir, struct inode * dir); ++ ++ struct { ++ reiser4_block_nr(*add_entry) (const struct inode *); ++ reiser4_block_nr(*rem_entry) (const struct inode *); ++ reiser4_block_nr(*unlink) (const struct inode *, ++ const struct inode *); ++ } estimate; ++} dir_plugin; ++ ++extern dir_plugin dir_plugins[LAST_DIR_ID]; ++ ++typedef struct formatting_plugin { ++ /* generic fields */ ++ plugin_header h; ++ /* returns non-zero iff file's tail has to be stored ++ in a direct item. 
*/ ++ int (*have_tail) (const struct inode * inode, loff_t size); ++} formatting_plugin; ++ ++typedef struct hash_plugin { ++ /* generic fields */ ++ plugin_header h; ++ /* computes hash of the given name */ ++ __u64(*hash) (const unsigned char *name, int len); ++} hash_plugin; ++ ++typedef struct cipher_plugin { ++ /* generic fields */ ++ plugin_header h; ++ struct crypto_blkcipher * (*alloc) (void); ++ void (*free) (struct crypto_blkcipher * tfm); ++ /* Offset translator. For each offset this returns (k * offset), where ++ k (k >= 1) is an expansion factor of the cipher algorithm. ++ For all symmetric algorithms k == 1. For asymmetric algorithms (which ++ inflate data) offset translation guarantees that all disk cluster's ++ units will have keys smaller then next cluster's one. ++ */ ++ loff_t(*scale) (struct inode * inode, size_t blocksize, loff_t src); ++ /* Cipher algorithms can accept data only by chunks of cipher block ++ size. This method is to align any flow up to cipher block size when ++ we pass it to cipher algorithm. To align means to append padding of ++ special format specific to the cipher algorithm */ ++ int (*align_stream) (__u8 * tail, int clust_size, int blocksize); ++ /* low-level key manager (check, install, etc..) */ ++ int (*setkey) (struct crypto_tfm * tfm, const __u8 * key, ++ unsigned int keylen); ++ /* main text processing procedures */ ++ void (*encrypt) (__u32 * expkey, __u8 * dst, const __u8 * src); ++ void (*decrypt) (__u32 * expkey, __u8 * dst, const __u8 * src); ++} cipher_plugin; ++ ++typedef struct digest_plugin { ++ /* generic fields */ ++ plugin_header h; ++ /* fingerprint size in bytes */ ++ int fipsize; ++ struct crypto_hash * (*alloc) (void); ++ void (*free) (struct crypto_hash * tfm); ++} digest_plugin; ++ ++typedef struct compression_plugin { ++ /* generic fields */ ++ plugin_header h; ++ int (*init) (void); ++ /* the maximum number of bytes the size of the "compressed" data can ++ * exceed the uncompressed data. 
*/ ++ int (*overrun) (unsigned src_len); ++ coa_t(*alloc) (tfm_action act); ++ void (*free) (coa_t coa, tfm_action act); ++ /* minimal size of the flow we still try to compress */ ++ int (*min_size_deflate) (void); ++ __u32(*checksum) (char *data, __u32 length); ++ /* main transform procedures */ ++ void (*compress) (coa_t coa, __u8 * src_first, unsigned src_len, ++ __u8 * dst_first, unsigned *dst_len); ++ void (*decompress) (coa_t coa, __u8 * src_first, unsigned src_len, ++ __u8 * dst_first, unsigned *dst_len); ++} compression_plugin; ++ ++typedef struct compression_mode_plugin { ++ /* generic fields */ ++ plugin_header h; ++ /* this is called when estimating compressibility ++ of a logical cluster by its content */ ++ int (*should_deflate) (struct inode * inode, cloff_t index); ++ /* this is called when results of compression should be saved */ ++ int (*accept_hook) (struct inode * inode, cloff_t index); ++ /* this is called when results of compression should be discarded */ ++ int (*discard_hook) (struct inode * inode, cloff_t index); ++} compression_mode_plugin; ++ ++typedef struct cluster_plugin { ++ /* generic fields */ ++ plugin_header h; ++ int shift; ++} cluster_plugin; ++ ++typedef struct sd_ext_plugin { ++ /* generic fields */ ++ plugin_header h; ++ int (*present) (struct inode * inode, char **area, int *len); ++ int (*absent) (struct inode * inode); ++ int (*save_len) (struct inode * inode); ++ int (*save) (struct inode * inode, char **area); ++ /* alignment requirement for this stat-data part */ ++ int alignment; ++} sd_ext_plugin; ++ ++/* this plugin contains methods to allocate objectid for newly created files, ++ to deallocate objectid when file gets removed, to report number of used and ++ free objectids */ ++typedef struct oid_allocator_plugin { ++ /* generic fields */ ++ plugin_header h; ++ int (*init_oid_allocator) (reiser4_oid_allocator * map, __u64 nr_files, ++ __u64 oids); ++ /* used to report statfs->f_files */ ++ __u64(*oids_used) 
(reiser4_oid_allocator * map); ++ /* get next oid to use */ ++ __u64(*next_oid) (reiser4_oid_allocator * map); ++ /* used to report statfs->f_ffree */ ++ __u64(*oids_free) (reiser4_oid_allocator * map); ++ /* allocate new objectid */ ++ int (*allocate_oid) (reiser4_oid_allocator * map, oid_t *); ++ /* release objectid */ ++ int (*release_oid) (reiser4_oid_allocator * map, oid_t); ++ /* how many pages to reserve in transaction for allocation of new ++ objectid */ ++ int (*oid_reserve_allocate) (reiser4_oid_allocator * map); ++ /* how many pages to reserve in transaction for freeing of an ++ objectid */ ++ int (*oid_reserve_release) (reiser4_oid_allocator * map); ++ void (*print_info) (const char *, reiser4_oid_allocator *); ++} oid_allocator_plugin; ++ ++/* disk layout plugin: this specifies super block, journal, bitmap (if there ++ are any) locations, etc */ ++typedef struct disk_format_plugin { ++ /* generic fields */ ++ plugin_header h; ++ /* replay journal, initialize super_info_data, etc */ ++ int (*init_format) (struct super_block *, void *data); ++ ++ /* key of root directory stat data */ ++ const reiser4_key *(*root_dir_key) (const struct super_block *); ++ ++ int (*release) (struct super_block *); ++ jnode *(*log_super) (struct super_block *); ++ int (*check_open) (const struct inode * object); ++ int (*version_update) (struct super_block *); ++} disk_format_plugin; ++ ++struct jnode_plugin { ++ /* generic fields */ ++ plugin_header h; ++ int (*init) (jnode * node); ++ int (*parse) (jnode * node); ++ struct address_space *(*mapping) (const jnode * node); ++ unsigned long (*index) (const jnode * node); ++ jnode *(*clone) (jnode * node); ++}; ++ ++/* plugin instance. */ ++/* */ ++/* This is "wrapper" union for all types of plugins. Most of the code uses */ ++/* plugins of particular type (file_plugin, dir_plugin, etc.) rather than */ ++/* operates with pointers to reiser4_plugin. 
This union is only used in */ ++/* some generic code in plugin/plugin.c that operates on all */ ++/* plugins. Technically speaking purpose of this union is to add type */ ++/* safety to said generic code: each plugin type (file_plugin, for */ ++/* example), contains plugin_header as its first memeber. This first member */ ++/* is located at the same place in memory as .h member of */ ++/* reiser4_plugin. Generic code, obtains pointer to reiser4_plugin and */ ++/* looks in the .h which is header of plugin type located in union. This */ ++/* allows to avoid type-casts. */ ++union reiser4_plugin { ++ /* generic fields */ ++ plugin_header h; ++ /* file plugin */ ++ file_plugin file; ++ /* directory plugin */ ++ dir_plugin dir; ++ /* hash plugin, used by directory plugin */ ++ hash_plugin hash; ++ /* fibration plugin used by directory plugin */ ++ fibration_plugin fibration; ++ /* cipher transform plugin, used by file plugin */ ++ cipher_plugin cipher; ++ /* digest transform plugin, used by file plugin */ ++ digest_plugin digest; ++ /* compression transform plugin, used by file plugin */ ++ compression_plugin compression; ++ /* tail plugin, used by file plugin */ ++ formatting_plugin formatting; ++ /* permission plugin */ ++ perm_plugin perm; ++ /* node plugin */ ++ node_plugin node; ++ /* item plugin */ ++ item_plugin item; ++ /* stat-data extension plugin */ ++ sd_ext_plugin sd_ext; ++ /* disk layout plugin */ ++ disk_format_plugin format; ++ /* object id allocator plugin */ ++ oid_allocator_plugin oid_allocator; ++ /* plugin for different jnode types */ ++ jnode_plugin jnode; ++ /* compression mode plugin, used by object plugin */ ++ compression_mode_plugin compression_mode; ++ /* cluster plugin, used by object plugin */ ++ cluster_plugin clust; ++ /* place-holder for new plugin types that can be registered ++ dynamically, and used by other dynamically loaded plugins. 
*/ ++ void *generic; ++}; ++ ++struct reiser4_plugin_ops { ++ /* called when plugin is initialized */ ++ int (*init) (reiser4_plugin * plugin); ++ /* called when plugin is unloaded */ ++ int (*done) (reiser4_plugin * plugin); ++ /* load given plugin from disk */ ++ int (*load) (struct inode * inode, ++ reiser4_plugin * plugin, char **area, int *len); ++ /* how many space is required to store this plugin's state ++ in stat-data */ ++ int (*save_len) (struct inode * inode, reiser4_plugin * plugin); ++ /* save persistent plugin-data to disk */ ++ int (*save) (struct inode * inode, reiser4_plugin * plugin, ++ char **area); ++ /* alignment requirement for on-disk state of this plugin ++ in number of bytes */ ++ int alignment; ++ /* install itself into given inode. This can return error ++ (e.g., you cannot change hash of non-empty directory). */ ++ int (*change) (struct inode * inode, reiser4_plugin * plugin, ++ pset_member memb); ++ /* install itself into given inode. This can return error ++ (e.g., you cannot change hash of non-empty directory). 
*/ ++ int (*inherit) (struct inode * inode, struct inode * parent, ++ reiser4_plugin * plugin); ++}; ++ ++/* functions implemented in fs/reiser4/plugin/plugin.c */ ++ ++/* stores plugin reference in reiser4-specific part of inode */ ++extern int set_object_plugin(struct inode *inode, reiser4_plugin_id id); ++extern int setup_plugins(struct super_block *super, reiser4_plugin ** area); ++extern int init_plugins(void); ++ ++/* builtin plugins */ ++ ++/* builtin hash-plugins */ ++ ++typedef enum { ++ RUPASOV_HASH_ID, ++ R5_HASH_ID, ++ TEA_HASH_ID, ++ FNV1_HASH_ID, ++ DEGENERATE_HASH_ID, ++ LAST_HASH_ID ++} reiser4_hash_id; ++ ++/* builtin cipher plugins */ ++ ++typedef enum { ++ NONE_CIPHER_ID, ++ LAST_CIPHER_ID ++} reiser4_cipher_id; ++ ++/* builtin digest plugins */ ++ ++typedef enum { ++ SHA256_32_DIGEST_ID, ++ LAST_DIGEST_ID ++} reiser4_digest_id; ++ ++/* builtin compression mode plugins */ ++typedef enum { ++ NONE_COMPRESSION_MODE_ID, ++ LATTD_COMPRESSION_MODE_ID, ++ ULTIM_COMPRESSION_MODE_ID, ++ FORCE_COMPRESSION_MODE_ID, ++ CONVX_COMPRESSION_MODE_ID, ++ LAST_COMPRESSION_MODE_ID ++} reiser4_compression_mode_id; ++ ++/* builtin cluster plugins */ ++typedef enum { ++ CLUSTER_64K_ID, ++ CLUSTER_32K_ID, ++ CLUSTER_16K_ID, ++ CLUSTER_8K_ID, ++ CLUSTER_4K_ID, ++ LAST_CLUSTER_ID ++} reiser4_cluster_id; ++ ++/* builtin tail-plugins */ ++ ++typedef enum { ++ NEVER_TAILS_FORMATTING_ID, ++ ALWAYS_TAILS_FORMATTING_ID, ++ SMALL_FILE_FORMATTING_ID, ++ LAST_TAIL_FORMATTING_ID ++} reiser4_formatting_id; ++ ++/* compression/clustering specific data */ ++typedef struct compression_data { ++ reiser4_compression_id coa; /* id of the compression algorithm */ ++} compression_data_t; ++ ++typedef __u8 cluster_data_t; /* cluster info */ ++ ++/* data type used to pack parameters that we pass to vfs object creation ++ function create_object() */ ++struct reiser4_object_create_data { ++ /* plugin to control created object */ ++ reiser4_file_id id; ++ /* mode of regular file, directory or 
special file */ ++/* what happens if some other sort of perm plugin is in use? */ ++ int mode; ++ /* rdev of special file */ ++ dev_t rdev; ++ /* symlink target */ ++ const char *name; ++ /* add here something for non-standard objects you invent, like ++ query for interpolation file etc. */ ++ ++ crypto_stat_t * crypto; ++ compression_data_t *compression; ++ cluster_data_t *cluster; ++ ++ struct inode *parent; ++ struct dentry *dentry; ++}; ++ ++/* description of directory entry being created/destroyed/sought for ++ ++ It is passed down to the directory plugin and farther to the ++ directory item plugin methods. Creation of new directory is done in ++ several stages: first we search for an entry with the same name, then ++ create new one. reiser4_dir_entry_desc is used to store some information ++ collected at some stage of this process and required later: key of ++ item that we want to insert/delete and pointer to an object that will ++ be bound by the new directory entry. Probably some more fields will ++ be added there. ++ ++*/ ++struct reiser4_dir_entry_desc { ++ /* key of directory entry */ ++ reiser4_key key; ++ /* object bound by this entry. */ ++ struct inode *obj; ++}; ++ ++#define MAX_PLUGIN_TYPE_LABEL_LEN 32 ++#define MAX_PLUGIN_PLUG_LABEL_LEN 32 ++ ++/* used for interface with user-land: table-driven parsing in ++ reiser4(). */ ++typedef struct plugin_locator { ++ reiser4_plugin_type type_id; ++ reiser4_plugin_id id; ++ char type_label[MAX_PLUGIN_TYPE_LABEL_LEN]; ++ char plug_label[MAX_PLUGIN_PLUG_LABEL_LEN]; ++} plugin_locator; ++ ++extern int locate_plugin(struct inode *inode, plugin_locator * loc); ++ ++#define PLUGIN_BY_ID(TYPE,ID,FIELD) \ ++static inline TYPE *TYPE ## _by_id( reiser4_plugin_id id ) \ ++{ \ ++ reiser4_plugin *plugin = plugin_by_id ( ID, id ); \ ++ return plugin ? 
& plugin -> FIELD : NULL; \ ++} \ ++static inline TYPE *TYPE ## _by_disk_id( reiser4_tree *tree, d16 *id ) \ ++{ \ ++ reiser4_plugin *plugin = plugin_by_disk_id ( tree, ID, id ); \ ++ return plugin ? & plugin -> FIELD : NULL; \ ++} \ ++static inline TYPE *TYPE ## _by_unsafe_id( reiser4_plugin_id id ) \ ++{ \ ++ reiser4_plugin *plugin = plugin_by_unsafe_id ( ID, id ); \ ++ return plugin ? & plugin -> FIELD : NULL; \ ++} \ ++static inline reiser4_plugin* TYPE ## _to_plugin( TYPE* plugin ) \ ++{ \ ++ return ( reiser4_plugin * ) plugin; \ ++} \ ++static inline reiser4_plugin_id TYPE ## _id( TYPE* plugin ) \ ++{ \ ++ return TYPE ## _to_plugin (plugin) -> h.id; \ ++} \ ++typedef struct { int foo; } TYPE ## _plugin_dummy ++ ++PLUGIN_BY_ID(item_plugin, REISER4_ITEM_PLUGIN_TYPE, item); ++PLUGIN_BY_ID(file_plugin, REISER4_FILE_PLUGIN_TYPE, file); ++PLUGIN_BY_ID(dir_plugin, REISER4_DIR_PLUGIN_TYPE, dir); ++PLUGIN_BY_ID(node_plugin, REISER4_NODE_PLUGIN_TYPE, node); ++PLUGIN_BY_ID(sd_ext_plugin, REISER4_SD_EXT_PLUGIN_TYPE, sd_ext); ++PLUGIN_BY_ID(perm_plugin, REISER4_PERM_PLUGIN_TYPE, perm); ++PLUGIN_BY_ID(hash_plugin, REISER4_HASH_PLUGIN_TYPE, hash); ++PLUGIN_BY_ID(fibration_plugin, REISER4_FIBRATION_PLUGIN_TYPE, fibration); ++PLUGIN_BY_ID(cipher_plugin, REISER4_CIPHER_PLUGIN_TYPE, cipher); ++PLUGIN_BY_ID(digest_plugin, REISER4_DIGEST_PLUGIN_TYPE, digest); ++PLUGIN_BY_ID(compression_plugin, REISER4_COMPRESSION_PLUGIN_TYPE, compression); ++PLUGIN_BY_ID(formatting_plugin, REISER4_FORMATTING_PLUGIN_TYPE, formatting); ++PLUGIN_BY_ID(disk_format_plugin, REISER4_FORMAT_PLUGIN_TYPE, format); ++PLUGIN_BY_ID(jnode_plugin, REISER4_JNODE_PLUGIN_TYPE, jnode); ++PLUGIN_BY_ID(compression_mode_plugin, REISER4_COMPRESSION_MODE_PLUGIN_TYPE, ++ compression_mode); ++PLUGIN_BY_ID(cluster_plugin, REISER4_CLUSTER_PLUGIN_TYPE, clust); ++ ++extern int save_plugin_id(reiser4_plugin * plugin, d16 * area); ++ ++extern struct list_head *get_plugin_list(reiser4_plugin_type type_id); ++ ++#define 
for_all_plugins(ptype, plugin) \ ++for (plugin = list_entry(get_plugin_list(ptype)->next, reiser4_plugin, h.linkage); \ ++ get_plugin_list(ptype) != &plugin->h.linkage; \ ++ plugin = list_entry(plugin->h.linkage.next, reiser4_plugin, h.linkage)) ++ ++ ++extern int grab_plugin_pset(struct inode *self, struct inode *ancestor, pset_member memb); ++extern int force_plugin_pset(struct inode *self, pset_member memb, reiser4_plugin *plug); ++extern int finish_pset(struct inode *inode); ++ ++/* defined in fs/reiser4/plugin/object.c */ ++extern file_plugin file_plugins[LAST_FILE_PLUGIN_ID]; ++/* defined in fs/reiser4/plugin/object.c */ ++extern dir_plugin dir_plugins[LAST_DIR_ID]; ++/* defined in fs/reiser4/plugin/item/static_stat.c */ ++extern sd_ext_plugin sd_ext_plugins[LAST_SD_EXTENSION]; ++/* defined in fs/reiser4/plugin/hash.c */ ++extern hash_plugin hash_plugins[LAST_HASH_ID]; ++/* defined in fs/reiser4/plugin/fibration.c */ ++extern fibration_plugin fibration_plugins[LAST_FIBRATION_ID]; ++/* defined in fs/reiser4/plugin/crypt.c */ ++extern cipher_plugin cipher_plugins[LAST_CIPHER_ID]; ++/* defined in fs/reiser4/plugin/digest.c */ ++extern digest_plugin digest_plugins[LAST_DIGEST_ID]; ++/* defined in fs/reiser4/plugin/compress/compress.c */ ++extern compression_plugin compression_plugins[LAST_COMPRESSION_ID]; ++/* defined in fs/reiser4/plugin/compress/compression_mode.c */ ++extern compression_mode_plugin ++compression_mode_plugins[LAST_COMPRESSION_MODE_ID]; ++/* defined in fs/reiser4/plugin/cluster.c */ ++extern cluster_plugin cluster_plugins[LAST_CLUSTER_ID]; ++/* defined in fs/reiser4/plugin/tail.c */ ++extern formatting_plugin formatting_plugins[LAST_TAIL_FORMATTING_ID]; ++/* defined in fs/reiser4/plugin/security/security.c */ ++extern perm_plugin perm_plugins[LAST_PERM_ID]; ++/* defined in fs/reiser4/plugin/item/item.c */ ++extern item_plugin item_plugins[LAST_ITEM_ID]; ++/* defined in fs/reiser4/plugin/node/node.c */ ++extern node_plugin 
node_plugins[LAST_NODE_ID]; ++/* defined in fs/reiser4/plugin/disk_format/disk_format.c */ ++extern disk_format_plugin format_plugins[LAST_FORMAT_ID]; ++ ++/* __FS_REISER4_PLUGIN_TYPES_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/plugin_header.h b/fs/reiser4/plugin/plugin_header.h +new file mode 100644 +index 0000000..68cf5b0 +--- /dev/null ++++ b/fs/reiser4/plugin/plugin_header.h +@@ -0,0 +1,144 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* plugin header. Data structures required by all plugin types. */ ++ ++#if !defined( __PLUGIN_HEADER_H__ ) ++#define __PLUGIN_HEADER_H__ ++ ++/* plugin data-types and constants */ ++ ++#include "../debug.h" ++#include "../dformat.h" ++ ++typedef enum { ++ REISER4_FILE_PLUGIN_TYPE, ++ REISER4_DIR_PLUGIN_TYPE, ++ REISER4_ITEM_PLUGIN_TYPE, ++ REISER4_NODE_PLUGIN_TYPE, ++ REISER4_HASH_PLUGIN_TYPE, ++ REISER4_FIBRATION_PLUGIN_TYPE, ++ REISER4_FORMATTING_PLUGIN_TYPE, ++ REISER4_PERM_PLUGIN_TYPE, ++ REISER4_SD_EXT_PLUGIN_TYPE, ++ REISER4_FORMAT_PLUGIN_TYPE, ++ REISER4_JNODE_PLUGIN_TYPE, ++ REISER4_CIPHER_PLUGIN_TYPE, ++ REISER4_DIGEST_PLUGIN_TYPE, ++ REISER4_COMPRESSION_PLUGIN_TYPE, ++ REISER4_COMPRESSION_MODE_PLUGIN_TYPE, ++ REISER4_CLUSTER_PLUGIN_TYPE, ++ REISER4_PLUGIN_TYPES ++} reiser4_plugin_type; ++ ++typedef enum { ++ REISER4_DIRECTORY_FILE, ++ REISER4_REGULAR_FILE, ++ REISER4_SYMLINK_FILE, ++ REISER4_SPECIAL_FILE, ++} reiser4_plugin_group; ++ ++struct reiser4_plugin_ops; ++/* generic plugin operations, supported by each ++ plugin type. */ ++typedef struct reiser4_plugin_ops reiser4_plugin_ops; ++ ++/* the common part of all plugin instances. 
*/ ++typedef struct plugin_header { ++ /* plugin type */ ++ reiser4_plugin_type type_id; ++ /* id of this plugin */ ++ reiser4_plugin_id id; ++ /* bitmask of groups the plugin belongs to. */ ++ reiser4_plugin_groups groups; ++ /* plugin operations */ ++ reiser4_plugin_ops *pops; ++/* NIKITA-FIXME-HANS: usage of and access to label and desc is not commented and defined. */ ++ /* short label of this plugin */ ++ const char *label; ++ /* descriptive string.. */ ++ const char *desc; ++ /* list linkage */ ++ struct list_head linkage; ++} plugin_header; ++ ++#define plugin_of_group(plug, group) (plug->h.groups & (1 << group)) ++ ++/* PRIVATE INTERFACES */ ++/* NIKITA-FIXME-HANS: what is this for and why does it duplicate what is in plugin_header? */ ++/* plugin type representation. */ ++typedef struct reiser4_plugin_type_data { ++ /* internal plugin type identifier. Should coincide with ++ index of this item in plugins[] array. */ ++ reiser4_plugin_type type_id; ++ /* short symbolic label of this plugin type. Should be no longer ++ than MAX_PLUGIN_TYPE_LABEL_LEN characters including '\0'. 
*/ ++ const char *label; ++ /* plugin type description longer than .label */ ++ const char *desc; ++ ++/* NIKITA-FIXME-HANS: define built-in */ ++ /* number of built-in plugin instances of this type */ ++ int builtin_num; ++ /* array of built-in plugins */ ++ void *builtin; ++ struct list_head plugins_list; ++ size_t size; ++} reiser4_plugin_type_data; ++ ++extern reiser4_plugin_type_data plugins[REISER4_PLUGIN_TYPES]; ++ ++int is_plugin_type_valid(reiser4_plugin_type type); ++int is_plugin_id_valid(reiser4_plugin_type type, reiser4_plugin_id id); ++ ++static inline reiser4_plugin *plugin_at(reiser4_plugin_type_data * ptype, int i) ++{ ++ char *builtin; ++ ++ builtin = ptype->builtin; ++ return (reiser4_plugin *) (builtin + i * ptype->size); ++} ++ ++/* return plugin by its @type_id and @id */ ++static inline reiser4_plugin *plugin_by_id(reiser4_plugin_type type, ++ reiser4_plugin_id id) ++{ ++ assert("nikita-1651", is_plugin_type_valid(type)); ++ assert("nikita-1652", is_plugin_id_valid(type, id)); ++ return plugin_at(&plugins[type], id); ++} ++ ++extern reiser4_plugin *plugin_by_unsafe_id(reiser4_plugin_type type_id, ++ reiser4_plugin_id id); ++ ++/** ++ * plugin_by_disk_id - get reiser4_plugin ++ * @type_id: plugin type id ++ * @did: plugin id in disk format ++ * ++ * Returns reiser4_plugin by plugin type id and plugin_id. ++ */ ++static inline reiser4_plugin *plugin_by_disk_id(reiser4_tree * tree UNUSED_ARG, ++ reiser4_plugin_type type_id, ++ __le16 *plugin_id) ++{ ++ /* ++ * what we should do properly is to maintain within each file-system a ++ * dictionary that maps on-disk plugin ids to "universal" ids. This ++ * dictionary will be resolved on mount time, so that this function ++ * will perform just one additional array lookup. 
++ */ ++ return plugin_by_unsafe_id(type_id, le16_to_cpu(*plugin_id)); ++} ++ ++/* __PLUGIN_HEADER_H__ */ ++#endif ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * End: ++ */ +diff --git a/fs/reiser4/plugin/plugin_set.c b/fs/reiser4/plugin/plugin_set.c +new file mode 100644 +index 0000000..528632d +--- /dev/null ++++ b/fs/reiser4/plugin/plugin_set.c +@@ -0,0 +1,379 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++/* This file contains Reiser4 plugin set operations */ ++ ++/* plugin sets ++ * ++ * Each file in reiser4 is controlled by a whole set of plugins (file plugin, ++ * directory plugin, hash plugin, tail policy plugin, security plugin, etc.) ++ * assigned (inherited, deduced from mode bits, etc.) at creation time. This ++ * set of plugins (so called pset) is described by structure plugin_set (see ++ * plugin/plugin_set.h), which contains pointers to all required plugins. ++ * ++ * Children can inherit some pset members from their parent, however sometimes ++ * it is useful to specify members different from parent ones. Since object's ++ * pset can not be easily changed without fatal consequences, we use for this ++ * purpose another special plugin table (so called hset, or heir set) described ++ * by the same structure. ++ * ++ * Inode only stores a pointers to pset and hset. Different inodes with the ++ * same set of pset (hset) members point to the same pset (hset). This is ++ * archived by storing psets and hsets in global hash table. Races are avoided ++ * by simple (and efficient so far) solution of never recycling psets, even ++ * when last inode pointing to it is destroyed. 
++ */ ++ ++#include "../debug.h" ++#include "../super.h" ++#include "plugin_set.h" ++ ++#include ++#include ++ ++/* slab for plugin sets */ ++static struct kmem_cache *plugin_set_slab; ++ ++static spinlock_t plugin_set_lock[8] __cacheline_aligned_in_smp = { ++ [0 ... 7] = SPIN_LOCK_UNLOCKED ++}; ++ ++/* hash table support */ ++ ++#define PS_TABLE_SIZE (32) ++ ++static inline plugin_set *cast_to(const unsigned long *a) ++{ ++ return container_of(a, plugin_set, hashval); ++} ++ ++static inline int pseq(const unsigned long *a1, const unsigned long *a2) ++{ ++ plugin_set *set1; ++ plugin_set *set2; ++ ++ /* make sure fields are not missed in the code below */ ++ cassert(sizeof *set1 == ++ sizeof set1->hashval + ++ sizeof set1->link + ++ sizeof set1->file + ++ sizeof set1->dir + ++ sizeof set1->perm + ++ sizeof set1->formatting + ++ sizeof set1->hash + ++ sizeof set1->fibration + ++ sizeof set1->sd + ++ sizeof set1->dir_item + ++ sizeof set1->cipher + ++ sizeof set1->digest + ++ sizeof set1->compression + ++ sizeof set1->compression_mode + ++ sizeof set1->cluster + ++ sizeof set1->create); ++ ++ set1 = cast_to(a1); ++ set2 = cast_to(a2); ++ return ++ set1->hashval == set2->hashval && ++ set1->file == set2->file && ++ set1->dir == set2->dir && ++ set1->perm == set2->perm && ++ set1->formatting == set2->formatting && ++ set1->hash == set2->hash && ++ set1->fibration == set2->fibration && ++ set1->sd == set2->sd && ++ set1->dir_item == set2->dir_item && ++ set1->cipher == set2->cipher && ++ set1->digest == set2->digest && ++ set1->compression == set2->compression && ++ set1->compression_mode == set2->compression_mode && ++ set1->cluster == set2->cluster && ++ set1->create == set2->create; ++} ++ ++#define HASH_FIELD(hash, set, field) \ ++({ \ ++ (hash) += (unsigned long)(set)->field >> 2; \ ++}) ++ ++static inline unsigned long calculate_hash(const plugin_set * set) ++{ ++ unsigned long result; ++ ++ result = 0; ++ HASH_FIELD(result, set, file); ++ HASH_FIELD(result, set, 
dir); ++ HASH_FIELD(result, set, perm); ++ HASH_FIELD(result, set, formatting); ++ HASH_FIELD(result, set, hash); ++ HASH_FIELD(result, set, fibration); ++ HASH_FIELD(result, set, sd); ++ HASH_FIELD(result, set, dir_item); ++ HASH_FIELD(result, set, cipher); ++ HASH_FIELD(result, set, digest); ++ HASH_FIELD(result, set, compression); ++ HASH_FIELD(result, set, compression_mode); ++ HASH_FIELD(result, set, cluster); ++ HASH_FIELD(result, set, create); ++ return result & (PS_TABLE_SIZE - 1); ++} ++ ++static inline unsigned long ++pshash(ps_hash_table * table, const unsigned long *a) ++{ ++ return *a; ++} ++ ++/* The hash table definition */ ++#define KMALLOC(size) kmalloc((size), reiser4_ctx_gfp_mask_get()) ++#define KFREE(ptr, size) kfree(ptr) ++TYPE_SAFE_HASH_DEFINE(ps, plugin_set, unsigned long, hashval, link, pshash, ++ pseq); ++#undef KFREE ++#undef KMALLOC ++ ++static ps_hash_table ps_table; ++static plugin_set empty_set = { ++ .hashval = 0, ++ .file = NULL, ++ .dir = NULL, ++ .perm = NULL, ++ .formatting = NULL, ++ .hash = NULL, ++ .fibration = NULL, ++ .sd = NULL, ++ .dir_item = NULL, ++ .cipher = NULL, ++ .digest = NULL, ++ .compression = NULL, ++ .compression_mode = NULL, ++ .cluster = NULL, ++ .create = NULL, ++ .link = {NULL} ++}; ++ ++plugin_set *plugin_set_get_empty(void) ++{ ++ return &empty_set; ++} ++ ++void plugin_set_put(plugin_set * set) ++{ ++} ++ ++static inline unsigned long *pset_field(plugin_set * set, int offset) ++{ ++ return (unsigned long *)(((char *)set) + offset); ++} ++ ++static int plugin_set_field(plugin_set ** set, const unsigned long val, ++ const int offset) ++{ ++ unsigned long *spot; ++ spinlock_t *lock; ++ plugin_set replica; ++ plugin_set *twin; ++ plugin_set *psal; ++ plugin_set *orig; ++ ++ assert("nikita-2902", set != NULL); ++ assert("nikita-2904", *set != NULL); ++ ++ spot = pset_field(*set, offset); ++ if (unlikely(*spot == val)) ++ return 0; ++ ++ replica = *(orig = *set); ++ *pset_field(&replica, offset) = val; ++ 
replica.hashval = calculate_hash(&replica); ++ rcu_read_lock(); ++ twin = ps_hash_find(&ps_table, &replica.hashval); ++ if (unlikely(twin == NULL)) { ++ rcu_read_unlock(); ++ psal = kmem_cache_alloc(plugin_set_slab, ++ reiser4_ctx_gfp_mask_get()); ++ if (psal == NULL) ++ return RETERR(-ENOMEM); ++ *psal = replica; ++ lock = &plugin_set_lock[replica.hashval & 7]; ++ spin_lock(lock); ++ twin = ps_hash_find(&ps_table, &replica.hashval); ++ if (likely(twin == NULL)) { ++ *set = psal; ++ ps_hash_insert_rcu(&ps_table, psal); ++ } else { ++ *set = twin; ++ kmem_cache_free(plugin_set_slab, psal); ++ } ++ spin_unlock(lock); ++ } else { ++ rcu_read_unlock(); ++ *set = twin; ++ } ++ return 0; ++} ++ ++static struct { ++ int offset; ++ reiser4_plugin_groups groups; ++ reiser4_plugin_type type; ++} pset_descr[PSET_LAST] = { ++ [PSET_FILE] = { ++ .offset = offsetof(plugin_set, file), ++ .type = REISER4_FILE_PLUGIN_TYPE, ++ .groups = 0 ++ }, ++ [PSET_DIR] = { ++ .offset = offsetof(plugin_set, dir), ++ .type = REISER4_DIR_PLUGIN_TYPE, ++ .groups = 0 ++ }, ++ [PSET_PERM] = { ++ .offset = offsetof(plugin_set, perm), ++ .type = REISER4_PERM_PLUGIN_TYPE, ++ .groups = 0 ++ }, ++ [PSET_FORMATTING] = { ++ .offset = offsetof(plugin_set, formatting), ++ .type = REISER4_FORMATTING_PLUGIN_TYPE, ++ .groups = 0 ++ }, ++ [PSET_HASH] = { ++ .offset = offsetof(plugin_set, hash), ++ .type = REISER4_HASH_PLUGIN_TYPE, ++ .groups = 0 ++ }, ++ [PSET_FIBRATION] = { ++ .offset = offsetof(plugin_set, fibration), ++ .type = REISER4_FIBRATION_PLUGIN_TYPE, ++ .groups = 0 ++ }, ++ [PSET_SD] = { ++ .offset = offsetof(plugin_set, sd), ++ .type = REISER4_ITEM_PLUGIN_TYPE, ++ .groups = (1 << STAT_DATA_ITEM_TYPE) ++ }, ++ [PSET_DIR_ITEM] = { ++ .offset = offsetof(plugin_set, dir_item), ++ .type = REISER4_ITEM_PLUGIN_TYPE, ++ .groups = (1 << DIR_ENTRY_ITEM_TYPE) ++ }, ++ [PSET_CIPHER] = { ++ .offset = offsetof(plugin_set, cipher), ++ .type = REISER4_CIPHER_PLUGIN_TYPE, ++ .groups = 0 ++ }, ++ [PSET_DIGEST] = { ++ 
.offset = offsetof(plugin_set, digest), ++ .type = REISER4_DIGEST_PLUGIN_TYPE, ++ .groups = 0 ++ }, ++ [PSET_COMPRESSION] = { ++ .offset = offsetof(plugin_set, compression), ++ .type = REISER4_COMPRESSION_PLUGIN_TYPE, ++ .groups = 0 ++ }, ++ [PSET_COMPRESSION_MODE] = { ++ .offset = offsetof(plugin_set, compression_mode), ++ .type = REISER4_COMPRESSION_MODE_PLUGIN_TYPE, ++ .groups = 0 ++ }, ++ [PSET_CLUSTER] = { ++ .offset = offsetof(plugin_set, cluster), ++ .type = REISER4_CLUSTER_PLUGIN_TYPE, ++ .groups = 0 ++ }, ++ [PSET_CREATE] = { ++ .offset = offsetof(plugin_set, create), ++ .type = REISER4_FILE_PLUGIN_TYPE, ++ .groups = (1 << REISER4_REGULAR_FILE) ++ } ++}; ++ ++#define DEFINE_PSET_OPS(PREFIX) \ ++ reiser4_plugin_type PREFIX##_member_to_type_unsafe(pset_member memb) \ ++{ \ ++ if (memb > PSET_LAST) \ ++ return REISER4_PLUGIN_TYPES; \ ++ return pset_descr[memb].type; \ ++} \ ++ \ ++int PREFIX##_set_unsafe(plugin_set ** set, pset_member memb, \ ++ reiser4_plugin * plugin) \ ++{ \ ++ assert("nikita-3492", set != NULL); \ ++ assert("nikita-3493", *set != NULL); \ ++ assert("nikita-3494", plugin != NULL); \ ++ assert("nikita-3495", 0 <= memb && memb < PSET_LAST); \ ++ assert("nikita-3496", plugin->h.type_id == pset_descr[memb].type); \ ++ \ ++ if (pset_descr[memb].groups) \ ++ if (!(pset_descr[memb].groups & plugin->h.groups)) \ ++ return -EINVAL; \ ++ \ ++ return plugin_set_field(set, \ ++ (unsigned long)plugin, pset_descr[memb].offset); \ ++} \ ++ \ ++reiser4_plugin *PREFIX##_get(plugin_set * set, pset_member memb) \ ++{ \ ++ assert("nikita-3497", set != NULL); \ ++ assert("nikita-3498", 0 <= memb && memb < PSET_LAST); \ ++ \ ++ return *(reiser4_plugin **) (((char *)set) + pset_descr[memb].offset); \ ++} ++ ++DEFINE_PSET_OPS(aset); ++ ++int set_plugin(plugin_set ** set, pset_member memb, reiser4_plugin * plugin) { ++ return plugin_set_field(set, ++ (unsigned long)plugin, pset_descr[memb].offset); ++} ++ ++/** ++ * init_plugin_set - create plugin set cache and 
hash table ++ * ++ * Initializes slab cache of plugin_set-s and their hash table. It is part of ++ * reiser4 module initialization. ++ */ ++int init_plugin_set(void) ++{ ++ int result; ++ ++ result = ps_hash_init(&ps_table, PS_TABLE_SIZE); ++ if (result == 0) { ++ plugin_set_slab = kmem_cache_create("plugin_set", ++ sizeof(plugin_set), 0, ++ SLAB_HWCACHE_ALIGN, ++ NULL, NULL); ++ if (plugin_set_slab == NULL) ++ result = RETERR(-ENOMEM); ++ } ++ return result; ++} ++ ++/** ++ * done_plugin_set - delete plugin_set cache and plugin_set hash table ++ * ++ * This is called on reiser4 module unloading or system shutdown. ++ */ ++void done_plugin_set(void) ++{ ++ plugin_set *cur, *next; ++ ++ for_all_in_htable(&ps_table, ps, cur, next) { ++ ps_hash_remove(&ps_table, cur); ++ kmem_cache_free(plugin_set_slab, cur); ++ } ++ destroy_reiser4_cache(&plugin_set_slab); ++ ps_hash_done(&ps_table); ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 120 ++ * End: ++ */ +diff --git a/fs/reiser4/plugin/plugin_set.h b/fs/reiser4/plugin/plugin_set.h +new file mode 100644 +index 0000000..8edcaea +--- /dev/null ++++ b/fs/reiser4/plugin/plugin_set.h +@@ -0,0 +1,77 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Reiser4 plugin set definition. ++ See fs/reiser4/plugin/plugin_set.c for details */ ++ ++#if !defined( __PLUGIN_SET_H__ ) ++#define __PLUGIN_SET_H__ ++ ++#include "../type_safe_hash.h" ++#include "plugin.h" ++ ++#include ++ ++struct plugin_set; ++typedef struct plugin_set plugin_set; ++ ++TYPE_SAFE_HASH_DECLARE(ps, plugin_set); ++ ++struct plugin_set { ++ unsigned long hashval; ++ /* plugin of file */ ++ file_plugin *file; ++ /* plugin of dir */ ++ dir_plugin *dir; ++ /* perm plugin for this file */ ++ perm_plugin *perm; ++ /* tail policy plugin. Only meaningful for regular files */ ++ formatting_plugin *formatting; ++ /* hash plugin. 
Only meaningful for directories. */ ++ hash_plugin *hash; ++ /* fibration plugin. Only meaningful for directories. */ ++ fibration_plugin *fibration; ++ /* plugin of stat-data */ ++ item_plugin *sd; ++ /* plugin of items a directory is built of */ ++ item_plugin *dir_item; ++ /* cipher plugin */ ++ cipher_plugin *cipher; ++ /* digest plugin */ ++ digest_plugin *digest; ++ /* compression plugin */ ++ compression_plugin *compression; ++ /* compression mode plugin */ ++ compression_mode_plugin *compression_mode; ++ /* cluster plugin */ ++ cluster_plugin *cluster; ++ /* this specifies file plugin of regular children. ++ only meaningful for directories */ ++ file_plugin *create; ++ ps_hash_link link; ++}; ++ ++extern plugin_set *plugin_set_get_empty(void); ++extern void plugin_set_put(plugin_set * set); ++ ++extern int init_plugin_set(void); ++extern void done_plugin_set(void); ++ ++extern reiser4_plugin *aset_get(plugin_set * set, pset_member memb); ++extern int set_plugin(plugin_set ** set, pset_member memb, ++ reiser4_plugin * plugin); ++extern int aset_set_unsafe(plugin_set ** set, pset_member memb, ++ reiser4_plugin * plugin); ++extern reiser4_plugin_type aset_member_to_type_unsafe(pset_member memb); ++ ++/* __PLUGIN_SET_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/security/Makefile b/fs/reiser4/plugin/security/Makefile +new file mode 100644 +index 0000000..645dbb5 +--- /dev/null ++++ b/fs/reiser4/plugin/security/Makefile +@@ -0,0 +1,4 @@ ++obj-$(CONFIG_REISER4_FS) += security_plugins.o ++ ++security_plugins-objs := \ ++ perm.o +diff --git a/fs/reiser4/plugin/security/perm.c b/fs/reiser4/plugin/security/perm.c +new file mode 100644 +index 0000000..ab3b4fc +--- /dev/null ++++ b/fs/reiser4/plugin/security/perm.c +@@ -0,0 +1,44 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* ++ * this file contains implementation of permission plugins. Currently, only ++ * RWX_PERM_ID is implemented ++ */ ++ ++#include "../plugin.h" ++#include "../plugin_header.h" ++#include "../../debug.h" ++ ++perm_plugin perm_plugins[LAST_PERM_ID] = { ++ [NULL_PERM_ID] = { ++ .h = { ++ .type_id = REISER4_PERM_PLUGIN_TYPE, ++ .id = NULL_PERM_ID, ++ .pops = NULL, ++ .label = "null", ++ .desc = "stub permission plugin", ++ .linkage = {NULL, NULL} ++ }, ++ .read_ok = NULL, ++ .write_ok = NULL, ++ .lookup_ok = NULL, ++ .create_ok = NULL, ++ .link_ok = NULL, ++ .unlink_ok = NULL, ++ .delete_ok = NULL, ++ .mask_ok = NULL, ++ .setattr_ok = NULL, ++ .getattr_ok = NULL, ++ .rename_ok = NULL, ++ } ++}; ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * End: ++ */ +diff --git a/fs/reiser4/plugin/security/perm.h b/fs/reiser4/plugin/security/perm.h +new file mode 100644 +index 0000000..747e8f7 +--- /dev/null ++++ b/fs/reiser4/plugin/security/perm.h +@@ -0,0 +1,82 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Perm (short for "permissions") plugins common stuff. 
*/ ++ ++#if !defined( __REISER4_PERM_H__ ) ++#define __REISER4_PERM_H__ ++ ++#include "../../forward.h" ++#include "../plugin_header.h" ++ ++#include ++#include /* for struct file */ ++#include /* for struct dentry */ ++ ++/* interface for perm plugin. ++ ++ Perm plugin method can be implemented through: ++ ++ 1. consulting ->i_mode bits in stat data ++ ++ 2. obtaining acl from the tree and inspecting it ++ ++ 3. asking some kernel module or user-level program to authorize access. ++ ++ This allows for integration with things like capabilities, SELinux-style ++ security contexts, etc. ++ ++*/ ++/* NIKITA-FIXME-HANS: define what this is targeted for. It does not seem to be intended for use with sys_reiser4. Explain. */ ++typedef struct perm_plugin { ++ /* generic plugin fields */ ++ plugin_header h; ++ ++ /* check permissions for read/write */ ++ int (*read_ok) (struct file *file, const char __user *buf, ++ size_t size, loff_t *off); ++ int (*write_ok) (struct file *file, const char __user *buf, ++ size_t size, loff_t *off); ++ ++ /* check permissions for lookup */ ++ int (*lookup_ok) (struct inode * parent, struct dentry * dentry); ++ ++ /* check permissions for create */ ++ int (*create_ok) (struct inode * parent, struct dentry * dentry, ++ reiser4_object_create_data * data); ++ ++ /* check permissions for linking @where to @existing */ ++ int (*link_ok) (struct dentry * existing, struct inode * parent, ++ struct dentry * where); ++ ++ /* check permissions for unlinking @victim from @parent */ ++ int (*unlink_ok) (struct inode * parent, struct dentry * victim); ++ ++ /* check permissions for deletion of @object whose last reference is ++ by @parent */ ++ int (*delete_ok) (struct inode * parent, struct dentry * victim); ++ int (*mask_ok) (struct inode * inode, int mask); ++ /* check whether attribute change is acceptable */ ++ int (*setattr_ok) (struct dentry * dentry, struct iattr * attr); ++ ++ /* check whether stat(2) is allowed */ ++ int (*getattr_ok) (struct 
vfsmount * mnt UNUSED_ARG, ++ struct dentry * dentry, struct kstat * stat); ++ /* check whether rename(2) is allowed */ ++ int (*rename_ok) (struct inode * old_dir, struct dentry * old, ++ struct inode * new_dir, struct dentry * new); ++} perm_plugin; ++ ++typedef enum { NULL_PERM_ID, LAST_PERM_ID } reiser4_perm_id; ++ ++/* __REISER4_PERM_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/space/Makefile b/fs/reiser4/plugin/space/Makefile +new file mode 100644 +index 0000000..5a0c94f +--- /dev/null ++++ b/fs/reiser4/plugin/space/Makefile +@@ -0,0 +1,4 @@ ++obj-$(CONFIG_REISER4_FS) += space_plugins.o ++ ++space_plugins-objs := \ ++ bitmap.o +diff --git a/fs/reiser4/plugin/space/bitmap.c b/fs/reiser4/plugin/space/bitmap.c +new file mode 100644 +index 0000000..a0ff17a +--- /dev/null ++++ b/fs/reiser4/plugin/space/bitmap.c +@@ -0,0 +1,1585 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#include "../../debug.h" ++#include "../../dformat.h" ++#include "../../txnmgr.h" ++#include "../../jnode.h" ++#include "../../block_alloc.h" ++#include "../../tree.h" ++#include "../../super.h" ++#include "../plugin.h" ++#include "space_allocator.h" ++#include "bitmap.h" ++ ++#include ++#include /* for struct super_block */ ++#include ++#include ++ ++/* Proposed (but discarded) optimization: dynamic loading/unloading of bitmap ++ * blocks ++ ++ A useful optimization of reiser4 bitmap handling would be dynamic bitmap ++ blocks loading/unloading which is different from v3.x where all bitmap ++ blocks are loaded at mount time. ++ ++ To implement bitmap blocks unloading we need to count bitmap block usage ++ and detect currently unused blocks allowing them to be unloaded. It is not ++ a simple task since we allow several threads to modify one bitmap block ++ simultaneously. 
++ ++ Briefly speaking, the following schema is proposed: we count in special ++ variable associated with each bitmap block. That is for counting of block ++ alloc/dealloc operations on that bitmap block. With a deferred block ++ deallocation feature of reiser4 all those operation will be represented in ++ atom dirty/deleted lists as jnodes for freshly allocated or deleted ++ nodes. ++ ++ So, we increment usage counter for each new node allocated or deleted, and ++ decrement it at atom commit one time for each node from the dirty/deleted ++ atom's list. Of course, freshly allocated node deletion and node reusing ++ from atom deleted (if we do so) list should decrement bitmap usage counter ++ also. ++ ++ This schema seems to be working but that reference counting is ++ not easy to debug. I think we should agree with Hans and do not implement ++ it in v4.0. Current code implements "on-demand" bitmap blocks loading only. ++ ++ For simplicity all bitmap nodes (both commit and working bitmap blocks) are ++ loaded into memory on fs mount time or each bitmap nodes are loaded at the ++ first access to it, the "dont_load_bitmap" mount option controls whether ++ bimtap nodes should be loaded at mount time. Dynamic unloading of bitmap ++ nodes currently is not supported. */ ++ ++#define CHECKSUM_SIZE 4 ++ ++#define BYTES_PER_LONG (sizeof(long)) ++ ++#if BITS_PER_LONG == 64 ++# define LONG_INT_SHIFT (6) ++#else ++# define LONG_INT_SHIFT (5) ++#endif ++ ++#define LONG_INT_MASK (BITS_PER_LONG - 1UL) ++ ++typedef unsigned long ulong_t; ++ ++#define bmap_size(blocksize) ((blocksize) - CHECKSUM_SIZE) ++#define bmap_bit_count(blocksize) (bmap_size(blocksize) << 3) ++ ++/* Block allocation/deallocation are done through special bitmap objects which ++ are allocated in an array at fs mount. */ ++struct bitmap_node { ++ struct mutex mutex; /* long term lock object */ ++ ++ jnode *wjnode; /* j-nodes for WORKING ... */ ++ jnode *cjnode; /* ... 
and COMMIT bitmap blocks */ ++ ++ bmap_off_t first_zero_bit; /* for skip_busy option implementation */ ++ ++ atomic_t loaded; /* a flag which shows that bnode is loaded ++ * already */ ++}; ++ ++static inline char *bnode_working_data(struct bitmap_node *bnode) ++{ ++ char *data; ++ ++ data = jdata(bnode->wjnode); ++ assert("zam-429", data != NULL); ++ ++ return data + CHECKSUM_SIZE; ++} ++ ++static inline char *bnode_commit_data(const struct bitmap_node *bnode) ++{ ++ char *data; ++ ++ data = jdata(bnode->cjnode); ++ assert("zam-430", data != NULL); ++ ++ return data + CHECKSUM_SIZE; ++} ++ ++static inline __u32 bnode_commit_crc(const struct bitmap_node *bnode) ++{ ++ char *data; ++ ++ data = jdata(bnode->cjnode); ++ assert("vpf-261", data != NULL); ++ ++ return le32_to_cpu(get_unaligned((d32 *)data)); ++} ++ ++static inline void bnode_set_commit_crc(struct bitmap_node *bnode, __u32 crc) ++{ ++ char *data; ++ ++ data = jdata(bnode->cjnode); ++ assert("vpf-261", data != NULL); ++ ++ put_unaligned(cpu_to_le32(crc), (d32 *)data); ++} ++ ++/* ZAM-FIXME-HANS: is the idea that this might be a union someday? having ++ * written the code, does this added abstraction still have */ ++/* ANSWER(Zam): No, the abstractions is in the level above (exact place is the ++ * reiser4_space_allocator structure) */ ++/* ZAM-FIXME-HANS: I don't understand your english in comment above. */ ++/* FIXME-HANS(Zam): I don't understand the questions like "might be a union ++ * someday?". What they about? If there is a reason to have a union, it should ++ * be a union, if not, it should not be a union. "..might be someday" means no ++ * reason. 
*/ ++struct bitmap_allocator_data { ++ /* an array for bitmap blocks direct access */ ++ struct bitmap_node *bitmap; ++}; ++ ++#define get_barray(super) \ ++(((struct bitmap_allocator_data *)(get_super_private(super)->space_allocator.u.generic)) -> bitmap) ++ ++#define get_bnode(super, i) (get_barray(super) + i) ++ ++/* allocate and initialize jnode with JNODE_BITMAP type */ ++static jnode *bnew(void) ++{ ++ jnode *jal = jalloc(); ++ ++ if (jal) ++ jnode_init(jal, current_tree, JNODE_BITMAP); ++ ++ return jal; ++} ++ ++/* this file contains: ++ - bitmap based implementation of space allocation plugin ++ - all the helper functions like set bit, find_first_zero_bit, etc */ ++ ++/* Audited by: green(2002.06.12) */ ++static int find_next_zero_bit_in_word(ulong_t word, int start_bit) ++{ ++ ulong_t mask = 1UL << start_bit; ++ int i = start_bit; ++ ++ while ((word & mask) != 0) { ++ mask <<= 1; ++ if (++i >= BITS_PER_LONG) ++ break; ++ } ++ ++ return i; ++} ++ ++#include ++ ++#if BITS_PER_LONG == 64 ++ ++#define OFF(addr) (((ulong_t)(addr) & (BYTES_PER_LONG - 1)) << 3) ++#define BASE(addr) ((ulong_t*) ((ulong_t)(addr) & ~(BYTES_PER_LONG - 1))) ++ ++static inline void reiser4_set_bit(int nr, void *addr) ++{ ++ ext2_set_bit(nr + OFF(addr), BASE(addr)); ++} ++ ++static inline void reiser4_clear_bit(int nr, void *addr) ++{ ++ ext2_clear_bit(nr + OFF(addr), BASE(addr)); ++} ++ ++static inline int reiser4_test_bit(int nr, void *addr) ++{ ++ return ext2_test_bit(nr + OFF(addr), BASE(addr)); ++} ++static inline int reiser4_find_next_zero_bit(void *addr, int maxoffset, ++ int offset) ++{ ++ int off = OFF(addr); ++ ++ return ext2_find_next_zero_bit(BASE(addr), maxoffset + off, ++ offset + off) - off; ++} ++ ++#else ++ ++#define reiser4_set_bit(nr, addr) ext2_set_bit(nr, addr) ++#define reiser4_clear_bit(nr, addr) ext2_clear_bit(nr, addr) ++#define reiser4_test_bit(nr, addr) ext2_test_bit(nr, addr) ++ ++#define reiser4_find_next_zero_bit(addr, maxoffset, offset) \ 
++ext2_find_next_zero_bit(addr, maxoffset, offset) ++#endif ++ ++/* Search for a set bit in the bit array [@start_offset, @max_offset[, offsets ++ * are counted from @addr, return the offset of the first bit if it is found, ++ * @maxoffset otherwise. */ ++static bmap_off_t __reiser4_find_next_set_bit(void *addr, bmap_off_t max_offset, ++ bmap_off_t start_offset) ++{ ++ ulong_t *base = addr; ++ /* start_offset is in bits, convert it to byte offset within bitmap. */ ++ int word_nr = start_offset >> LONG_INT_SHIFT; ++ /* bit number within the byte. */ ++ int bit_nr = start_offset & LONG_INT_MASK; ++ int max_word_nr = (max_offset - 1) >> LONG_INT_SHIFT; ++ ++ assert("zam-387", max_offset != 0); ++ ++ /* Unaligned @start_offset case. */ ++ if (bit_nr != 0) { ++ bmap_nr_t nr; ++ ++ nr = find_next_zero_bit_in_word(~(base[word_nr]), bit_nr); ++ ++ if (nr < BITS_PER_LONG) ++ return (word_nr << LONG_INT_SHIFT) + nr; ++ ++ ++word_nr; ++ } ++ ++ /* Fast scan trough aligned words. */ ++ while (word_nr <= max_word_nr) { ++ if (base[word_nr] != 0) { ++ return (word_nr << LONG_INT_SHIFT) ++ + find_next_zero_bit_in_word(~(base[word_nr]), 0); ++ } ++ ++ ++word_nr; ++ } ++ ++ return max_offset; ++} ++ ++#if BITS_PER_LONG == 64 ++ ++static bmap_off_t reiser4_find_next_set_bit(void *addr, bmap_off_t max_offset, ++ bmap_off_t start_offset) ++{ ++ bmap_off_t off = OFF(addr); ++ ++ return __reiser4_find_next_set_bit(BASE(addr), max_offset + off, ++ start_offset + off) - off; ++} ++ ++#else ++#define reiser4_find_next_set_bit(addr, max_offset, start_offset) \ ++ __reiser4_find_next_set_bit(addr, max_offset, start_offset) ++#endif ++ ++/* search for the first set bit in single word. 
*/ ++static int find_last_set_bit_in_word(ulong_t word, int start_bit) ++{ ++ ulong_t bit_mask; ++ int nr = start_bit; ++ ++ assert("zam-965", start_bit < BITS_PER_LONG); ++ assert("zam-966", start_bit >= 0); ++ ++ bit_mask = (1UL << nr); ++ ++ while (bit_mask != 0) { ++ if (bit_mask & word) ++ return nr; ++ bit_mask >>= 1; ++ nr--; ++ } ++ return BITS_PER_LONG; ++} ++ ++/* Search bitmap for a set bit in backward direction from the end to the ++ * beginning of given region ++ * ++ * @result: result offset of the last set bit ++ * @addr: base memory address, ++ * @low_off: low end of the search region, edge bit included into the region, ++ * @high_off: high end of the search region, edge bit included into the region, ++ * ++ * @return: 0 - set bit was found, -1 otherwise. ++ */ ++static int ++reiser4_find_last_set_bit(bmap_off_t * result, void *addr, bmap_off_t low_off, ++ bmap_off_t high_off) ++{ ++ ulong_t *base = addr; ++ int last_word; ++ int first_word; ++ int last_bit; ++ int nr; ++ ++ assert("zam-962", high_off >= low_off); ++ ++ last_word = high_off >> LONG_INT_SHIFT; ++ last_bit = high_off & LONG_INT_MASK; ++ first_word = low_off >> LONG_INT_SHIFT; ++ ++ if (last_bit < BITS_PER_LONG) { ++ nr = find_last_set_bit_in_word(base[last_word], last_bit); ++ if (nr < BITS_PER_LONG) { ++ *result = (last_word << LONG_INT_SHIFT) + nr; ++ return 0; ++ } ++ --last_word; ++ } ++ while (last_word >= first_word) { ++ if (base[last_word] != 0x0) { ++ last_bit = ++ find_last_set_bit_in_word(base[last_word], ++ BITS_PER_LONG - 1); ++ assert("zam-972", last_bit < BITS_PER_LONG); ++ *result = (last_word << LONG_INT_SHIFT) + last_bit; ++ return 0; ++ } ++ --last_word; ++ } ++ ++ return -1; /* set bit not found */ ++} ++ ++/* Search bitmap for a clear bit in backward direction from the end to the ++ * beginning of given region */ ++static int ++reiser4_find_last_zero_bit(bmap_off_t * result, void *addr, bmap_off_t low_off, ++ bmap_off_t high_off) ++{ ++ ulong_t *base = addr; ++ 
int last_word; ++ int first_word; ++ int last_bit; ++ int nr; ++ ++ last_word = high_off >> LONG_INT_SHIFT; ++ last_bit = high_off & LONG_INT_MASK; ++ first_word = low_off >> LONG_INT_SHIFT; ++ ++ if (last_bit < BITS_PER_LONG) { ++ nr = find_last_set_bit_in_word(~base[last_word], last_bit); ++ if (nr < BITS_PER_LONG) { ++ *result = (last_word << LONG_INT_SHIFT) + nr; ++ return 0; ++ } ++ --last_word; ++ } ++ while (last_word >= first_word) { ++ if (base[last_word] != (ulong_t) (-1)) { ++ *result = (last_word << LONG_INT_SHIFT) + ++ find_last_set_bit_in_word(~base[last_word], ++ BITS_PER_LONG - 1); ++ return 0; ++ } ++ --last_word; ++ } ++ ++ return -1; /* zero bit not found */ ++} ++ ++/* Audited by: green(2002.06.12) */ ++static void reiser4_clear_bits(char *addr, bmap_off_t start, bmap_off_t end) ++{ ++ int first_byte; ++ int last_byte; ++ ++ unsigned char first_byte_mask = 0xFF; ++ unsigned char last_byte_mask = 0xFF; ++ ++ assert("zam-410", start < end); ++ ++ first_byte = start >> 3; ++ last_byte = (end - 1) >> 3; ++ ++ if (last_byte > first_byte + 1) ++ memset(addr + first_byte + 1, 0, ++ (size_t) (last_byte - first_byte - 1)); ++ ++ first_byte_mask >>= 8 - (start & 0x7); ++ last_byte_mask <<= ((end - 1) & 0x7) + 1; ++ ++ if (first_byte == last_byte) { ++ addr[first_byte] &= (first_byte_mask | last_byte_mask); ++ } else { ++ addr[first_byte] &= first_byte_mask; ++ addr[last_byte] &= last_byte_mask; ++ } ++} ++ ++/* Audited by: green(2002.06.12) */ ++/* ZAM-FIXME-HANS: comment this */ ++static void reiser4_set_bits(char *addr, bmap_off_t start, bmap_off_t end) ++{ ++ int first_byte; ++ int last_byte; ++ ++ unsigned char first_byte_mask = 0xFF; ++ unsigned char last_byte_mask = 0xFF; ++ ++ assert("zam-386", start < end); ++ ++ first_byte = start >> 3; ++ last_byte = (end - 1) >> 3; ++ ++ if (last_byte > first_byte + 1) ++ memset(addr + first_byte + 1, 0xFF, ++ (size_t) (last_byte - first_byte - 1)); ++ ++ first_byte_mask <<= start & 0x7; ++ last_byte_mask >>= 7 
- ((end - 1) & 0x7); ++ ++ if (first_byte == last_byte) { ++ addr[first_byte] |= (first_byte_mask & last_byte_mask); ++ } else { ++ addr[first_byte] |= first_byte_mask; ++ addr[last_byte] |= last_byte_mask; ++ } ++} ++ ++#define ADLER_BASE 65521 ++#define ADLER_NMAX 5552 ++ ++/* Calculates the adler32 checksum for the data pointed by `data` of the ++ length `len`. This function was originally taken from zlib, version 1.1.3, ++ July 9th, 1998. ++ ++ Copyright (C) 1995-1998 Jean-loup Gailly and Mark Adler ++ ++ This software is provided 'as-is', without any express or implied ++ warranty. In no event will the authors be held liable for any damages ++ arising from the use of this software. ++ ++ Permission is granted to anyone to use this software for any purpose, ++ including commercial applications, and to alter it and redistribute it ++ freely, subject to the following restrictions: ++ ++ 1. The origin of this software must not be misrepresented; you must not ++ claim that you wrote the original software. If you use this software ++ in a product, an acknowledgment in the product documentation would be ++ appreciated but is not required. ++ 2. Altered source versions must be plainly marked as such, and must not be ++ misrepresented as being the original software. ++ 3. This notice may not be removed or altered from any source distribution. ++ ++ Jean-loup Gailly Mark Adler ++ jloup@gzip.org madler@alumni.caltech.edu ++ ++ The above comment applies only to the reiser4_adler32 function. ++*/ ++ ++__u32 reiser4_adler32(char *data, __u32 len) ++{ ++ unsigned char *t = data; ++ __u32 s1 = 1; ++ __u32 s2 = 0; ++ int k; ++ ++ while (len > 0) { ++ k = len < ADLER_NMAX ? 
len : ADLER_NMAX; ++ len -= k; ++ ++ while (k--) { ++ s1 += *t++; ++ s2 += s1; ++ } ++ ++ s1 %= ADLER_BASE; ++ s2 %= ADLER_BASE; ++ } ++ return (s2 << 16) | s1; ++} ++ ++#define sb_by_bnode(bnode) \ ++ ((struct super_block *)jnode_get_tree(bnode->wjnode)->super) ++ ++static __u32 bnode_calc_crc(const struct bitmap_node *bnode, unsigned long size) ++{ ++ return reiser4_adler32(bnode_commit_data(bnode), bmap_size(size)); ++} ++ ++static int ++bnode_check_adler32(const struct bitmap_node *bnode, unsigned long size) ++{ ++ if (bnode_calc_crc(bnode, size) != bnode_commit_crc(bnode)) { ++ bmap_nr_t bmap; ++ ++ bmap = bnode - get_bnode(sb_by_bnode(bnode), 0); ++ ++ warning("vpf-263", ++ "Checksum for the bitmap block %llu is incorrect", ++ bmap); ++ ++ return RETERR(-EIO); ++ } ++ ++ return 0; ++} ++ ++#define REISER4_CHECK_BMAP_CRC (0) ++ ++#if REISER4_CHECK_BMAP_CRC ++static int bnode_check_crc(const struct bitmap_node *bnode) ++{ ++ return bnode_check_adler32(bnode, ++ bmap_size(sb_by_bnode(bnode)->s_blocksize)); ++} ++ ++/* REISER4_CHECK_BMAP_CRC */ ++#else ++ ++#define bnode_check_crc(bnode) (0) ++ ++/* REISER4_CHECK_BMAP_CRC */ ++#endif ++ ++/* Recalculates the adler32 checksum for only 1 byte change. ++ adler - previous adler checksum ++ old_data, data - old, new byte values. ++ tail == (chunk - offset) : length, checksum was calculated for, - offset of ++ the changed byte within this chunk. ++ This function can be used for checksum calculation optimisation. ++*/ ++ ++static __u32 ++adler32_recalc(__u32 adler, unsigned char old_data, unsigned char data, ++ __u32 tail) ++{ ++ __u32 delta = data - old_data + 2 * ADLER_BASE; ++ __u32 s1 = adler & 0xffff; ++ __u32 s2 = (adler >> 16) & 0xffff; ++ ++ s1 = (delta + s1) % ADLER_BASE; ++ s2 = (delta * tail + s2) % ADLER_BASE; ++ ++ return (s2 << 16) | s1; ++} ++ ++#define LIMIT(val, boundary) ((val) > (boundary) ? 
(boundary) : (val)) ++ ++/** ++ * get_nr_bitmap - calculate number of bitmap blocks ++ * @super: super block with initialized blocksize and block count ++ * ++ * Calculates number of bitmap blocks of a filesystem which uses bitmaps to ++ * maintain free disk space. It assumes that each bitmap addresses the same ++ * number of blocks which is calculated by bmap_block_count macro defined in ++ * above. Number of blocks in the filesystem has to be initialized in reiser4 ++ * private data of super block already so that it can be obtained via ++ * reiser4_block_count(). Unfortunately, number of blocks addressed by a bitmap ++ * is not power of 2 because 4 bytes are used for checksum. Therefore, we have ++ * to use special function to divide and modulo 64bits filesystem block ++ * counters. ++ * ++ * Example: suppose filesystem have 32768 blocks. Blocksize is 4096. Each bitmap ++ * block addresses (4096 - 4) * 8 = 32736 blocks. Number of bitmaps to address ++ * all 32768 blocks is calculated as (32768 - 1) / 32736 + 1 = 2. ++ */ ++static bmap_nr_t get_nr_bmap(const struct super_block *super) ++{ ++ u64 quotient; ++ ++ assert("zam-393", reiser4_block_count(super) != 0); ++ ++ quotient = reiser4_block_count(super) - 1; ++ do_div(quotient, bmap_bit_count(super->s_blocksize)); ++ return quotient + 1; ++} ++ ++/** ++ * parse_blocknr - calculate bitmap number and offset in it by block number ++ * @block: pointer to block number to calculate location in bitmap of ++ * @bmap: pointer where to store bitmap block number ++ * @offset: pointer where to store offset within bitmap block ++ * ++ * Calculates location of bit which is responsible for allocation/freeing of ++ * block @*block. That location is represented by bitmap block number and offset ++ * within that bitmap block. 
++ */ ++static void ++parse_blocknr(const reiser4_block_nr *block, bmap_nr_t *bmap, ++ bmap_off_t *offset) ++{ ++ struct super_block *super = get_current_context()->super; ++ u64 quotient = *block; ++ ++ *offset = do_div(quotient, bmap_bit_count(super->s_blocksize)); ++ *bmap = quotient; ++ ++ assert("zam-433", *bmap < get_nr_bmap(super)); ++ assert("", *offset < bmap_bit_count(super->s_blocksize)); ++} ++ ++#if REISER4_DEBUG ++/* Audited by: green(2002.06.12) */ ++static void ++check_block_range(const reiser4_block_nr * start, const reiser4_block_nr * len) ++{ ++ struct super_block *sb = reiser4_get_current_sb(); ++ ++ assert("zam-436", sb != NULL); ++ ++ assert("zam-455", start != NULL); ++ assert("zam-437", *start != 0); ++ assert("zam-541", !reiser4_blocknr_is_fake(start)); ++ assert("zam-441", *start < reiser4_block_count(sb)); ++ ++ if (len != NULL) { ++ assert("zam-438", *len != 0); ++ assert("zam-442", *start + *len <= reiser4_block_count(sb)); ++ } ++} ++ ++static void check_bnode_loaded(const struct bitmap_node *bnode) ++{ ++ assert("zam-485", bnode != NULL); ++ assert("zam-483", jnode_page(bnode->wjnode) != NULL); ++ assert("zam-484", jnode_page(bnode->cjnode) != NULL); ++ assert("nikita-2820", jnode_is_loaded(bnode->wjnode)); ++ assert("nikita-2821", jnode_is_loaded(bnode->cjnode)); ++} ++ ++#else ++ ++# define check_block_range(start, len) do { /* nothing */} while(0) ++# define check_bnode_loaded(bnode) do { /* nothing */} while(0) ++ ++#endif ++ ++/* modify bnode->first_zero_bit (if we free bits before); bnode should be ++ spin-locked */ ++static inline void ++adjust_first_zero_bit(struct bitmap_node *bnode, bmap_off_t offset) ++{ ++ if (offset < bnode->first_zero_bit) ++ bnode->first_zero_bit = offset; ++} ++ ++/* return a physical disk address for logical bitmap number @bmap */ ++/* FIXME-VS: this is somehow related to disk layout? */ ++/* ZAM-FIXME-HANS: your answer is? 
Use not more than one function dereference ++ * per block allocation so that performance is not affected. Probably this ++ * whole file should be considered part of the disk layout plugin, and other ++ * disk layouts can use other defines and efficiency will not be significantly ++ * affected. */ ++ ++#define REISER4_FIRST_BITMAP_BLOCK \ ++ ((REISER4_MASTER_OFFSET / PAGE_CACHE_SIZE) + 2) ++ ++/* Audited by: green(2002.06.12) */ ++static void ++get_bitmap_blocknr(struct super_block *super, bmap_nr_t bmap, ++ reiser4_block_nr * bnr) ++{ ++ ++ assert("zam-390", bmap < get_nr_bmap(super)); ++ ++#ifdef CONFIG_REISER4_BADBLOCKS ++#define BITMAP_PLUGIN_DISKMAP_ID ((0xc0e1<<16) | (0xe0ff)) ++ /* Check if the diskmap have this already, first. */ ++ if (reiser4_get_diskmap_value(BITMAP_PLUGIN_DISKMAP_ID, bmap, bnr) == 0) ++ return; /* Found it in diskmap */ ++#endif ++ /* FIXME_ZAM: before discussing of disk layouts and disk format ++ plugins I implement bitmap location scheme which is close to scheme ++ used in reiser 3.6 */ ++ if (bmap == 0) { ++ *bnr = REISER4_FIRST_BITMAP_BLOCK; ++ } else { ++ *bnr = bmap * bmap_bit_count(super->s_blocksize); ++ } ++} ++ ++/* construct a fake block number for shadow bitmap (WORKING BITMAP) block */ ++/* Audited by: green(2002.06.12) */ ++static void get_working_bitmap_blocknr(bmap_nr_t bmap, reiser4_block_nr * bnr) ++{ ++ *bnr = ++ (reiser4_block_nr) ((bmap & ~REISER4_BLOCKNR_STATUS_BIT_MASK) | ++ REISER4_BITMAP_BLOCKS_STATUS_VALUE); ++} ++ ++/* bnode structure initialization */ ++static void ++init_bnode(struct bitmap_node *bnode, ++ struct super_block *super UNUSED_ARG, bmap_nr_t bmap UNUSED_ARG) ++{ ++ memset(bnode, 0, sizeof(struct bitmap_node)); ++ ++ mutex_init(&bnode->mutex); ++ atomic_set(&bnode->loaded, 0); ++} ++ ++static void release(jnode * node) ++{ ++ jrelse(node); ++ JF_SET(node, JNODE_HEARD_BANSHEE); ++ jput(node); ++} ++ ++/* This function is for internal bitmap.c use because it assumes that jnode is ++ in under full 
control of this thread */ ++static void done_bnode(struct bitmap_node *bnode) ++{ ++ if (bnode) { ++ atomic_set(&bnode->loaded, 0); ++ if (bnode->wjnode != NULL) ++ release(bnode->wjnode); ++ if (bnode->cjnode != NULL) ++ release(bnode->cjnode); ++ bnode->wjnode = bnode->cjnode = NULL; ++ } ++} ++ ++/* ZAM-FIXME-HANS: comment this. Called only by load_and_lock_bnode()*/ ++static int prepare_bnode(struct bitmap_node *bnode, jnode **cjnode_ret, ++ jnode **wjnode_ret) ++{ ++ struct super_block *super; ++ jnode *cjnode; ++ jnode *wjnode; ++ bmap_nr_t bmap; ++ int ret; ++ ++ super = reiser4_get_current_sb(); ++ ++ *wjnode_ret = wjnode = bnew(); ++ if (wjnode == NULL) { ++ *cjnode_ret = NULL; ++ return RETERR(-ENOMEM); ++ } ++ ++ *cjnode_ret = cjnode = bnew(); ++ if (cjnode == NULL) ++ return RETERR(-ENOMEM); ++ ++ bmap = bnode - get_bnode(super, 0); ++ ++ get_working_bitmap_blocknr(bmap, &wjnode->blocknr); ++ get_bitmap_blocknr(super, bmap, &cjnode->blocknr); ++ ++ jref(cjnode); ++ jref(wjnode); ++ ++ /* load commit bitmap */ ++ ret = jload_gfp(cjnode, GFP_NOFS, 1); ++ ++ if (ret) ++ goto error; ++ ++ /* allocate memory for working bitmap block. Note that for ++ * bitmaps jinit_new() doesn't actually modifies node content, ++ * so parallel calls to this are ok. */ ++ ret = jinit_new(wjnode, GFP_NOFS); ++ ++ if (ret != 0) { ++ jrelse(cjnode); ++ goto error; ++ } ++ ++ return 0; ++ ++ error: ++ jput(cjnode); ++ jput(wjnode); ++ *wjnode_ret = *cjnode_ret = NULL; ++ return ret; ++ ++} ++ ++/* Check the bnode data on read. */ ++static int check_struct_bnode(struct bitmap_node *bnode, __u32 blksize) ++{ ++ void *data; ++ int ret; ++ ++ /* Check CRC */ ++ ret = bnode_check_adler32(bnode, blksize); ++ ++ if (ret) { ++ return ret; ++ } ++ ++ data = jdata(bnode->cjnode) + CHECKSUM_SIZE; ++ ++ /* Check the very first bit -- it must be busy. 
*/ ++ if (!reiser4_test_bit(0, data)) { ++ warning("vpf-1362", "The allocator block %llu is not marked " ++ "as used.", (unsigned long long)bnode->cjnode->blocknr); ++ ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/* load bitmap blocks "on-demand" */ ++static int load_and_lock_bnode(struct bitmap_node *bnode) ++{ ++ int ret; ++ ++ jnode *cjnode; ++ jnode *wjnode; ++ ++ assert("nikita-3040", reiser4_schedulable()); ++ ++/* ZAM-FIXME-HANS: since bitmaps are never unloaded, this does not ++ * need to be atomic, right? Just leave a comment that if bitmaps were ++ * unloadable, this would need to be atomic. */ ++ if (atomic_read(&bnode->loaded)) { ++ /* bitmap is already loaded, nothing to do */ ++ check_bnode_loaded(bnode); ++ mutex_lock(&bnode->mutex); ++ assert("nikita-2827", atomic_read(&bnode->loaded)); ++ return 0; ++ } ++ ++ ret = prepare_bnode(bnode, &cjnode, &wjnode); ++ if (ret == 0) { ++ mutex_lock(&bnode->mutex); ++ ++ if (!atomic_read(&bnode->loaded)) { ++ assert("nikita-2822", cjnode != NULL); ++ assert("nikita-2823", wjnode != NULL); ++ assert("nikita-2824", jnode_is_loaded(cjnode)); ++ assert("nikita-2825", jnode_is_loaded(wjnode)); ++ ++ bnode->wjnode = wjnode; ++ bnode->cjnode = cjnode; ++ ++ ret = check_struct_bnode(bnode, current_blocksize); ++ if (!ret) { ++ cjnode = wjnode = NULL; ++ atomic_set(&bnode->loaded, 1); ++ /* working bitmap is initialized by on-disk ++ * commit bitmap. This should be performed ++ * under mutex. */ ++ memcpy(bnode_working_data(bnode), ++ bnode_commit_data(bnode), ++ bmap_size(current_blocksize)); ++ } else ++ mutex_unlock(&bnode->mutex); ++ } else ++ /* race: someone already loaded bitmap while we were ++ * busy initializing data. 
*/ ++ check_bnode_loaded(bnode); ++ } ++ ++ if (wjnode != NULL) { ++ release(wjnode); ++ bnode->wjnode = NULL; ++ } ++ if (cjnode != NULL) { ++ release(cjnode); ++ bnode->cjnode = NULL; ++ } ++ ++ return ret; ++} ++ ++static void release_and_unlock_bnode(struct bitmap_node *bnode) ++{ ++ check_bnode_loaded(bnode); ++ mutex_unlock(&bnode->mutex); ++} ++ ++/* This function does all block allocation work but only for one bitmap ++ block.*/ ++/* FIXME_ZAM: It does not allow us to allocate block ranges across bitmap ++ block responsibility zone boundaries. This had no sense in v3.6 but may ++ have it in v4.x */ ++/* ZAM-FIXME-HANS: do you mean search one bitmap block forward? */ ++static int ++search_one_bitmap_forward(bmap_nr_t bmap, bmap_off_t * offset, ++ bmap_off_t max_offset, int min_len, int max_len) ++{ ++ struct super_block *super = get_current_context()->super; ++ struct bitmap_node *bnode = get_bnode(super, bmap); ++ ++ char *data; ++ ++ bmap_off_t search_end; ++ bmap_off_t start; ++ bmap_off_t end; ++ ++ int set_first_zero_bit = 0; ++ ++ int ret; ++ ++ assert("zam-364", min_len > 0); ++ assert("zam-365", max_len >= min_len); ++ assert("zam-366", *offset <= max_offset); ++ ++ ret = load_and_lock_bnode(bnode); ++ ++ if (ret) ++ return ret; ++ ++ data = bnode_working_data(bnode); ++ ++ start = *offset; ++ ++ if (bnode->first_zero_bit >= start) { ++ start = bnode->first_zero_bit; ++ set_first_zero_bit = 1; ++ } ++ ++ while (start + min_len < max_offset) { ++ ++ start = ++ reiser4_find_next_zero_bit((long *)data, max_offset, start); ++ if (set_first_zero_bit) { ++ bnode->first_zero_bit = start; ++ set_first_zero_bit = 0; ++ } ++ if (start >= max_offset) ++ break; ++ ++ search_end = LIMIT(start + max_len, max_offset); ++ end = ++ reiser4_find_next_set_bit((long *)data, search_end, start); ++ if (end >= start + min_len) { ++ /* we can't trust find_next_set_bit result if set bit ++ was not fount, result may be bigger than ++ max_offset */ ++ if (end > search_end) ++ 
end = search_end; ++ ++ ret = end - start; ++ *offset = start; ++ ++ reiser4_set_bits(data, start, end); ++ ++ /* FIXME: we may advance first_zero_bit if [start, ++ end] region overlaps the first_zero_bit point */ ++ ++ break; ++ } ++ ++ start = end + 1; ++ } ++ ++ release_and_unlock_bnode(bnode); ++ ++ return ret; ++} ++ ++static int ++search_one_bitmap_backward(bmap_nr_t bmap, bmap_off_t * start_offset, ++ bmap_off_t end_offset, int min_len, int max_len) ++{ ++ struct super_block *super = get_current_context()->super; ++ struct bitmap_node *bnode = get_bnode(super, bmap); ++ char *data; ++ bmap_off_t start; ++ int ret; ++ ++ assert("zam-958", min_len > 0); ++ assert("zam-959", max_len >= min_len); ++ assert("zam-960", *start_offset >= end_offset); ++ ++ ret = load_and_lock_bnode(bnode); ++ if (ret) ++ return ret; ++ ++ data = bnode_working_data(bnode); ++ start = *start_offset; ++ ++ while (1) { ++ bmap_off_t end, search_end; ++ ++ /* Find the beginning of the zero filled region */ ++ if (reiser4_find_last_zero_bit(&start, data, end_offset, start)) ++ break; ++ /* Is there more than `min_len' bits from `start' to ++ * `end_offset'? */ ++ if (start < end_offset + min_len - 1) ++ break; ++ ++ /* Do not search to `end_offset' if we need to find less than ++ * `max_len' zero bits. */ ++ if (end_offset + max_len - 1 < start) ++ search_end = start - max_len + 1; ++ else ++ search_end = end_offset; ++ ++ if (reiser4_find_last_set_bit(&end, data, search_end, start)) ++ end = search_end; ++ else ++ end++; ++ ++ if (end + min_len <= start + 1) { ++ if (end < search_end) ++ end = search_end; ++ ret = start - end + 1; ++ *start_offset = end; /* `end' is lowest offset */ ++ assert("zam-987", ++ reiser4_find_next_set_bit(data, start + 1, ++ end) >= start + 1); ++ reiser4_set_bits(data, end, start + 1); ++ break; ++ } ++ ++ if (end <= end_offset) ++ /* left search boundary reached. 
*/ ++ break; ++ start = end - 1; ++ } ++ ++ release_and_unlock_bnode(bnode); ++ return ret; ++} ++ ++/* allocate contiguous range of blocks in bitmap */ ++static int bitmap_alloc_forward(reiser4_block_nr * start, ++ const reiser4_block_nr * end, int min_len, ++ int max_len) ++{ ++ bmap_nr_t bmap, end_bmap; ++ bmap_off_t offset, end_offset; ++ int len; ++ ++ reiser4_block_nr tmp; ++ ++ struct super_block *super = get_current_context()->super; ++ const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize); ++ ++ parse_blocknr(start, &bmap, &offset); ++ ++ tmp = *end - 1; ++ parse_blocknr(&tmp, &end_bmap, &end_offset); ++ ++end_offset; ++ ++ assert("zam-358", end_bmap >= bmap); ++ assert("zam-359", ergo(end_bmap == bmap, end_offset >= offset)); ++ ++ for (; bmap < end_bmap; bmap++, offset = 0) { ++ len = ++ search_one_bitmap_forward(bmap, &offset, max_offset, ++ min_len, max_len); ++ if (len != 0) ++ goto out; ++ } ++ ++ len = ++ search_one_bitmap_forward(bmap, &offset, end_offset, min_len, ++ max_len); ++ out: ++ *start = bmap * max_offset + offset; ++ return len; ++} ++ ++/* allocate contiguous range of blocks in bitmap (from @start to @end in ++ * backward direction) */ ++static int bitmap_alloc_backward(reiser4_block_nr * start, ++ const reiser4_block_nr * end, int min_len, ++ int max_len) ++{ ++ bmap_nr_t bmap, end_bmap; ++ bmap_off_t offset, end_offset; ++ int len; ++ struct super_block *super = get_current_context()->super; ++ const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize); ++ ++ parse_blocknr(start, &bmap, &offset); ++ parse_blocknr(end, &end_bmap, &end_offset); ++ ++ assert("zam-961", end_bmap <= bmap); ++ assert("zam-962", ergo(end_bmap == bmap, end_offset <= offset)); ++ ++ for (; bmap > end_bmap; bmap--, offset = max_offset - 1) { ++ len = ++ search_one_bitmap_backward(bmap, &offset, 0, min_len, ++ max_len); ++ if (len != 0) ++ goto out; ++ } ++ ++ len = ++ search_one_bitmap_backward(bmap, &offset, end_offset, min_len, ++ max_len); 
++ out: ++ *start = bmap * max_offset + offset; ++ return len; ++} ++ ++/* plugin->u.space_allocator.alloc_blocks() */ ++static int alloc_blocks_forward(reiser4_blocknr_hint *hint, int needed, ++ reiser4_block_nr *start, reiser4_block_nr *len) ++{ ++ struct super_block *super = get_current_context()->super; ++ int actual_len; ++ ++ reiser4_block_nr search_start; ++ reiser4_block_nr search_end; ++ ++ assert("zam-398", super != NULL); ++ assert("zam-412", hint != NULL); ++ assert("zam-397", hint->blk <= reiser4_block_count(super)); ++ ++ if (hint->max_dist == 0) ++ search_end = reiser4_block_count(super); ++ else ++ search_end = ++ LIMIT(hint->blk + hint->max_dist, ++ reiser4_block_count(super)); ++ ++ /* We use @hint -> blk as a search start and search from it to the end ++ of the disk or in given region if @hint -> max_dist is not zero */ ++ search_start = hint->blk; ++ ++ actual_len = ++ bitmap_alloc_forward(&search_start, &search_end, 1, needed); ++ ++ /* There is only one bitmap search if max_dist was specified or first ++ pass was from the beginning of the bitmap. We also do one pass for ++ scanning bitmap in backward direction. 
*/ ++ if (!(actual_len != 0 || hint->max_dist != 0 || search_start == 0)) { ++ /* next step is a scanning from 0 to search_start */ ++ search_end = search_start; ++ search_start = 0; ++ actual_len = ++ bitmap_alloc_forward(&search_start, &search_end, 1, needed); ++ } ++ if (actual_len == 0) ++ return RETERR(-ENOSPC); ++ if (actual_len < 0) ++ return RETERR(actual_len); ++ *len = actual_len; ++ *start = search_start; ++ return 0; ++} ++ ++static int alloc_blocks_backward(reiser4_blocknr_hint * hint, int needed, ++ reiser4_block_nr * start, ++ reiser4_block_nr * len) ++{ ++ reiser4_block_nr search_start; ++ reiser4_block_nr search_end; ++ int actual_len; ++ ++ ON_DEBUG(struct super_block *super = reiser4_get_current_sb()); ++ ++ assert("zam-969", super != NULL); ++ assert("zam-970", hint != NULL); ++ assert("zam-971", hint->blk <= reiser4_block_count(super)); ++ ++ search_start = hint->blk; ++ if (hint->max_dist == 0 || search_start <= hint->max_dist) ++ search_end = 0; ++ else ++ search_end = search_start - hint->max_dist; ++ ++ actual_len = ++ bitmap_alloc_backward(&search_start, &search_end, 1, needed); ++ if (actual_len == 0) ++ return RETERR(-ENOSPC); ++ if (actual_len < 0) ++ return RETERR(actual_len); ++ *len = actual_len; ++ *start = search_start; ++ return 0; ++} ++ ++/* plugin->u.space_allocator.alloc_blocks() */ ++int reiser4_alloc_blocks_bitmap(reiser4_space_allocator * allocator, ++ reiser4_blocknr_hint * hint, int needed, ++ reiser4_block_nr * start, reiser4_block_nr * len) ++{ ++ if (hint->backward) ++ return alloc_blocks_backward(hint, needed, start, len); ++ return alloc_blocks_forward(hint, needed, start, len); ++} ++ ++/* plugin->u.space_allocator.dealloc_blocks(). */ ++/* It just frees blocks in WORKING BITMAP. Usually formatted an unformatted ++ nodes deletion is deferred until transaction commit. 
However, deallocation ++ of temporary objects like wandered blocks and transaction commit records ++ requires immediate node deletion from WORKING BITMAP.*/ ++void reiser4_dealloc_blocks_bitmap(reiser4_space_allocator * allocator, ++ reiser4_block_nr start, reiser4_block_nr len) ++{ ++ struct super_block *super = reiser4_get_current_sb(); ++ ++ bmap_nr_t bmap; ++ bmap_off_t offset; ++ ++ struct bitmap_node *bnode; ++ int ret; ++ ++ assert("zam-468", len != 0); ++ check_block_range(&start, &len); ++ ++ parse_blocknr(&start, &bmap, &offset); ++ ++ assert("zam-469", offset + len <= bmap_bit_count(super->s_blocksize)); ++ ++ bnode = get_bnode(super, bmap); ++ ++ assert("zam-470", bnode != NULL); ++ ++ ret = load_and_lock_bnode(bnode); ++ assert("zam-481", ret == 0); ++ ++ reiser4_clear_bits(bnode_working_data(bnode), offset, ++ (bmap_off_t) (offset + len)); ++ ++ adjust_first_zero_bit(bnode, offset); ++ ++ release_and_unlock_bnode(bnode); ++} ++ ++/* plugin->u.space_allocator.check_blocks(). 
*/ ++void reiser4_check_blocks_bitmap(const reiser4_block_nr * start, ++ const reiser4_block_nr * len, int desired) ++{ ++#if REISER4_DEBUG ++ struct super_block *super = reiser4_get_current_sb(); ++ ++ bmap_nr_t bmap; ++ bmap_off_t start_offset; ++ bmap_off_t end_offset; ++ ++ struct bitmap_node *bnode; ++ int ret; ++ ++ assert("zam-622", len != NULL); ++ check_block_range(start, len); ++ parse_blocknr(start, &bmap, &start_offset); ++ ++ end_offset = start_offset + *len; ++ assert("nikita-2214", end_offset <= bmap_bit_count(super->s_blocksize)); ++ ++ bnode = get_bnode(super, bmap); ++ ++ assert("nikita-2215", bnode != NULL); ++ ++ ret = load_and_lock_bnode(bnode); ++ assert("zam-626", ret == 0); ++ ++ assert("nikita-2216", jnode_is_loaded(bnode->wjnode)); ++ ++ if (desired) { ++ assert("zam-623", ++ reiser4_find_next_zero_bit(bnode_working_data(bnode), ++ end_offset, start_offset) ++ >= end_offset); ++ } else { ++ assert("zam-624", ++ reiser4_find_next_set_bit(bnode_working_data(bnode), ++ end_offset, start_offset) ++ >= end_offset); ++ } ++ ++ release_and_unlock_bnode(bnode); ++#endif ++} ++ ++/* conditional insertion of @node into atom's overwrite set if it was not there */ ++static void cond_add_to_overwrite_set(txn_atom * atom, jnode * node) ++{ ++ assert("zam-546", atom != NULL); ++ assert("zam-547", atom->stage == ASTAGE_PRE_COMMIT); ++ assert("zam-548", node != NULL); ++ ++ spin_lock_atom(atom); ++ spin_lock_jnode(node); ++ ++ if (node->atom == NULL) { ++ JF_SET(node, JNODE_OVRWR); ++ insert_into_atom_ovrwr_list(atom, node); ++ } else { ++ assert("zam-549", node->atom == atom); ++ } ++ ++ spin_unlock_jnode(node); ++ spin_unlock_atom(atom); ++} ++ ++/* an actor which applies delete set to COMMIT bitmap pages and link modified ++ pages in a single-linked list */ ++static int ++apply_dset_to_commit_bmap(txn_atom * atom, const reiser4_block_nr * start, ++ const reiser4_block_nr * len, void *data) ++{ ++ ++ bmap_nr_t bmap; ++ bmap_off_t offset; ++ int ret; ++ 
++ long long *blocks_freed_p = data; ++ ++ struct bitmap_node *bnode; ++ ++ struct super_block *sb = reiser4_get_current_sb(); ++ ++ check_block_range(start, len); ++ ++ parse_blocknr(start, &bmap, &offset); ++ ++ /* FIXME-ZAM: we assume that all block ranges are allocated by this ++ bitmap-based allocator and each block range can't go over a zone of ++ responsibility of one bitmap block; same assumption is used in ++ other journal hooks in bitmap code. */ ++ bnode = get_bnode(sb, bmap); ++ assert("zam-448", bnode != NULL); ++ ++ /* it is safe to unlock atom with is in ASTAGE_PRE_COMMIT */ ++ assert("zam-767", atom->stage == ASTAGE_PRE_COMMIT); ++ ret = load_and_lock_bnode(bnode); ++ if (ret) ++ return ret; ++ ++ /* put bnode into atom's overwrite set */ ++ cond_add_to_overwrite_set(atom, bnode->cjnode); ++ ++ data = bnode_commit_data(bnode); ++ ++ ret = bnode_check_crc(bnode); ++ if (ret != 0) ++ return ret; ++ ++ if (len != NULL) { ++ /* FIXME-ZAM: a check that all bits are set should be there */ ++ assert("zam-443", ++ offset + *len <= bmap_bit_count(sb->s_blocksize)); ++ reiser4_clear_bits(data, offset, (bmap_off_t) (offset + *len)); ++ ++ (*blocks_freed_p) += *len; ++ } else { ++ reiser4_clear_bit(offset, data); ++ (*blocks_freed_p)++; ++ } ++ ++ bnode_set_commit_crc(bnode, bnode_calc_crc(bnode, sb->s_blocksize)); ++ ++ release_and_unlock_bnode(bnode); ++ ++ return 0; ++} ++ ++/* plugin->u.space_allocator.pre_commit_hook(). */ ++/* It just applies transaction changes to fs-wide COMMIT BITMAP, hoping the ++ rest is done by transaction manager (allocate wandered locations for COMMIT ++ BITMAP blocks, copy COMMIT BITMAP blocks data). 
*/ ++/* Only one instance of this function can be running at one given time, because ++ only one transaction can be committed a time, therefore it is safe to access ++ some global variables without any locking */ ++ ++int reiser4_pre_commit_hook_bitmap(void) ++{ ++ struct super_block *super = reiser4_get_current_sb(); ++ txn_atom *atom; ++ ++ long long blocks_freed = 0; ++ ++ atom = get_current_atom_locked(); ++ assert("zam-876", atom->stage == ASTAGE_PRE_COMMIT); ++ spin_unlock_atom(atom); ++ ++ { /* scan atom's captured list and find all freshly allocated nodes, ++ * mark corresponded bits in COMMIT BITMAP as used */ ++ struct list_head *head = ATOM_CLEAN_LIST(atom); ++ jnode *node = list_entry(head->next, jnode, capture_link); ++ ++ while (head != &node->capture_link) { ++ /* we detect freshly allocated jnodes */ ++ if (JF_ISSET(node, JNODE_RELOC)) { ++ int ret; ++ bmap_nr_t bmap; ++ ++ bmap_off_t offset; ++ bmap_off_t index; ++ struct bitmap_node *bn; ++ __u32 size = bmap_size(super->s_blocksize); ++ __u32 crc; ++ char byte; ++ ++ assert("zam-559", !JF_ISSET(node, JNODE_OVRWR)); ++ assert("zam-460", ++ !reiser4_blocknr_is_fake(&node->blocknr)); ++ ++ parse_blocknr(&node->blocknr, &bmap, &offset); ++ bn = get_bnode(super, bmap); ++ ++ index = offset >> 3; ++ assert("vpf-276", index < size); ++ ++ ret = bnode_check_crc(bnode); ++ if (ret != 0) ++ return ret; ++ ++ check_bnode_loaded(bn); ++ load_and_lock_bnode(bn); ++ ++ byte = *(bnode_commit_data(bn) + index); ++ reiser4_set_bit(offset, bnode_commit_data(bn)); ++ ++ crc = adler32_recalc(bnode_commit_crc(bn), byte, ++ *(bnode_commit_data(bn) + ++ index), ++ size - index), ++ bnode_set_commit_crc(bn, crc); ++ ++ release_and_unlock_bnode(bn); ++ ++ ret = bnode_check_crc(bn); ++ if (ret != 0) ++ return ret; ++ ++ /* working of this depends on how it inserts ++ new j-node into clean list, because we are ++ scanning the same list now. 
It is OK, if ++ insertion is done to the list front */ ++ cond_add_to_overwrite_set(atom, bn->cjnode); ++ } ++ ++ node = list_entry(node->capture_link.next, jnode, capture_link); ++ } ++ } ++ ++ blocknr_set_iterator(atom, &atom->delete_set, apply_dset_to_commit_bmap, ++ &blocks_freed, 0); ++ ++ blocks_freed -= atom->nr_blocks_allocated; ++ ++ { ++ reiser4_super_info_data *sbinfo; ++ ++ sbinfo = get_super_private(super); ++ ++ spin_lock_reiser4_super(sbinfo); ++ sbinfo->blocks_free_committed += blocks_freed; ++ spin_unlock_reiser4_super(sbinfo); ++ } ++ ++ return 0; ++} ++ ++/* plugin->u.space_allocator.init_allocator ++ constructor of reiser4_space_allocator object. It is called on fs mount */ ++int reiser4_init_allocator_bitmap(reiser4_space_allocator * allocator, ++ struct super_block *super, void *arg) ++{ ++ struct bitmap_allocator_data *data = NULL; ++ bmap_nr_t bitmap_blocks_nr; ++ bmap_nr_t i; ++ ++ assert("nikita-3039", reiser4_schedulable()); ++ ++ /* getting memory for bitmap allocator private data holder */ ++ data = ++ kmalloc(sizeof(struct bitmap_allocator_data), ++ reiser4_ctx_gfp_mask_get()); ++ ++ if (data == NULL) ++ return RETERR(-ENOMEM); ++ ++ /* allocation and initialization for the array of bnodes */ ++ bitmap_blocks_nr = get_nr_bmap(super); ++ ++ /* FIXME-ZAM: it is not clear what to do with huge number of bitmaps ++ which is bigger than 2^32 (= 8 * 4096 * 4096 * 2^32 bytes = 5.76e+17, ++ may I never meet someone who still uses the ia32 architecture when ++ storage devices of that size enter the market, and wants to use ia32 ++ with that storage device, much less reiser4. ;-) -Hans). Kmalloc is not possible and, ++ probably, another dynamic data structure should replace a static ++ array of bnodes. 
*/ ++ /*data->bitmap = reiser4_kmalloc((size_t) (sizeof (struct bitmap_node) * bitmap_blocks_nr), GFP_KERNEL); */ ++ data->bitmap = reiser4_vmalloc(sizeof(struct bitmap_node) * bitmap_blocks_nr); ++ if (data->bitmap == NULL) { ++ kfree(data); ++ return RETERR(-ENOMEM); ++ } ++ ++ for (i = 0; i < bitmap_blocks_nr; i++) ++ init_bnode(data->bitmap + i, super, i); ++ ++ allocator->u.generic = data; ++ ++#if REISER4_DEBUG ++ get_super_private(super)->min_blocks_used += bitmap_blocks_nr; ++#endif ++ ++ /* Load all bitmap blocks at mount time. */ ++ if (!test_bit ++ (REISER4_DONT_LOAD_BITMAP, &get_super_private(super)->fs_flags)) { ++ __u64 start_time, elapsed_time; ++ struct bitmap_node *bnode; ++ int ret; ++ ++ if (REISER4_DEBUG) ++ printk(KERN_INFO "loading reiser4 bitmap..."); ++ start_time = jiffies; ++ ++ for (i = 0; i < bitmap_blocks_nr; i++) { ++ bnode = data->bitmap + i; ++ ret = load_and_lock_bnode(bnode); ++ if (ret) { ++ reiser4_destroy_allocator_bitmap(allocator, ++ super); ++ return ret; ++ } ++ release_and_unlock_bnode(bnode); ++ } ++ ++ elapsed_time = jiffies - start_time; ++ if (REISER4_DEBUG) ++ printk("...done (%llu jiffies)\n", ++ (unsigned long long)elapsed_time); ++ } ++ ++ return 0; ++} ++ ++/* plugin->u.space_allocator.destroy_allocator ++ destructor. 
It is called on fs unmount */ ++int reiser4_destroy_allocator_bitmap(reiser4_space_allocator * allocator, ++ struct super_block *super) ++{ ++ bmap_nr_t bitmap_blocks_nr; ++ bmap_nr_t i; ++ ++ struct bitmap_allocator_data *data = allocator->u.generic; ++ ++ assert("zam-414", data != NULL); ++ assert("zam-376", data->bitmap != NULL); ++ ++ bitmap_blocks_nr = get_nr_bmap(super); ++ ++ for (i = 0; i < bitmap_blocks_nr; i++) { ++ struct bitmap_node *bnode = data->bitmap + i; ++ ++ mutex_lock(&bnode->mutex); ++ ++#if REISER4_DEBUG ++ if (atomic_read(&bnode->loaded)) { ++ jnode *wj = bnode->wjnode; ++ jnode *cj = bnode->cjnode; ++ ++ assert("zam-480", jnode_page(cj) != NULL); ++ assert("zam-633", jnode_page(wj) != NULL); ++ ++ assert("zam-634", ++ memcmp(jdata(wj), jdata(wj), ++ bmap_size(super->s_blocksize)) == 0); ++ ++ } ++#endif ++ done_bnode(bnode); ++ mutex_unlock(&bnode->mutex); ++ } ++ ++ vfree(data->bitmap); ++ kfree(data); ++ ++ allocator->u.generic = NULL; ++ ++ return 0; ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * scroll-step: 1 ++ * End: ++ */ +diff --git a/fs/reiser4/plugin/space/bitmap.h b/fs/reiser4/plugin/space/bitmap.h +new file mode 100644 +index 0000000..be867f1 +--- /dev/null ++++ b/fs/reiser4/plugin/space/bitmap.h +@@ -0,0 +1,47 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#if !defined (__REISER4_PLUGIN_SPACE_BITMAP_H__) ++#define __REISER4_PLUGIN_SPACE_BITMAP_H__ ++ ++#include "../../dformat.h" ++#include "../../block_alloc.h" ++ ++#include /* for __u?? */ ++#include /* for struct super_block */ ++/* EDWARD-FIXME-HANS: write something as informative as the below for every .h file lacking it. */ ++/* declarations of functions implementing methods of space allocator plugin for ++ bitmap based allocator. 
The functions themselves are in bitmap.c */ ++extern int reiser4_init_allocator_bitmap(reiser4_space_allocator *, ++ struct super_block *, void *); ++extern int reiser4_destroy_allocator_bitmap(reiser4_space_allocator *, ++ struct super_block *); ++extern int reiser4_alloc_blocks_bitmap(reiser4_space_allocator *, ++ reiser4_blocknr_hint *, int needed, ++ reiser4_block_nr * start, ++ reiser4_block_nr * len); ++extern void reiser4_check_blocks_bitmap(const reiser4_block_nr *, ++ const reiser4_block_nr *, int); ++extern void reiser4_dealloc_blocks_bitmap(reiser4_space_allocator *, ++ reiser4_block_nr, ++ reiser4_block_nr); ++extern int reiser4_pre_commit_hook_bitmap(void); ++ ++#define reiser4_post_commit_hook_bitmap() do{}while(0) ++#define reiser4_post_write_back_hook_bitmap() do{}while(0) ++#define reiser4_print_info_bitmap(pref, al) do{}while(0) ++ ++typedef __u64 bmap_nr_t; ++typedef __u32 bmap_off_t; ++ ++#endif /* __REISER4_PLUGIN_SPACE_BITMAP_H__ */ ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/space/space_allocator.h b/fs/reiser4/plugin/space/space_allocator.h +new file mode 100644 +index 0000000..5bfa9a3 +--- /dev/null ++++ b/fs/reiser4/plugin/space/space_allocator.h +@@ -0,0 +1,80 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#ifndef __SPACE_ALLOCATOR_H__ ++#define __SPACE_ALLOCATOR_H__ ++ ++#include "../../forward.h" ++#include "bitmap.h" ++/* NIKITA-FIXME-HANS: surely this could use a comment. Something about how bitmap is the only space allocator for now, ++ * but... 
*/ ++#define DEF_SPACE_ALLOCATOR(allocator) \ ++ \ ++static inline int sa_init_allocator (reiser4_space_allocator * al, struct super_block *s, void * opaque) \ ++{ \ ++ return reiser4_init_allocator_##allocator (al, s, opaque); \ ++} \ ++ \ ++static inline void sa_destroy_allocator (reiser4_space_allocator *al, struct super_block *s) \ ++{ \ ++ reiser4_destroy_allocator_##allocator (al, s); \ ++} \ ++ \ ++static inline int sa_alloc_blocks (reiser4_space_allocator *al, reiser4_blocknr_hint * hint, \ ++ int needed, reiser4_block_nr * start, reiser4_block_nr * len) \ ++{ \ ++ return reiser4_alloc_blocks_##allocator (al, hint, needed, start, len); \ ++} \ ++static inline void sa_dealloc_blocks (reiser4_space_allocator * al, reiser4_block_nr start, reiser4_block_nr len) \ ++{ \ ++ reiser4_dealloc_blocks_##allocator (al, start, len); \ ++} \ ++ \ ++static inline void sa_check_blocks (const reiser4_block_nr * start, const reiser4_block_nr * end, int desired) \ ++{ \ ++ reiser4_check_blocks_##allocator (start, end, desired); \ ++} \ ++ \ ++static inline void sa_pre_commit_hook (void) \ ++{ \ ++ reiser4_pre_commit_hook_##allocator (); \ ++} \ ++ \ ++static inline void sa_post_commit_hook (void) \ ++{ \ ++ reiser4_post_commit_hook_##allocator (); \ ++} \ ++ \ ++static inline void sa_post_write_back_hook (void) \ ++{ \ ++ reiser4_post_write_back_hook_##allocator(); \ ++} \ ++ \ ++static inline void sa_print_info(const char * prefix, reiser4_space_allocator * al) \ ++{ \ ++ reiser4_print_info_##allocator (prefix, al); \ ++} ++ ++DEF_SPACE_ALLOCATOR(bitmap) ++ ++/* this object is part of reiser4 private in-core super block */ ++struct reiser4_space_allocator { ++ union { ++ /* space allocators might use this pointer to reference their ++ * data. */ ++ void *generic; ++ } u; ++}; ++ ++/* __SPACE_ALLOCATOR_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/plugin/tail_policy.c b/fs/reiser4/plugin/tail_policy.c +new file mode 100644 +index 0000000..43f4ae7 +--- /dev/null ++++ b/fs/reiser4/plugin/tail_policy.c +@@ -0,0 +1,113 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Formatting policy plugins */ ++ ++/* ++ * Formatting policy plugin is used by object plugin (of regular file) to ++ * convert file between two representations. ++ * ++ * Currently following policies are implemented: ++ * never store file in formatted nodes ++ * always store file in formatted nodes ++ * store file in formatted nodes if file is smaller than 4 blocks (default) ++ */ ++ ++#include "../tree.h" ++#include "../inode.h" ++#include "../super.h" ++#include "object.h" ++#include "plugin.h" ++#include "node/node.h" ++#include "plugin_header.h" ++ ++#include ++#include /* For struct inode */ ++ ++/** ++ * have_formatting_never - ++ * @inode: ++ * @size: ++ * ++ * ++ */ ++/* Never store file's tail as direct item */ ++/* Audited by: green(2002.06.12) */ ++static int have_formatting_never(const struct inode *inode UNUSED_ARG ++ /* inode to operate on */ , ++ loff_t size UNUSED_ARG /* new object size */ ) ++{ ++ return 0; ++} ++ ++/* Always store file's tail as direct item */ ++/* Audited by: green(2002.06.12) */ ++static int ++have_formatting_always(const struct inode *inode UNUSED_ARG ++ /* inode to operate on */ , ++ loff_t size UNUSED_ARG /* new object size */ ) ++{ ++ return 1; ++} ++ ++/* This function makes test if we should store file denoted @inode as tails only or ++ as extents only. 
*/ ++static int ++have_formatting_default(const struct inode *inode UNUSED_ARG ++ /* inode to operate on */ , ++ loff_t size /* new object size */ ) ++{ ++ assert("umka-1253", inode != NULL); ++ ++ if (size > inode->i_sb->s_blocksize * 4) ++ return 0; ++ ++ return 1; ++} ++ ++/* tail plugins */ ++formatting_plugin formatting_plugins[LAST_TAIL_FORMATTING_ID] = { ++ [NEVER_TAILS_FORMATTING_ID] = { ++ .h = { ++ .type_id = REISER4_FORMATTING_PLUGIN_TYPE, ++ .id = NEVER_TAILS_FORMATTING_ID, ++ .pops = NULL, ++ .label = "never", ++ .desc = "Never store file's tail", ++ .linkage = {NULL, NULL} ++ }, ++ .have_tail = have_formatting_never ++ }, ++ [ALWAYS_TAILS_FORMATTING_ID] = { ++ .h = { ++ .type_id = REISER4_FORMATTING_PLUGIN_TYPE, ++ .id = ALWAYS_TAILS_FORMATTING_ID, ++ .pops = NULL, ++ .label = "always", ++ .desc = "Always store file's tail", ++ .linkage = {NULL, NULL} ++ }, ++ .have_tail = have_formatting_always ++ }, ++ [SMALL_FILE_FORMATTING_ID] = { ++ .h = { ++ .type_id = REISER4_FORMATTING_PLUGIN_TYPE, ++ .id = SMALL_FILE_FORMATTING_ID, ++ .pops = NULL, ++ .label = "4blocks", ++ .desc = "store files shorter than 4 blocks in tail items", ++ .linkage = {NULL, NULL} ++ }, ++ .have_tail = have_formatting_default ++ } ++}; ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * End: ++ */ +diff --git a/fs/reiser4/pool.c b/fs/reiser4/pool.c +new file mode 100644 +index 0000000..f4303da +--- /dev/null ++++ b/fs/reiser4/pool.c +@@ -0,0 +1,234 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Fast pool allocation. ++ ++ There are situations when some sub-system normally asks memory allocator ++ for only few objects, but under some circumstances could require much ++ more. Typical and actually motivating example is tree balancing. 
It needs ++ to keep track of nodes that were involved into it, and it is well-known ++ that in reasonable packed balanced tree most (92.938121%) percent of all ++ balancings end up after working with only few nodes (3.141592 on ++ average). But in rare cases balancing can involve much more nodes ++ (3*tree_height+1 in extremal situation). ++ ++ On the one hand, we don't want to resort to dynamic allocation (slab, ++ malloc(), etc.) to allocate data structures required to keep track of ++ nodes during balancing. On the other hand, we cannot statically allocate ++ required amount of space on the stack, because first: it is useless wastage ++ of precious resource, and second: this amount is unknown in advance (tree ++ height can change). ++ ++ Pools, implemented in this file are solution for this problem: ++ ++ - some configurable amount of objects is statically preallocated on the ++ stack ++ ++ - if this preallocated pool is exhausted and more objects is requested ++ they are allocated dynamically. ++ ++ Pools encapsulate distinction between statically and dynamically allocated ++ objects. Both allocation and recycling look exactly the same. ++ ++ To keep track of dynamically allocated objects, pool adds its own linkage ++ to each object. ++ ++ NOTE-NIKITA This linkage also contains some balancing-specific data. This ++ is not perfect. On the other hand, balancing is currently the only client ++ of pool code. ++ ++ NOTE-NIKITA Another desirable feature is to rewrite all pool manipulation ++ functions in the style of tslist/tshash, i.e., make them unreadable, but ++ type-safe. 
++ ++*/ ++ ++#include "debug.h" ++#include "pool.h" ++#include "super.h" ++ ++#include ++#include ++ ++/* initialize new pool object */ ++static void reiser4_init_pool_obj(reiser4_pool_header * h /* pool object to ++ * initialize */ ) ++{ ++ INIT_LIST_HEAD(&h->usage_linkage); ++ INIT_LIST_HEAD(&h->level_linkage); ++ INIT_LIST_HEAD(&h->extra_linkage); ++} ++ ++/* initialize new pool */ ++void reiser4_init_pool(reiser4_pool * pool /* pool to initialize */ , ++ size_t obj_size /* size of objects in @pool */ , ++ int num_of_objs /* number of preallocated objects */ , ++ char *data /* area for preallocated objects */ ) ++{ ++ reiser4_pool_header *h; ++ int i; ++ ++ assert("nikita-955", pool != NULL); ++ assert("nikita-1044", obj_size > 0); ++ assert("nikita-956", num_of_objs >= 0); ++ assert("nikita-957", data != NULL); ++ ++ memset(pool, 0, sizeof *pool); ++ pool->obj_size = obj_size; ++ pool->data = data; ++ INIT_LIST_HEAD(&pool->free); ++ INIT_LIST_HEAD(&pool->used); ++ INIT_LIST_HEAD(&pool->extra); ++ memset(data, 0, obj_size * num_of_objs); ++ for (i = 0; i < num_of_objs; ++i) { ++ h = (reiser4_pool_header *) (data + i * obj_size); ++ reiser4_init_pool_obj(h); ++ /* add pool header to the end of pool's free list */ ++ list_add_tail(&h->usage_linkage, &pool->free); ++ } ++} ++ ++/* release pool resources ++ ++ Release all resources acquired by this pool, specifically, dynamically ++ allocated objects. ++ ++*/ ++void reiser4_done_pool(reiser4_pool * pool UNUSED_ARG /* pool to destroy */ ) ++{ ++} ++ ++/* allocate carry object from pool ++ ++ First, try to get preallocated object. If this fails, resort to dynamic ++ allocation. 
++ ++*/ ++static void *reiser4_pool_alloc(reiser4_pool * pool /* pool to allocate object ++ * from */ ) ++{ ++ reiser4_pool_header *result; ++ ++ assert("nikita-959", pool != NULL); ++ ++ if (!list_empty(&pool->free)) { ++ struct list_head *linkage; ++ ++ linkage = pool->free.next; ++ list_del(linkage); ++ INIT_LIST_HEAD(linkage); ++ result = list_entry(linkage, reiser4_pool_header, usage_linkage); ++ BUG_ON(!list_empty(&result->level_linkage) || ++ !list_empty(&result->extra_linkage)); ++ } else { ++ /* pool is empty. Extra allocations don't deserve dedicated ++ slab to be served from, as they are expected to be rare. */ ++ result = kmalloc(pool->obj_size, reiser4_ctx_gfp_mask_get()); ++ if (result != 0) { ++ reiser4_init_pool_obj(result); ++ list_add(&result->extra_linkage, &pool->extra); ++ } else ++ return ERR_PTR(RETERR(-ENOMEM)); ++ BUG_ON(!list_empty(&result->usage_linkage) || ++ !list_empty(&result->level_linkage)); ++ } ++ ++pool->objs; ++ list_add(&result->usage_linkage, &pool->used); ++ memset(result + 1, 0, pool->obj_size - sizeof *result); ++ return result; ++} ++ ++/* return object back to the pool */ ++void reiser4_pool_free(reiser4_pool * pool, reiser4_pool_header * h /* pool to return object back ++ * into */ ) ++{ ++ assert("nikita-961", h != NULL); ++ assert("nikita-962", pool != NULL); ++ ++ --pool->objs; ++ assert("nikita-963", pool->objs >= 0); ++ ++ list_del_init(&h->usage_linkage); ++ list_del_init(&h->level_linkage); ++ ++ if (list_empty(&h->extra_linkage)) ++ /* ++ * pool header is not an extra one. Push it onto free list ++ * using usage_linkage ++ */ ++ list_add(&h->usage_linkage, &pool->free); ++ else { ++ /* remove pool header from pool's extra list and kfree it */ ++ list_del(&h->extra_linkage); ++ kfree(h); ++ } ++} ++ ++/* add new object to the carry level list ++ ++ Carry level is FIFO most of the time, but not always. 
Complications arise ++ when make_space() function tries to go to the left neighbor and thus adds ++ carry node before existing nodes, and also, when updating delimiting keys ++ after moving data between two nodes, we want left node to be locked before ++ right node. ++ ++ Latter case is confusing at the first glance. Problem is that COP_UPDATE ++ opration that updates delimiting keys is sometimes called with two nodes ++ (when data are moved between two nodes) and sometimes with only one node ++ (when leftmost item is deleted in a node). In any case operation is ++ supplied with at least node whose left delimiting key is to be updated ++ (that is "right" node). ++ ++*/ ++reiser4_pool_header *reiser4_add_obj(reiser4_pool * pool /* pool from which to ++ * allocate new object ++ */, ++ struct list_head *list /* list where to add ++ * object */, ++ pool_ordering order /* where to add */, ++ reiser4_pool_header * reference ++ /* after (or before) which existing object ++ to add */) ++{ ++ reiser4_pool_header *result; ++ ++ assert("nikita-972", pool != NULL); ++ ++ result = reiser4_pool_alloc(pool); ++ if (IS_ERR(result)) ++ return result; ++ ++ assert("nikita-973", result != NULL); ++ ++ switch (order) { ++ case POOLO_BEFORE: ++ __list_add(&result->level_linkage, ++ reference->level_linkage.prev, ++ &reference->level_linkage); ++ break; ++ case POOLO_AFTER: ++ __list_add(&result->level_linkage, ++ &reference->level_linkage, ++ reference->level_linkage.next); ++ break; ++ case POOLO_LAST: ++ list_add_tail(&result->level_linkage, list); ++ break; ++ case POOLO_FIRST: ++ list_add(&result->level_linkage, list); ++ break; ++ default: ++ wrong_return_value("nikita-927", "order"); ++ } ++ return result; ++} ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/pool.h b/fs/reiser4/pool.h +new file mode 100644 +index 0000000..174d3c6 +--- /dev/null ++++ b/fs/reiser4/pool.h +@@ -0,0 +1,55 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Fast pool allocation */ ++ ++#ifndef __REISER4_POOL_H__ ++#define __REISER4_POOL_H__ ++ ++#include ++ ++typedef struct reiser4_pool { ++ size_t obj_size; ++ int objs; ++ char *data; ++ struct list_head free; ++ struct list_head used; ++ struct list_head extra; ++} reiser4_pool; ++ ++typedef struct reiser4_pool_header { ++ /* object is either on free or "used" lists */ ++ struct list_head usage_linkage; ++ struct list_head level_linkage; ++ struct list_head extra_linkage; ++} reiser4_pool_header; ++ ++typedef enum { ++ POOLO_BEFORE, ++ POOLO_AFTER, ++ POOLO_LAST, ++ POOLO_FIRST ++} pool_ordering; ++ ++/* pool manipulation functions */ ++ ++extern void reiser4_init_pool(reiser4_pool * pool, size_t obj_size, ++ int num_of_objs, char *data); ++extern void reiser4_done_pool(reiser4_pool * pool); ++extern void reiser4_pool_free(reiser4_pool * pool, reiser4_pool_header * h); ++reiser4_pool_header *reiser4_add_obj(reiser4_pool * pool, ++ struct list_head * list, ++ pool_ordering order, ++ reiser4_pool_header * reference); ++ ++/* __REISER4_POOL_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/readahead.c b/fs/reiser4/readahead.c +new file mode 100644 +index 0000000..8e5a9f1 +--- /dev/null ++++ b/fs/reiser4/readahead.c +@@ -0,0 +1,138 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++#include "forward.h" ++#include "tree.h" ++#include "tree_walk.h" ++#include "super.h" ++#include "inode.h" ++#include "key.h" ++#include "znode.h" ++ ++#include /* for totalram_pages */ ++ ++void reiser4_init_ra_info(ra_info_t * rai) ++{ ++ rai->key_to_stop = *reiser4_min_key(); ++} ++ ++/* global formatted node readahead parameter. It can be set by mount option -o readahead:NUM:1 */ ++static inline int ra_adjacent_only(int flags) ++{ ++ return flags & RA_ADJACENT_ONLY; ++} ++ ++/* this is used by formatted_readahead to decide whether read for right neighbor of node is to be issued. It returns 1 ++ if right neighbor's first key is less or equal to readahead's stop key */ ++static int should_readahead_neighbor(znode * node, ra_info_t * info) ++{ ++ int result; ++ ++ read_lock_dk(znode_get_tree(node)); ++ result = keyle(znode_get_rd_key(node), &info->key_to_stop); ++ read_unlock_dk(znode_get_tree(node)); ++ return result; ++} ++ ++#define LOW_MEM_PERCENTAGE (5) ++ ++static int low_on_memory(void) ++{ ++ unsigned int freepages; ++ ++ freepages = nr_free_pages(); ++ return freepages < (totalram_pages * LOW_MEM_PERCENTAGE / 100); ++} ++ ++/* start read for @node and for a few of its right neighbors */ ++void formatted_readahead(znode * node, ra_info_t * info) ++{ ++ ra_params_t *ra_params; ++ znode *cur; ++ int i; ++ int grn_flags; ++ lock_handle next_lh; ++ ++ /* do nothing if node block number has not been assigned to node (which means it is still in cache). 
*/ ++ if (reiser4_blocknr_is_fake(znode_get_block(node))) ++ return; ++ ++ ra_params = get_current_super_ra_params(); ++ ++ if (znode_page(node) == NULL) ++ jstartio(ZJNODE(node)); ++ ++ if (znode_get_level(node) != LEAF_LEVEL) ++ return; ++ ++ /* don't waste memory for read-ahead when low on memory */ ++ if (low_on_memory()) ++ return; ++ ++ /* We can have locked nodes on upper tree levels, in this situation lock ++ priorities do not help to resolve deadlocks, we have to use TRY_LOCK ++ here. */ ++ grn_flags = (GN_CAN_USE_UPPER_LEVELS | GN_TRY_LOCK); ++ ++ i = 0; ++ cur = zref(node); ++ init_lh(&next_lh); ++ while (i < ra_params->max) { ++ const reiser4_block_nr *nextblk; ++ ++ if (!should_readahead_neighbor(cur, info)) ++ break; ++ ++ if (reiser4_get_right_neighbor ++ (&next_lh, cur, ZNODE_READ_LOCK, grn_flags)) ++ break; ++ ++ nextblk = znode_get_block(next_lh.node); ++ if (reiser4_blocknr_is_fake(nextblk) || ++ (ra_adjacent_only(ra_params->flags) ++ && *nextblk != *znode_get_block(cur) + 1)) { ++ break; ++ } ++ ++ zput(cur); ++ cur = zref(next_lh.node); ++ done_lh(&next_lh); ++ if (znode_page(cur) == NULL) ++ jstartio(ZJNODE(cur)); ++ else ++ /* Do not scan read-ahead window if pages already ++ * allocated (and i/o already started). 
*/ ++ break; ++ ++ i++; ++ } ++ zput(cur); ++ done_lh(&next_lh); ++} ++ ++void reiser4_readdir_readahead_init(struct inode *dir, tap_t * tap) ++{ ++ reiser4_key *stop_key; ++ ++ assert("nikita-3542", dir != NULL); ++ assert("nikita-3543", tap != NULL); ++ ++ stop_key = &tap->ra_info.key_to_stop; ++ /* initialize readdir readahead information: include into readahead ++ * stat data of all files of the directory */ ++ set_key_locality(stop_key, get_inode_oid(dir)); ++ set_key_type(stop_key, KEY_SD_MINOR); ++ set_key_ordering(stop_key, get_key_ordering(reiser4_max_key())); ++ set_key_objectid(stop_key, get_key_objectid(reiser4_max_key())); ++ set_key_offset(stop_key, get_key_offset(reiser4_max_key())); ++} ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 80 ++ End: ++*/ +diff --git a/fs/reiser4/readahead.h b/fs/reiser4/readahead.h +new file mode 100644 +index 0000000..524c574 +--- /dev/null ++++ b/fs/reiser4/readahead.h +@@ -0,0 +1,48 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#ifndef __READAHEAD_H__ ++#define __READAHEAD_H__ ++ ++#include "key.h" ++ ++typedef enum { ++ RA_ADJACENT_ONLY = 1, /* only requests nodes which are adjacent. Default is NO (not only adjacent) */ ++} ra_global_flags; ++ ++/* reiser4 super block has a field of this type. It controls readahead during tree traversals */ ++typedef struct formatted_read_ahead_params { ++ unsigned long max; /* request not more than this amount of nodes. 
Default is totalram_pages / 4 */ ++ int flags; ++} ra_params_t; ++ ++typedef struct { ++ reiser4_key key_to_stop; ++} ra_info_t; ++ ++void formatted_readahead(znode *, ra_info_t *); ++void reiser4_init_ra_info(ra_info_t * rai); ++ ++struct reiser4_file_ra_state { ++ loff_t start; /* Current window */ ++ loff_t size; ++ loff_t next_size; /* Next window size */ ++ loff_t ahead_start; /* Ahead window */ ++ loff_t ahead_size; ++ loff_t max_window_size; /* Maximum readahead window */ ++ loff_t slow_start; /* enlarging r/a size algorithm. */ ++}; ++ ++extern void reiser4_readdir_readahead_init(struct inode *dir, tap_t * tap); ++ ++/* __READAHEAD_H__ */ ++#endif ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/reiser4.h b/fs/reiser4/reiser4.h +new file mode 100644 +index 0000000..77d720e +--- /dev/null ++++ b/fs/reiser4/reiser4.h +@@ -0,0 +1,269 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* definitions of common constants used by reiser4 */ ++ ++#if !defined( __REISER4_H__ ) ++#define __REISER4_H__ ++ ++#include /* for HZ */ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * reiser4 compilation options. ++ */ ++ ++#if defined(CONFIG_REISER4_DEBUG) ++/* turn on assertion checks */ ++#define REISER4_DEBUG (1) ++#else ++#define REISER4_DEBUG (0) ++#endif ++ ++#if defined(CONFIG_ZLIB_INFLATE) ++/* turn on zlib */ ++#define REISER4_ZLIB (1) ++#else ++#define REISER4_ZLIB (0) ++#endif ++ ++#if defined(CONFIG_CRYPTO_SHA256) ++#define REISER4_SHA256 (1) ++#else ++#define REISER4_SHA256 (0) ++#endif ++ ++/* ++ * Turn on large keys mode. In his mode (which is default), reiser4 key has 4 ++ * 8-byte components. In the old "small key" mode, it's 3 8-byte ++ * components. Additional component, referred to as "ordering" is used to ++ * order items from which given object is composed of. 
As such, ordering is ++ * placed between locality and objectid. For directory item ordering contains ++ * initial prefix of the file name this item is for. This sorts all directory ++ * items within given directory lexicographically (but see ++ * fibration.[ch]). For file body and stat-data, ordering contains initial ++ * prefix of the name file was initially created with. In the common case ++ * (files with single name) this allows to order file bodies and stat-datas in ++ * the same order as their respective directory entries, thus speeding up ++ * readdir. ++ * ++ * Note, that kernel can only mount file system with the same key size as one ++ * it is compiled for, so flipping this option may render your data ++ * inaccessible. ++ */ ++#define REISER4_LARGE_KEY (1) ++/*#define REISER4_LARGE_KEY (0)*/ ++ ++/*#define GUESS_EXISTS 1*/ ++ ++/* ++ * PLEASE update fs/reiser4/kattr.c:show_options() when adding new compilation ++ * option ++ */ ++ ++extern const char *REISER4_SUPER_MAGIC_STRING; ++extern const int REISER4_MAGIC_OFFSET; /* offset to magic string from the ++ * beginning of device */ ++ ++/* here go tunable parameters that are not worth special entry in kernel ++ configuration */ ++ ++/* default number of slots in coord-by-key caches */ ++#define CBK_CACHE_SLOTS (16) ++/* how many elementary tree operation to carry on the next level */ ++#define CARRIES_POOL_SIZE (5) ++/* size of pool of preallocated nodes for carry process. 
*/ ++#define NODES_LOCKED_POOL_SIZE (5) ++ ++#define REISER4_NEW_NODE_FLAGS (COPI_LOAD_LEFT | COPI_LOAD_RIGHT | COPI_GO_LEFT) ++#define REISER4_NEW_EXTENT_FLAGS (COPI_LOAD_LEFT | COPI_LOAD_RIGHT | COPI_GO_LEFT) ++#define REISER4_PASTE_FLAGS (COPI_GO_LEFT) ++#define REISER4_INSERT_FLAGS (COPI_GO_LEFT) ++ ++/* we are supporting reservation of disk space on uid basis */ ++#define REISER4_SUPPORT_UID_SPACE_RESERVATION (0) ++/* we are supporting reservation of disk space for groups */ ++#define REISER4_SUPPORT_GID_SPACE_RESERVATION (0) ++/* we are supporting reservation of disk space for root */ ++#define REISER4_SUPPORT_ROOT_SPACE_RESERVATION (0) ++/* we use rapid flush mode, see flush.c for comments. */ ++#define REISER4_USE_RAPID_FLUSH (1) ++ ++/* ++ * set this to 0 if you don't want to use wait-for-flush in ->writepage(). ++ */ ++#define REISER4_USE_ENTD (1) ++ ++/* key allocation is Plan-A */ ++#define REISER4_PLANA_KEY_ALLOCATION (1) ++/* key allocation follows good old 3.x scheme */ ++#define REISER4_3_5_KEY_ALLOCATION (0) ++ ++/* size of hash-table for znodes */ ++#define REISER4_ZNODE_HASH_TABLE_SIZE (1 << 13) ++ ++/* number of buckets in lnode hash-table */ ++#define LNODE_HTABLE_BUCKETS (1024) ++ ++/* some ridiculously high maximal limit on height of znode tree. This ++ is used in declaration of various per level arrays and ++ to allocate stattistics gathering array for per-level stats. */ ++#define REISER4_MAX_ZTREE_HEIGHT (8) ++ ++#define REISER4_PANIC_MSG_BUFFER_SIZE (1024) ++ ++/* If array contains less than REISER4_SEQ_SEARCH_BREAK elements then, ++ sequential search is on average faster than binary. This is because ++ of better optimization and because sequential search is more CPU ++ cache friendly. This number (25) was found by experiments on dual AMD ++ Athlon(tm), 1400MHz. ++ ++ NOTE: testing in kernel has shown that binary search is more effective than ++ implied by results of the user level benchmarking. 
Probably because in the ++ node keys are separated by other data. So value was adjusted after few ++ tests. More thorough tuning is needed. ++*/ ++#define REISER4_SEQ_SEARCH_BREAK (3) ++ ++/* don't allow tree to be lower than this */ ++#define REISER4_MIN_TREE_HEIGHT (TWIG_LEVEL) ++ ++/* NOTE NIKITA this is no longer used: maximal atom size is auto-adjusted to ++ * available memory. */ ++/* Default value of maximal atom size. Can be ovewritten by ++ tmgr.atom_max_size mount option. By default infinity. */ ++#define REISER4_ATOM_MAX_SIZE ((unsigned)(~0)) ++ ++/* Default value of maximal atom age (in jiffies). After reaching this age ++ atom will be forced to commit, either synchronously or asynchronously. Can ++ be overwritten by tmgr.atom_max_age mount option. */ ++#define REISER4_ATOM_MAX_AGE (600 * HZ) ++ ++/* sleeping period for ktxnmrgd */ ++#define REISER4_TXNMGR_TIMEOUT (5 * HZ) ++ ++/* timeout to wait for ent thread in writepage. Default: 3 milliseconds. */ ++#define REISER4_ENTD_TIMEOUT (3 * HZ / 1000) ++ ++/* start complaining after that many restarts in coord_by_key(). ++ ++ This either means incredibly heavy contention for this part of a tree, or ++ some corruption or bug. ++*/ ++#define REISER4_CBK_ITERATIONS_LIMIT (100) ++ ++/* return -EIO after that many iterations in coord_by_key(). ++ ++ I have witnessed more than 800 iterations (in 30 thread test) before cbk ++ finished. --nikita ++*/ ++#define REISER4_MAX_CBK_ITERATIONS 500000 ++ ++/* put a per-inode limit on maximal number of directory entries with identical ++ keys in hashed directory. ++ ++ Disable this until inheritance interfaces stabilize: we need some way to ++ set per directory limit. ++*/ ++#define REISER4_USE_COLLISION_LIMIT (0) ++ ++/* If flush finds more than FLUSH_RELOCATE_THRESHOLD adjacent dirty leaf-level blocks it ++ will force them to be relocated. 
*/ ++#define FLUSH_RELOCATE_THRESHOLD 64 ++/* If flush finds can find a block allocation closer than at most FLUSH_RELOCATE_DISTANCE ++ from the preceder it will relocate to that position. */ ++#define FLUSH_RELOCATE_DISTANCE 64 ++ ++/* If we have written this much or more blocks before encountering busy jnode ++ in flush list - abort flushing hoping that next time we get called ++ this jnode will be clean already, and we will save some seeks. */ ++#define FLUSH_WRITTEN_THRESHOLD 50 ++ ++/* The maximum number of nodes to scan left on a level during flush. */ ++#define FLUSH_SCAN_MAXNODES 10000 ++ ++/* per-atom limit of flushers */ ++#define ATOM_MAX_FLUSHERS (1) ++ ++/* default tracing buffer size */ ++#define REISER4_TRACE_BUF_SIZE (1 << 15) ++ ++/* what size units of IO we would like cp, etc., to use, in writing to ++ reiser4. In bytes. ++ ++ Can be overwritten by optimal_io_size mount option. ++*/ ++#define REISER4_OPTIMAL_IO_SIZE (64 * 1024) ++ ++/* see comments in inode.c:oid_to_uino() */ ++#define REISER4_UINO_SHIFT (1 << 30) ++ ++/* Mark function argument as unused to avoid compiler warnings. 
*/ ++#define UNUSED_ARG __attribute__((unused)) ++ ++#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3) ++#define NONNULL __attribute__((nonnull)) ++#else ++#define NONNULL ++#endif ++ ++/* master super block offset in bytes.*/ ++#define REISER4_MASTER_OFFSET 65536 ++ ++/* size of VFS block */ ++#define VFS_BLKSIZE 512 ++/* number of bits in size of VFS block (512==2^9) */ ++#define VFS_BLKSIZE_BITS 9 ++ ++#define REISER4_I reiser4_inode_data ++ ++/* implication */ ++#define ergo( antecedent, consequent ) ( !( antecedent ) || ( consequent ) ) ++/* logical equivalence */ ++#define equi( p1, p2 ) ( ergo( ( p1 ), ( p2 ) ) && ergo( ( p2 ), ( p1 ) ) ) ++ ++#define sizeof_array(x) ((int) (sizeof(x) / sizeof(x[0]))) ++ ++#define NOT_YET (0) ++ ++/** Reiser4 specific error codes **/ ++ ++#define REISER4_ERROR_CODE_BASE 500 ++ ++/* Neighbor is not available (side neighbor or parent) */ ++#define E_NO_NEIGHBOR (REISER4_ERROR_CODE_BASE) ++ ++/* Node was not found in cache */ ++#define E_NOT_IN_CACHE (REISER4_ERROR_CODE_BASE + 1) ++ ++/* node has no free space enough for completion of balancing operation */ ++#define E_NODE_FULL (REISER4_ERROR_CODE_BASE + 2) ++ ++/* repeat operation */ ++#define E_REPEAT (REISER4_ERROR_CODE_BASE + 3) ++ ++/* deadlock happens */ ++#define E_DEADLOCK (REISER4_ERROR_CODE_BASE + 4) ++ ++/* operation cannot be performed, because it would block and non-blocking mode ++ * was requested. */ ++#define E_BLOCK (REISER4_ERROR_CODE_BASE + 5) ++ ++/* wait some event (depends on context), then repeat */ ++#define E_WAIT (REISER4_ERROR_CODE_BASE + 6) ++ ++#endif /* __REISER4_H__ */ ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/safe_link.c b/fs/reiser4/safe_link.c +new file mode 100644 +index 0000000..1253bdb +--- /dev/null ++++ b/fs/reiser4/safe_link.c +@@ -0,0 +1,351 @@ ++/* Copyright 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Safe-links. */ ++ ++/* ++ * Safe-links are used to maintain file system consistency during operations ++ * that spawns multiple transactions. For example: ++ * ++ * 1. Unlink. UNIX supports "open-but-unlinked" files, that is files ++ * without user-visible names in the file system, but still opened by some ++ * active process. What happens here is that unlink proper (i.e., removal ++ * of the last file name) and file deletion (truncate of file body to zero ++ * and deletion of stat-data, that happens when last file descriptor is ++ * closed), may belong to different transactions T1 and T2. If a crash ++ * happens after T1 commit, but before T2 commit, on-disk file system has ++ * a file without name, that is, disk space leak. ++ * ++ * 2. Truncate. Truncate of large file may spawn multiple transactions. If ++ * system crashes while truncate was in-progress, file is left partially ++ * truncated, which violates "atomicity guarantees" of reiser4, viz. that ++ * every system is atomic. ++ * ++ * Safe-links address both above cases. Basically, safe-link is a way post ++ * some operation to be executed during commit of some other transaction than ++ * current one. (Another way to look at the safe-link is to interpret it as a ++ * logical logging.) ++ * ++ * Specifically, at the beginning of unlink safe-link in inserted in the ++ * tree. This safe-link is normally removed by file deletion code (during ++ * transaction T2 in the above terms). Truncate also inserts safe-link that is ++ * normally removed when truncate operation is finished. 
++ * ++ * This means, that in the case of "clean umount" there are no safe-links in ++ * the tree. If safe-links are observed during mount, it means that (a) system ++ * was terminated abnormally, and (b) safe-link correspond to the "pending" ++ * (i.e., not finished) operations that were in-progress during system ++ * termination. Each safe-link record enough information to complete ++ * corresponding operation, and mount simply "replays" them (hence, the ++ * analogy with the logical logging). ++ * ++ * Safe-links are implemented as blackbox items (see ++ * plugin/item/blackbox.[ch]). ++ * ++ * For the reference: ext3 also has similar mechanism, it's called "an orphan ++ * list" there. ++ */ ++ ++#include "safe_link.h" ++#include "debug.h" ++#include "inode.h" ++ ++#include "plugin/item/blackbox.h" ++ ++#include ++ ++/* ++ * On-disk format of safe-link. ++ */ ++typedef struct safelink { ++ reiser4_key sdkey; /* key of stat-data for the file safe-link is ++ * for */ ++ d64 size; /* size to which file should be truncated */ ++} safelink_t; ++ ++/* ++ * locality where safe-link items are stored. Next to the objectid of root ++ * directory. ++ */ ++static oid_t safe_link_locality(reiser4_tree * tree) ++{ ++ return get_key_objectid(get_super_private(tree->super)->df_plug-> ++ root_dir_key(tree->super)) + 1; ++} ++ ++/* ++ Construct a key for the safe-link. Key has the following format: ++ ++| 60 | 4 | 64 | 4 | 60 | 64 | +++---------------+---+------------------+---+---------------+------------------+ ++| locality | 0 | 0 | 0 | objectid | link type | +++---------------+---+------------------+---+---------------+------------------+ ++| | | | | ++| 8 bytes | 8 bytes | 8 bytes | 8 bytes | ++ ++ This is in large keys format. In small keys format second 8 byte chunk is ++ out. Locality is a constant returned by safe_link_locality(). objectid is ++ an oid of a file on which operation protected by this safe-link is ++ performed. 
link-type is used to distinguish safe-links for different ++ operations. ++ ++ */ ++static reiser4_key *build_link_key(reiser4_tree * tree, oid_t oid, ++ reiser4_safe_link_t link, reiser4_key * key) ++{ ++ reiser4_key_init(key); ++ set_key_locality(key, safe_link_locality(tree)); ++ set_key_objectid(key, oid); ++ set_key_offset(key, link); ++ return key; ++} ++ ++/* ++ * how much disk space is necessary to insert and remove (in the ++ * error-handling path) safe-link. ++ */ ++static __u64 safe_link_tograb(reiser4_tree * tree) ++{ ++ return ++ /* insert safe link */ ++ estimate_one_insert_item(tree) + ++ /* remove safe link */ ++ estimate_one_item_removal(tree) + ++ /* drill to the leaf level during insertion */ ++ 1 + estimate_one_insert_item(tree) + ++ /* ++ * possible update of existing safe-link. Actually, if ++ * safe-link existed already (we failed to remove it), then no ++ * insertion is necessary, so this term is already "covered", ++ * but for simplicity let's left it. ++ */ ++ 1; ++} ++ ++/* ++ * grab enough disk space to insert and remove (in the error-handling path) ++ * safe-link. ++ */ ++int safe_link_grab(reiser4_tree * tree, reiser4_ba_flags_t flags) ++{ ++ int result; ++ ++ grab_space_enable(); ++ /* The sbinfo->delete_mutex can be taken here. ++ * safe_link_release() should be called before leaving reiser4 ++ * context. */ ++ result = ++ reiser4_grab_reserved(tree->super, safe_link_tograb(tree), flags); ++ grab_space_enable(); ++ return result; ++} ++ ++/* ++ * release unused disk space reserved by safe_link_grab(). ++ */ ++void safe_link_release(reiser4_tree * tree) ++{ ++ reiser4_release_reserved(tree->super); ++} ++ ++/* ++ * insert into tree safe-link for operation @link on inode @inode. 
++ */ ++int safe_link_add(struct inode *inode, reiser4_safe_link_t link) ++{ ++ reiser4_key key; ++ safelink_t sl; ++ int length; ++ int result; ++ reiser4_tree *tree; ++ ++ build_sd_key(inode, &sl.sdkey); ++ length = sizeof sl.sdkey; ++ ++ if (link == SAFE_TRUNCATE) { ++ /* ++ * for truncate we have to store final file length also, ++ * expand item. ++ */ ++ length += sizeof(sl.size); ++ put_unaligned(cpu_to_le64(inode->i_size), &sl.size); ++ } ++ tree = reiser4_tree_by_inode(inode); ++ build_link_key(tree, get_inode_oid(inode), link, &key); ++ ++ result = store_black_box(tree, &key, &sl, length); ++ if (result == -EEXIST) ++ result = update_black_box(tree, &key, &sl, length); ++ return result; ++} ++ ++/* ++ * remove safe-link corresponding to the operation @link on inode @inode from ++ * the tree. ++ */ ++int safe_link_del(reiser4_tree * tree, oid_t oid, reiser4_safe_link_t link) ++{ ++ reiser4_key key; ++ ++ return kill_black_box(tree, build_link_key(tree, oid, link, &key)); ++} ++ ++/* ++ * in-memory structure to keep information extracted from safe-link. This is ++ * used to iterate over all safe-links. ++ */ ++typedef struct { ++ reiser4_tree *tree; /* internal tree */ ++ reiser4_key key; /* safe-link key */ ++ reiser4_key sdkey; /* key of object stat-data */ ++ reiser4_safe_link_t link; /* safe-link type */ ++ oid_t oid; /* object oid */ ++ __u64 size; /* final size for truncate */ ++} safe_link_context; ++ ++/* ++ * start iterating over all safe-links. ++ */ ++static void safe_link_iter_begin(reiser4_tree * tree, safe_link_context * ctx) ++{ ++ ctx->tree = tree; ++ reiser4_key_init(&ctx->key); ++ set_key_locality(&ctx->key, safe_link_locality(tree)); ++ set_key_objectid(&ctx->key, get_key_objectid(reiser4_max_key())); ++ set_key_offset(&ctx->key, get_key_offset(reiser4_max_key())); ++} ++ ++/* ++ * return next safe-link. 
++ */ ++static int safe_link_iter_next(safe_link_context * ctx) ++{ ++ int result; ++ safelink_t sl; ++ ++ result = load_black_box(ctx->tree, &ctx->key, &sl, sizeof sl, 0); ++ if (result == 0) { ++ ctx->oid = get_key_objectid(&ctx->key); ++ ctx->link = get_key_offset(&ctx->key); ++ ctx->sdkey = sl.sdkey; ++ if (ctx->link == SAFE_TRUNCATE) ++ ctx->size = le64_to_cpu(get_unaligned(&sl.size)); ++ } ++ return result; ++} ++ ++/* ++ * check are there any more safe-links left in the tree. ++ */ ++static int safe_link_iter_finished(safe_link_context * ctx) ++{ ++ return get_key_locality(&ctx->key) != safe_link_locality(ctx->tree); ++} ++ ++/* ++ * finish safe-link iteration. ++ */ ++static void safe_link_iter_end(safe_link_context * ctx) ++{ ++ /* nothing special */ ++} ++ ++/* ++ * process single safe-link. ++ */ ++static int process_safelink(struct super_block *super, reiser4_safe_link_t link, ++ reiser4_key * sdkey, oid_t oid, __u64 size) ++{ ++ struct inode *inode; ++ int result; ++ ++ /* ++ * obtain object inode by reiser4_iget(), then call object plugin ++ * ->safelink() method to do actual work, then delete safe-link on ++ * success. ++ */ ++ inode = reiser4_iget(super, sdkey, 1); ++ if (!IS_ERR(inode)) { ++ file_plugin *fplug; ++ ++ fplug = inode_file_plugin(inode); ++ assert("nikita-3428", fplug != NULL); ++ assert("", oid == get_inode_oid(inode)); ++ if (fplug->safelink != NULL) { ++ /* reiser4_txn_restart_current is not necessary because ++ * mounting is signle thread. However, without it ++ * deadlock detection code will complain (see ++ * nikita-3361). 
*/ ++ reiser4_txn_restart_current(); ++ result = fplug->safelink(inode, link, size); ++ } else { ++ warning("nikita-3430", ++ "Cannot handle safelink for %lli", ++ (unsigned long long)oid); ++ reiser4_print_key("key", sdkey); ++ result = 0; ++ } ++ if (result != 0) { ++ warning("nikita-3431", ++ "Error processing safelink for %lli: %i", ++ (unsigned long long)oid, result); ++ } ++ reiser4_iget_complete(inode); ++ iput(inode); ++ if (result == 0) { ++ result = safe_link_grab(reiser4_get_tree(super), BA_CAN_COMMIT); ++ if (result == 0) ++ result = ++ safe_link_del(reiser4_get_tree(super), oid, link); ++ safe_link_release(reiser4_get_tree(super)); ++ /* ++ * restart transaction: if there was large number of ++ * safe-links, their processing may fail to fit into ++ * single transaction. ++ */ ++ if (result == 0) ++ reiser4_txn_restart_current(); ++ } ++ } else ++ result = PTR_ERR(inode); ++ return result; ++} ++ ++/* ++ * iterate over all safe-links in the file-system processing them one by one. ++ */ ++int process_safelinks(struct super_block *super) ++{ ++ safe_link_context ctx; ++ int result; ++ ++ if (rofs_super(super)) ++ /* do nothing on the read-only file system */ ++ return 0; ++ safe_link_iter_begin(&get_super_private(super)->tree, &ctx); ++ result = 0; ++ do { ++ result = safe_link_iter_next(&ctx); ++ if (safe_link_iter_finished(&ctx) || result == -ENOENT) { ++ result = 0; ++ break; ++ } ++ if (result == 0) ++ result = process_safelink(super, ctx.link, ++ &ctx.sdkey, ctx.oid, ++ ctx.size); ++ } while (result == 0); ++ safe_link_iter_end(&ctx); ++ return result; ++} ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/safe_link.h b/fs/reiser4/safe_link.h +new file mode 100644 +index 0000000..7ae4458 +--- /dev/null ++++ b/fs/reiser4/safe_link.h +@@ -0,0 +1,29 @@ ++/* Copyright 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Safe-links. See safe_link.c for details. */ ++ ++#if !defined( __FS_SAFE_LINK_H__ ) ++#define __FS_SAFE_LINK_H__ ++ ++#include "tree.h" ++ ++int safe_link_grab(reiser4_tree * tree, reiser4_ba_flags_t flags); ++void safe_link_release(reiser4_tree * tree); ++int safe_link_add(struct inode *inode, reiser4_safe_link_t link); ++int safe_link_del(reiser4_tree *, oid_t oid, reiser4_safe_link_t link); ++ ++int process_safelinks(struct super_block *super); ++ ++/* __FS_SAFE_LINK_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/seal.c b/fs/reiser4/seal.c +new file mode 100644 +index 0000000..c91cf52 +--- /dev/null ++++ b/fs/reiser4/seal.c +@@ -0,0 +1,218 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++/* Seals implementation. */ ++/* Seals are "weak" tree pointers. They are analogous to tree coords in ++ allowing to bypass tree traversal. But normal usage of coords implies that ++ node pointed to by coord is locked, whereas seals don't keep a lock (or ++ even a reference) to znode. In stead, each znode contains a version number, ++ increased on each znode modification. This version number is copied into a ++ seal when seal is created. Later, one can "validate" seal by calling ++ reiser4_seal_validate(). If znode is in cache and its version number is ++ still the same, seal is "pristine" and coord associated with it can be ++ re-used immediately. 
++ ++ If, on the other hand, znode is out of cache, or it is obviously different ++ one from the znode seal was initially attached to (for example, it is on ++ the different level, or is being removed from the tree), seal is ++ irreparably invalid ("burned") and tree traversal has to be repeated. ++ ++ Otherwise, there is some hope, that while znode was modified (and seal was ++ "broken" as a result), key attached to the seal is still in the node. This ++ is checked by first comparing this key with delimiting keys of node and, if ++ key is ok, doing intra-node lookup. ++ ++ Znode version is maintained in the following way: ++ ++ there is reiser4_tree.znode_epoch counter. Whenever new znode is created, ++ znode_epoch is incremented and its new value is stored in ->version field ++ of new znode. Whenever znode is dirtied (which means it was probably ++ modified), znode_epoch is also incremented and its new value is stored in ++ znode->version. This is done so, because just incrementing znode->version ++ on each update is not enough: it may so happen, that znode get deleted, new ++ znode is allocated for the same disk block and gets the same version ++ counter, tricking seal code into false positive. ++*/ ++ ++#include "forward.h" ++#include "debug.h" ++#include "key.h" ++#include "coord.h" ++#include "seal.h" ++#include "plugin/item/item.h" ++#include "plugin/node/node.h" ++#include "jnode.h" ++#include "znode.h" ++#include "super.h" ++ ++static znode *seal_node(const seal_t * seal); ++static int seal_matches(const seal_t * seal, znode * node); ++ ++/* initialise seal. This can be called several times on the same seal. @coord ++ and @key can be NULL. 
*/ ++void reiser4_seal_init(seal_t * seal /* seal to initialise */ , ++ const coord_t * coord /* coord @seal will be ++ * attached to */ , ++ const reiser4_key * key UNUSED_ARG /* key @seal will be ++ * attached to */ ) ++{ ++ assert("nikita-1886", seal != NULL); ++ memset(seal, 0, sizeof *seal); ++ if (coord != NULL) { ++ znode *node; ++ ++ node = coord->node; ++ assert("nikita-1987", node != NULL); ++ spin_lock_znode(node); ++ seal->version = node->version; ++ assert("nikita-1988", seal->version != 0); ++ seal->block = *znode_get_block(node); ++#if REISER4_DEBUG ++ seal->coord1 = *coord; ++ if (key != NULL) ++ seal->key = *key; ++#endif ++ spin_unlock_znode(node); ++ } ++} ++ ++/* finish with seal */ ++void reiser4_seal_done(seal_t * seal /* seal to clear */ ) ++{ ++ assert("nikita-1887", seal != NULL); ++ seal->version = 0; ++} ++ ++/* true if seal was initialised */ ++int reiser4_seal_is_set(const seal_t * seal /* seal to query */ ) ++{ ++ assert("nikita-1890", seal != NULL); ++ return seal->version != 0; ++} ++ ++#if REISER4_DEBUG ++/* helper function for reiser4_seal_validate(). It checks that item at @coord ++ * has expected key. This is to detect cases where node was modified but wasn't ++ * marked dirty. */ ++static inline int check_seal_match(const coord_t * coord /* coord to check */ , ++ const reiser4_key * k /* expected key */ ) ++{ ++ reiser4_key ukey; ++ ++ return (coord->between != AT_UNIT) || ++ /* FIXME-VS: we only can compare keys for items whose units ++ represent exactly one key */ ++ ((coord_is_existing_unit(coord)) ++ && (item_is_extent(coord) ++ || keyeq(k, unit_key_by_coord(coord, &ukey)))) ++ || ((coord_is_existing_unit(coord)) && (item_is_ctail(coord)) ++ && keyge(k, unit_key_by_coord(coord, &ukey))); ++} ++#endif ++ ++/* this is used by reiser4_seal_validate. It accepts return value of ++ * longterm_lock_znode and returns 1 if it can be interpreted as seal ++ * validation failure. 
For instance, when longterm_lock_znode returns -EINVAL, ++ * reiser4_seal_validate returns -E_REPEAT and caller will call tre search. ++ * We cannot do this in longterm_lock_znode(), because sometimes we want to ++ * distinguish between -EINVAL and -E_REPEAT. */ ++static int should_repeat(int return_code) ++{ ++ return return_code == -EINVAL; ++} ++ ++/* (re-)validate seal. ++ ++ Checks whether seal is pristine, and try to revalidate it if possible. ++ ++ If seal was burned, or broken irreparably, return -E_REPEAT. ++ ++ NOTE-NIKITA currently reiser4_seal_validate() returns -E_REPEAT if key we are ++ looking for is in range of keys covered by the sealed node, but item wasn't ++ found by node ->lookup() method. Alternative is to return -ENOENT in this ++ case, but this would complicate callers logic. ++ ++*/ ++int reiser4_seal_validate(seal_t * seal /* seal to validate */, ++ coord_t * coord /* coord to validate against */, ++ const reiser4_key * key /* key to validate against */, ++ lock_handle * lh /* resulting lock handle */, ++ znode_lock_mode mode /* lock node */, ++ znode_lock_request request /* locking priority */) ++{ ++ znode *node; ++ int result; ++ ++ assert("nikita-1889", seal != NULL); ++ assert("nikita-1881", reiser4_seal_is_set(seal)); ++ assert("nikita-1882", key != NULL); ++ assert("nikita-1883", coord != NULL); ++ assert("nikita-1884", lh != NULL); ++ assert("nikita-1885", keyeq(&seal->key, key)); ++ assert("nikita-1989", coords_equal(&seal->coord1, coord)); ++ ++ /* obtain znode by block number */ ++ node = seal_node(seal); ++ if (node != NULL) { ++ /* znode was in cache, lock it */ ++ result = longterm_lock_znode(lh, node, mode, request); ++ zput(node); ++ if (result == 0) { ++ if (seal_matches(seal, node)) { ++ /* if seal version and znode version ++ coincide */ ++ ON_DEBUG(coord_update_v(coord)); ++ assert("nikita-1990", ++ node == seal->coord1.node); ++ assert("nikita-1898", ++ WITH_DATA_RET(coord->node, 1, ++ check_seal_match(coord, ++ 
key))); ++ } else ++ result = RETERR(-E_REPEAT); ++ } ++ if (result != 0) { ++ if (should_repeat(result)) ++ result = RETERR(-E_REPEAT); ++ /* unlock node on failure */ ++ done_lh(lh); ++ } ++ } else { ++ /* znode wasn't in cache */ ++ result = RETERR(-E_REPEAT); ++ } ++ return result; ++} ++ ++/* helpers functions */ ++ ++/* obtain reference to znode seal points to, if in cache */ ++static znode *seal_node(const seal_t * seal /* seal to query */ ) ++{ ++ assert("nikita-1891", seal != NULL); ++ return zlook(current_tree, &seal->block); ++} ++ ++/* true if @seal version and @node version coincide */ ++static int seal_matches(const seal_t * seal /* seal to check */ , ++ znode * node /* node to check */ ) ++{ ++ int result; ++ ++ assert("nikita-1991", seal != NULL); ++ assert("nikita-1993", node != NULL); ++ ++ spin_lock_znode(node); ++ result = (seal->version == node->version); ++ spin_unlock_znode(node); ++ return result; ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/seal.h b/fs/reiser4/seal.h +new file mode 100644 +index 0000000..5c3c5e0 +--- /dev/null ++++ b/fs/reiser4/seal.h +@@ -0,0 +1,49 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* Declaration of seals: "weak" tree pointers. See seal.c for comments. */ ++ ++#ifndef __SEAL_H__ ++#define __SEAL_H__ ++ ++#include "forward.h" ++#include "debug.h" ++#include "dformat.h" ++#include "key.h" ++#include "coord.h" ++ ++/* for __u?? types */ ++/*#include */ ++ ++/* seal. See comment at the top of seal.c */ ++typedef struct seal_s { ++ /* version of znode recorder at the time of seal creation */ ++ __u64 version; ++ /* block number of znode attached to this seal */ ++ reiser4_block_nr block; ++#if REISER4_DEBUG ++ /* coord this seal is attached to. For debugging. 
*/ ++ coord_t coord1; ++ /* key this seal is attached to. For debugging. */ ++ reiser4_key key; ++#endif ++} seal_t; ++ ++extern void reiser4_seal_init(seal_t *, const coord_t *, const reiser4_key *); ++extern void reiser4_seal_done(seal_t *); ++extern int reiser4_seal_is_set(const seal_t *); ++extern int reiser4_seal_validate(seal_t *, coord_t *, ++ const reiser4_key *, lock_handle *, ++ znode_lock_mode mode, znode_lock_request request); ++ ++/* __SEAL_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/search.c b/fs/reiser4/search.c +new file mode 100644 +index 0000000..9d35e11 +--- /dev/null ++++ b/fs/reiser4/search.c +@@ -0,0 +1,1611 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++#include "forward.h" ++#include "debug.h" ++#include "dformat.h" ++#include "key.h" ++#include "coord.h" ++#include "seal.h" ++#include "plugin/item/item.h" ++#include "plugin/node/node.h" ++#include "plugin/plugin.h" ++#include "jnode.h" ++#include "znode.h" ++#include "block_alloc.h" ++#include "tree_walk.h" ++#include "tree.h" ++#include "reiser4.h" ++#include "super.h" ++#include "inode.h" ++ ++#include ++ ++static const char *bias_name(lookup_bias bias); ++ ++/* tree searching algorithm, intranode searching algorithms are in ++ plugin/node/ */ ++ ++/* tree lookup cache ++ * ++ * The coord by key cache consists of small list of recently accessed nodes ++ * maintained according to the LRU discipline. Before doing real top-to-down ++ * tree traversal this cache is scanned for nodes that can contain key ++ * requested. ++ * ++ * The efficiency of coord cache depends heavily on locality of reference for ++ * tree accesses. Our user level simulations show reasonably good hit ratios ++ * for coord cache under most loads so far. 
++ */ ++ ++/* Initialise coord cache slot */ ++static void cbk_cache_init_slot(cbk_cache_slot *slot) ++{ ++ assert("nikita-345", slot != NULL); ++ ++ INIT_LIST_HEAD(&slot->lru); ++ slot->node = NULL; ++} ++ ++/* Initialize coord cache */ ++int cbk_cache_init(cbk_cache *cache /* cache to init */ ) ++{ ++ int i; ++ ++ assert("nikita-346", cache != NULL); ++ ++ cache->slot = ++ kmalloc(sizeof(cbk_cache_slot) * cache->nr_slots, ++ reiser4_ctx_gfp_mask_get()); ++ if (cache->slot == NULL) ++ return RETERR(-ENOMEM); ++ ++ INIT_LIST_HEAD(&cache->lru); ++ for (i = 0; i < cache->nr_slots; ++i) { ++ cbk_cache_init_slot(cache->slot + i); ++ list_add_tail(&((cache->slot + i)->lru), &cache->lru); ++ } ++ rwlock_init(&cache->guard); ++ return 0; ++} ++ ++/* free cbk cache data */ ++void cbk_cache_done(cbk_cache * cache /* cache to release */ ) ++{ ++ assert("nikita-2493", cache != NULL); ++ if (cache->slot != NULL) { ++ kfree(cache->slot); ++ cache->slot = NULL; ++ } ++} ++ ++/* macro to iterate over all cbk cache slots */ ++#define for_all_slots(cache, slot) \ ++ for ((slot) = list_entry((cache)->lru.next, cbk_cache_slot, lru); \ ++ &(cache)->lru != &(slot)->lru; \ ++ (slot) = list_entry(slot->lru.next, cbk_cache_slot, lru)) ++ ++#if REISER4_DEBUG ++/* this function assures that [cbk-cache-invariant] invariant holds */ ++static int cbk_cache_invariant(const cbk_cache *cache) ++{ ++ cbk_cache_slot *slot; ++ int result; ++ int unused; ++ ++ if (cache->nr_slots == 0) ++ return 1; ++ ++ assert("nikita-2469", cache != NULL); ++ unused = 0; ++ result = 1; ++ read_lock(&((cbk_cache *)cache)->guard); ++ for_all_slots(cache, slot) { ++ /* in LRU first go all `used' slots followed by `unused' */ ++ if (unused && (slot->node != NULL)) ++ result = 0; ++ if (slot->node == NULL) ++ unused = 1; ++ else { ++ cbk_cache_slot *scan; ++ ++ /* all cached nodes are different */ ++ scan = slot; ++ while (result) { ++ scan = list_entry(scan->lru.next, cbk_cache_slot, lru); ++ if (&cache->lru == 
&scan->lru) ++ break; ++ if (slot->node == scan->node) ++ result = 0; ++ } ++ } ++ if (!result) ++ break; ++ } ++ read_unlock(&((cbk_cache *)cache)->guard); ++ return result; ++} ++ ++#endif ++ ++/* Remove references, if any, to @node from coord cache */ ++void cbk_cache_invalidate(const znode * node /* node to remove from cache */ , ++ reiser4_tree * tree /* tree to remove node from */ ) ++{ ++ cbk_cache_slot *slot; ++ cbk_cache *cache; ++ int i; ++ ++ assert("nikita-350", node != NULL); ++ assert("nikita-1479", LOCK_CNT_GTZ(rw_locked_tree)); ++ ++ cache = &tree->cbk_cache; ++ assert("nikita-2470", cbk_cache_invariant(cache)); ++ ++ write_lock(&(cache->guard)); ++ for (i = 0, slot = cache->slot; i < cache->nr_slots; ++i, ++slot) { ++ if (slot->node == node) { ++ list_move_tail(&slot->lru, &cache->lru); ++ slot->node = NULL; ++ break; ++ } ++ } ++ write_unlock(&(cache->guard)); ++ assert("nikita-2471", cbk_cache_invariant(cache)); ++} ++ ++/* add to the cbk-cache in the "tree" information about "node". This ++ can actually be update of existing slot in a cache. 
*/ ++static void cbk_cache_add(const znode *node /* node to add to the cache */ ) ++{ ++ cbk_cache *cache; ++ cbk_cache_slot *slot; ++ int i; ++ ++ assert("nikita-352", node != NULL); ++ ++ cache = &znode_get_tree(node)->cbk_cache; ++ assert("nikita-2472", cbk_cache_invariant(cache)); ++ ++ if (cache->nr_slots == 0) ++ return; ++ ++ write_lock(&(cache->guard)); ++ /* find slot to update/add */ ++ for (i = 0, slot = cache->slot; i < cache->nr_slots; ++i, ++slot) { ++ /* oops, this node is already in a cache */ ++ if (slot->node == node) ++ break; ++ } ++ /* if all slots are used, reuse least recently used one */ ++ if (i == cache->nr_slots) { ++ slot = list_entry(cache->lru.prev, cbk_cache_slot, lru); ++ slot->node = (znode *) node; ++ } ++ list_move(&slot->lru, &cache->lru); ++ write_unlock(&(cache->guard)); ++ assert("nikita-2473", cbk_cache_invariant(cache)); ++} ++ ++static int setup_delimiting_keys(cbk_handle * h); ++static lookup_result coord_by_handle(cbk_handle * handle); ++static lookup_result traverse_tree(cbk_handle * h); ++static int cbk_cache_search(cbk_handle * h); ++ ++static level_lookup_result cbk_level_lookup(cbk_handle * h); ++static level_lookup_result cbk_node_lookup(cbk_handle * h); ++ ++/* helper functions */ ++ ++static void update_stale_dk(reiser4_tree * tree, znode * node); ++ ++/* release parent node during traversal */ ++static void put_parent(cbk_handle * h); ++/* check consistency of fields */ ++static int sanity_check(cbk_handle * h); ++/* release resources in handle */ ++static void hput(cbk_handle * h); ++ ++static level_lookup_result search_to_left(cbk_handle * h); ++ ++/* pack numerous (numberous I should say) arguments of coord_by_key() into ++ * cbk_handle */ ++static cbk_handle *cbk_pack(cbk_handle * handle, ++ reiser4_tree * tree, ++ const reiser4_key * key, ++ coord_t * coord, ++ lock_handle * active_lh, ++ lock_handle * parent_lh, ++ znode_lock_mode lock_mode, ++ lookup_bias bias, ++ tree_level lock_level, ++ tree_level 
stop_level, ++ __u32 flags, ra_info_t * info) ++{ ++ memset(handle, 0, sizeof *handle); ++ ++ handle->tree = tree; ++ handle->key = key; ++ handle->lock_mode = lock_mode; ++ handle->bias = bias; ++ handle->lock_level = lock_level; ++ handle->stop_level = stop_level; ++ handle->coord = coord; ++ /* set flags. See comment in tree.h:cbk_flags */ ++ handle->flags = flags | CBK_TRUST_DK | CBK_USE_CRABLOCK; ++ ++ handle->active_lh = active_lh; ++ handle->parent_lh = parent_lh; ++ handle->ra_info = info; ++ return handle; ++} ++ ++/* main tree lookup procedure ++ ++ Check coord cache. If key we are looking for is not found there, call cbk() ++ to do real tree traversal. ++ ++ As we have extents on the twig level, @lock_level and @stop_level can ++ be different from LEAF_LEVEL and each other. ++ ++ Thread cannot keep any reiser4 locks (tree, znode, dk spin-locks, or znode ++ long term locks) while calling this. ++*/ ++lookup_result coord_by_key(reiser4_tree * tree /* tree to perform search ++ * in. Usually this tree is ++ * part of file-system ++ * super-block */ , ++ const reiser4_key * key /* key to look for */ , ++ coord_t * coord /* where to store found ++ * position in a tree. Fields ++ * in "coord" are only valid if ++ * coord_by_key() returned ++ * "CBK_COORD_FOUND" */ , ++ lock_handle * lh, /* resulting lock handle */ ++ znode_lock_mode lock_mode /* type of lookup we ++ * want on node. Pass ++ * ZNODE_READ_LOCK here ++ * if you only want to ++ * read item found and ++ * ZNODE_WRITE_LOCK if ++ * you want to modify ++ * it */ , ++ lookup_bias bias /* what to return if coord ++ * with exactly the @key is ++ * not in the tree */ , ++ tree_level lock_level /* tree level where to start ++ * taking @lock type of ++ * locks */ , ++ tree_level stop_level /* tree level to stop. 
Pass ++ * LEAF_LEVEL or TWIG_LEVEL ++ * here Item being looked ++ * for has to be between ++ * @lock_level and ++ * @stop_level, inclusive */ , ++ __u32 flags /* search flags */ , ++ ra_info_t * ++ info ++ /* information about desired tree traversal readahead */ ++ ) ++{ ++ cbk_handle handle; ++ lock_handle parent_lh; ++ lookup_result result; ++ ++ init_lh(lh); ++ init_lh(&parent_lh); ++ ++ assert("nikita-3023", reiser4_schedulable()); ++ ++ assert("nikita-353", tree != NULL); ++ assert("nikita-354", key != NULL); ++ assert("nikita-355", coord != NULL); ++ assert("nikita-356", (bias == FIND_EXACT) ++ || (bias == FIND_MAX_NOT_MORE_THAN)); ++ assert("nikita-357", stop_level >= LEAF_LEVEL); ++ /* no locks can be held during tree traversal */ ++ assert("nikita-2104", lock_stack_isclean(get_current_lock_stack())); ++ ++ cbk_pack(&handle, ++ tree, ++ key, ++ coord, ++ lh, ++ &parent_lh, ++ lock_mode, bias, lock_level, stop_level, flags, info); ++ ++ result = coord_by_handle(&handle); ++ assert("nikita-3247", ++ ergo(!IS_CBKERR(result), coord->node == lh->node)); ++ return result; ++} ++ ++/* like coord_by_key(), but starts traversal from vroot of @object rather than ++ * from tree root. 
*/ ++lookup_result reiser4_object_lookup(struct inode * object, ++ const reiser4_key * key, ++ coord_t * coord, ++ lock_handle * lh, ++ znode_lock_mode lock_mode, ++ lookup_bias bias, ++ tree_level lock_level, ++ tree_level stop_level, __u32 flags, ++ ra_info_t * info) ++{ ++ cbk_handle handle; ++ lock_handle parent_lh; ++ lookup_result result; ++ ++ init_lh(lh); ++ init_lh(&parent_lh); ++ ++ assert("nikita-3023", reiser4_schedulable()); ++ ++ assert("nikita-354", key != NULL); ++ assert("nikita-355", coord != NULL); ++ assert("nikita-356", (bias == FIND_EXACT) ++ || (bias == FIND_MAX_NOT_MORE_THAN)); ++ assert("nikita-357", stop_level >= LEAF_LEVEL); ++ /* no locks can be held during tree search by key */ ++ assert("nikita-2104", lock_stack_isclean(get_current_lock_stack())); ++ ++ cbk_pack(&handle, ++ object != NULL ? reiser4_tree_by_inode(object) : current_tree, ++ key, ++ coord, ++ lh, ++ &parent_lh, ++ lock_mode, bias, lock_level, stop_level, flags, info); ++ handle.object = object; ++ ++ result = coord_by_handle(&handle); ++ assert("nikita-3247", ++ ergo(!IS_CBKERR(result), coord->node == lh->node)); ++ return result; ++} ++ ++/* lookup by cbk_handle. Common part of coord_by_key() and ++ reiser4_object_lookup(). */ ++static lookup_result coord_by_handle(cbk_handle * handle) ++{ ++ /* ++ * first check cbk_cache (which is look-aside cache for our tree) and ++ * of this fails, start traversal. ++ */ ++ /* first check whether "key" is in cache of recent lookups. */ ++ if (cbk_cache_search(handle) == 0) ++ return handle->result; ++ else ++ return traverse_tree(handle); ++} ++ ++/* Execute actor for each item (or unit, depending on @through_units_p), ++ starting from @coord, right-ward, until either: ++ ++ - end of the tree is reached ++ - unformatted node is met ++ - error occurred ++ - @actor returns 0 or less ++ ++ Error code, or last actor return value is returned. 
++ ++ This is used by plugin/dir/hashe_dir.c:reiser4_find_entry() to move through ++ sequence of entries with identical keys and alikes. ++*/ ++int reiser4_iterate_tree(reiser4_tree * tree /* tree to scan */ , ++ coord_t * coord /* coord to start from */ , ++ lock_handle * lh /* lock handle to start with and to ++ * update along the way */ , ++ tree_iterate_actor_t actor /* function to call on each ++ * item/unit */ , ++ void *arg /* argument to pass to @actor */ , ++ znode_lock_mode mode /* lock mode on scanned nodes */ , ++ int through_units_p /* call @actor on each item or on ++ * each unit */ ) ++{ ++ int result; ++ ++ assert("nikita-1143", tree != NULL); ++ assert("nikita-1145", coord != NULL); ++ assert("nikita-1146", lh != NULL); ++ assert("nikita-1147", actor != NULL); ++ ++ result = zload(coord->node); ++ coord_clear_iplug(coord); ++ if (result != 0) ++ return result; ++ if (!coord_is_existing_unit(coord)) { ++ zrelse(coord->node); ++ return -ENOENT; ++ } ++ while ((result = actor(tree, coord, lh, arg)) > 0) { ++ /* move further */ ++ if ((through_units_p && coord_next_unit(coord)) || ++ (!through_units_p && coord_next_item(coord))) { ++ do { ++ lock_handle couple; ++ ++ /* move to the next node */ ++ init_lh(&couple); ++ result = ++ reiser4_get_right_neighbor(&couple, ++ coord->node, ++ (int)mode, ++ GN_CAN_USE_UPPER_LEVELS); ++ zrelse(coord->node); ++ if (result == 0) { ++ ++ result = zload(couple.node); ++ if (result != 0) { ++ done_lh(&couple); ++ return result; ++ } ++ ++ coord_init_first_unit(coord, ++ couple.node); ++ done_lh(lh); ++ move_lh(lh, &couple); ++ } else ++ return result; ++ } while (node_is_empty(coord->node)); ++ } ++ ++ assert("nikita-1149", coord_is_existing_unit(coord)); ++ } ++ zrelse(coord->node); ++ return result; ++} ++ ++/* return locked uber znode for @tree */ ++int get_uber_znode(reiser4_tree * tree, znode_lock_mode mode, ++ znode_lock_request pri, lock_handle * lh) ++{ ++ int result; ++ ++ result = longterm_lock_znode(lh, 
tree->uber, mode, pri); ++ return result; ++} ++ ++/* true if @key is strictly within @node ++ ++ we are looking for possibly non-unique key and it is item is at the edge of ++ @node. May be it is in the neighbor. ++*/ ++static int znode_contains_key_strict(znode * node /* node to check key ++ * against */ , ++ const reiser4_key * ++ key /* key to check */ , ++ int isunique) ++{ ++ int answer; ++ ++ assert("nikita-1760", node != NULL); ++ assert("nikita-1722", key != NULL); ++ ++ if (keyge(key, &node->rd_key)) ++ return 0; ++ ++ answer = keycmp(&node->ld_key, key); ++ ++ if (isunique) ++ return answer != GREATER_THAN; ++ else ++ return answer == LESS_THAN; ++} ++ ++/* ++ * Virtual Root (vroot) code. ++ * ++ * For given file system object (e.g., regular file or directory) let's ++ * define its "virtual root" as lowest in the tree (that is, furtherest ++ * from the tree root) node such that all body items of said object are ++ * located in a tree rooted at this node. ++ * ++ * Once vroot of object is found all tree lookups for items within body of ++ * this object ("object lookups") can be started from its vroot rather ++ * than from real root. This has following advantages: ++ * ++ * 1. amount of nodes traversed during lookup (and, hence, amount of ++ * key comparisons made) decreases, and ++ * ++ * 2. contention on tree root is decreased. This latter was actually ++ * motivating reason behind vroot, because spin lock of root node, ++ * which is taken when acquiring long-term lock on root node is the ++ * hottest lock in the reiser4. ++ * ++ * How to find vroot. ++ * ++ * When vroot of object F is not yet determined, all object lookups start ++ * from the root of the tree. At each tree level during traversal we have ++ * a node N such that a key we are looking for (which is the key inside ++ * object's body) is located within N. In function handle_vroot() called ++ * from cbk_level_lookup() we check whether N is possible vroot for ++ * F. 
Check is trivial---if neither leftmost nor rightmost item of N ++ * belongs to F (and we already have helpful ->owns_item() method of ++ * object plugin for this), then N is possible vroot of F. This, of ++ * course, relies on the assumption that each object occupies contiguous ++ * range of keys in the tree. ++ * ++ * Thus, traversing tree downward and checking each node as we go, we can ++ * find lowest such node, which, by definition, is vroot. ++ * ++ * How to track vroot. ++ * ++ * Nohow. If actual vroot changes, next object lookup will just restart ++ * from the actual tree root, refreshing object's vroot along the way. ++ * ++ */ ++ ++/* ++ * Check whether @node is possible vroot of @object. ++ */ ++static void handle_vroot(struct inode *object, znode * node) ++{ ++ file_plugin *fplug; ++ coord_t coord; ++ ++ fplug = inode_file_plugin(object); ++ assert("nikita-3353", fplug != NULL); ++ assert("nikita-3354", fplug->owns_item != NULL); ++ ++ if (unlikely(node_is_empty(node))) ++ return; ++ ++ coord_init_first_unit(&coord, node); ++ /* ++ * if leftmost item of @node belongs to @object, we cannot be sure ++ * that @node is vroot of @object, because, some items of @object are ++ * probably in the sub-tree rooted at the left neighbor of @node. ++ */ ++ if (fplug->owns_item(object, &coord)) ++ return; ++ coord_init_last_unit(&coord, node); ++ /* mutatis mutandis for the rightmost item */ ++ if (fplug->owns_item(object, &coord)) ++ return; ++ /* otherwise, @node is possible vroot of @object */ ++ inode_set_vroot(object, node); ++} ++ ++/* ++ * helper function used by traverse tree to start tree traversal not from the ++ * tree root, but from @h->object's vroot, if possible. ++ */ ++static int prepare_object_lookup(cbk_handle * h) ++{ ++ znode *vroot; ++ int result; ++ ++ vroot = inode_get_vroot(h->object); ++ if (vroot == NULL) { ++ /* ++ * object doesn't have known vroot, start from real tree root. 
++ */ ++ return LOOKUP_CONT; ++ } ++ ++ h->level = znode_get_level(vroot); ++ /* take a long-term lock on vroot */ ++ h->result = longterm_lock_znode(h->active_lh, vroot, ++ cbk_lock_mode(h->level, h), ++ ZNODE_LOCK_LOPRI); ++ result = LOOKUP_REST; ++ if (h->result == 0) { ++ int isunique; ++ int inside; ++ ++ isunique = h->flags & CBK_UNIQUE; ++ /* check that key is inside vroot */ ++ read_lock_dk(h->tree); ++ inside = (znode_contains_key_strict(vroot, h->key, isunique) && ++ !ZF_ISSET(vroot, JNODE_HEARD_BANSHEE)); ++ read_unlock_dk(h->tree); ++ if (inside) { ++ h->result = zload(vroot); ++ if (h->result == 0) { ++ /* search for key in vroot. */ ++ result = cbk_node_lookup(h); ++ zrelse(vroot); /*h->active_lh->node); */ ++ if (h->active_lh->node != vroot) { ++ result = LOOKUP_REST; ++ } else if (result == LOOKUP_CONT) { ++ move_lh(h->parent_lh, h->active_lh); ++ h->flags &= ~CBK_DKSET; ++ } ++ } ++ } ++ } ++ ++ zput(vroot); ++ ++ if (IS_CBKERR(h->result) || result == LOOKUP_REST) ++ hput(h); ++ return result; ++} ++ ++/* main function that handles common parts of tree traversal: starting ++ (fake znode handling), restarts, error handling, completion */ ++static lookup_result traverse_tree(cbk_handle * h /* search handle */ ) ++{ ++ int done; ++ int iterations; ++ int vroot_used; ++ ++ assert("nikita-365", h != NULL); ++ assert("nikita-366", h->tree != NULL); ++ assert("nikita-367", h->key != NULL); ++ assert("nikita-368", h->coord != NULL); ++ assert("nikita-369", (h->bias == FIND_EXACT) ++ || (h->bias == FIND_MAX_NOT_MORE_THAN)); ++ assert("nikita-370", h->stop_level >= LEAF_LEVEL); ++ assert("nikita-2949", !(h->flags & CBK_DKSET)); ++ assert("zam-355", lock_stack_isclean(get_current_lock_stack())); ++ ++ done = 0; ++ iterations = 0; ++ vroot_used = 0; ++ ++ /* loop for restarts */ ++ restart: ++ ++ assert("nikita-3024", reiser4_schedulable()); ++ ++ h->result = CBK_COORD_FOUND; ++ /* connect_znode() needs it */ ++ h->ld_key = *reiser4_min_key(); ++ h->rd_key = 
*reiser4_max_key(); ++ h->flags |= CBK_DKSET; ++ h->error = NULL; ++ ++ if (!vroot_used && h->object != NULL) { ++ vroot_used = 1; ++ done = prepare_object_lookup(h); ++ if (done == LOOKUP_REST) { ++ goto restart; ++ } else if (done == LOOKUP_DONE) ++ return h->result; ++ } ++ if (h->parent_lh->node == NULL) { ++ done = ++ get_uber_znode(h->tree, ZNODE_READ_LOCK, ZNODE_LOCK_LOPRI, ++ h->parent_lh); ++ ++ assert("nikita-1637", done != -E_DEADLOCK); ++ ++ h->block = h->tree->root_block; ++ h->level = h->tree->height; ++ h->coord->node = h->parent_lh->node; ++ ++ if (done != 0) ++ return done; ++ } ++ ++ /* loop descending a tree */ ++ while (!done) { ++ ++ if (unlikely((iterations > REISER4_CBK_ITERATIONS_LIMIT) && ++ IS_POW(iterations))) { ++ warning("nikita-1481", "Too many iterations: %i", ++ iterations); ++ reiser4_print_key("key", h->key); ++ ++iterations; ++ } else if (unlikely(iterations > REISER4_MAX_CBK_ITERATIONS)) { ++ h->error = ++ "reiser-2018: Too many iterations. Tree corrupted, or (less likely) starvation occurring."; ++ h->result = RETERR(-EIO); ++ break; ++ } ++ switch (cbk_level_lookup(h)) { ++ case LOOKUP_CONT: ++ move_lh(h->parent_lh, h->active_lh); ++ continue; ++ default: ++ wrong_return_value("nikita-372", "cbk_level"); ++ case LOOKUP_DONE: ++ done = 1; ++ break; ++ case LOOKUP_REST: ++ hput(h); ++ /* deadlock avoidance is normal case. */ ++ if (h->result != -E_DEADLOCK) ++ ++iterations; ++ reiser4_preempt_point(); ++ goto restart; ++ } ++ } ++ /* that's all. 
The rest is error handling */ ++ if (unlikely(h->error != NULL)) { ++ warning("nikita-373", "%s: level: %i, " ++ "lock_level: %i, stop_level: %i " ++ "lock_mode: %s, bias: %s", ++ h->error, h->level, h->lock_level, h->stop_level, ++ lock_mode_name(h->lock_mode), bias_name(h->bias)); ++ reiser4_print_address("block", &h->block); ++ reiser4_print_key("key", h->key); ++ print_coord_content("coord", h->coord); ++ } ++ /* `unlikely' error case */ ++ if (unlikely(IS_CBKERR(h->result))) { ++ /* failure. do cleanup */ ++ hput(h); ++ } else { ++ assert("nikita-1605", WITH_DATA_RET ++ (h->coord->node, 1, ++ ergo((h->result == CBK_COORD_FOUND) && ++ (h->bias == FIND_EXACT) && ++ (!node_is_empty(h->coord->node)), ++ coord_is_existing_item(h->coord)))); ++ } ++ return h->result; ++} ++ ++/* find delimiting keys of child ++ ++ Determine left and right delimiting keys for child pointed to by ++ @parent_coord. ++ ++*/ ++static void find_child_delimiting_keys(znode * parent /* parent znode, passed ++ * locked */ , ++ const coord_t * parent_coord /* coord where ++ * pointer to ++ * child is ++ * stored */ , ++ reiser4_key * ld /* where to store left ++ * delimiting key */ , ++ reiser4_key * rd /* where to store right ++ * delimiting key */ ) ++{ ++ coord_t neighbor; ++ ++ assert("nikita-1484", parent != NULL); ++ assert_rw_locked(&(znode_get_tree(parent)->dk_lock)); ++ ++ coord_dup(&neighbor, parent_coord); ++ ++ if (neighbor.between == AT_UNIT) ++ /* imitate item ->lookup() behavior. 
*/ ++ neighbor.between = AFTER_UNIT; ++ ++ if (coord_set_to_left(&neighbor) == 0) ++ unit_key_by_coord(&neighbor, ld); ++ else { ++ assert("nikita-14851", 0); ++ *ld = *znode_get_ld_key(parent); ++ } ++ ++ coord_dup(&neighbor, parent_coord); ++ if (neighbor.between == AT_UNIT) ++ neighbor.between = AFTER_UNIT; ++ if (coord_set_to_right(&neighbor) == 0) ++ unit_key_by_coord(&neighbor, rd); ++ else ++ *rd = *znode_get_rd_key(parent); ++} ++ ++/* ++ * setup delimiting keys for a child ++ * ++ * @parent parent node ++ * ++ * @coord location in @parent where pointer to @child is ++ * ++ * @child child node ++ */ ++int ++set_child_delimiting_keys(znode * parent, const coord_t * coord, znode * child) ++{ ++ reiser4_tree *tree; ++ ++ assert("nikita-2952", ++ znode_get_level(parent) == znode_get_level(coord->node)); ++ ++ /* fast check without taking dk lock. This is safe, because ++ * JNODE_DKSET is never cleared once set. */ ++ if (!ZF_ISSET(child, JNODE_DKSET)) { ++ tree = znode_get_tree(parent); ++ write_lock_dk(tree); ++ if (likely(!ZF_ISSET(child, JNODE_DKSET))) { ++ find_child_delimiting_keys(parent, coord, ++ &child->ld_key, ++ &child->rd_key); ++ ON_DEBUG(child->ld_key_version = ++ atomic_inc_return(&delim_key_version); ++ child->rd_key_version = ++ atomic_inc_return(&delim_key_version);); ++ ZF_SET(child, JNODE_DKSET); ++ } ++ write_unlock_dk(tree); ++ return 1; ++ } ++ return 0; ++} ++ ++/* Perform tree lookup at one level. This is called from cbk_traverse() ++ function that drives lookup through tree and calls cbk_node_lookup() to ++ perform lookup within one node. ++ ++ See comments in a code. 
++*/ ++static level_lookup_result cbk_level_lookup(cbk_handle * h /* search handle */ ) ++{ ++ int ret; ++ int setdk; ++ int ldkeyset = 0; ++ reiser4_key ldkey; ++ reiser4_key key; ++ znode *active; ++ ++ assert("nikita-3025", reiser4_schedulable()); ++ ++ /* acquire reference to @active node */ ++ active = ++ zget(h->tree, &h->block, h->parent_lh->node, h->level, ++ reiser4_ctx_gfp_mask_get()); ++ ++ if (IS_ERR(active)) { ++ h->result = PTR_ERR(active); ++ return LOOKUP_DONE; ++ } ++ ++ /* lock @active */ ++ h->result = longterm_lock_znode(h->active_lh, ++ active, ++ cbk_lock_mode(h->level, h), ++ ZNODE_LOCK_LOPRI); ++ /* longterm_lock_znode() acquires additional reference to znode (which ++ will be later released by longterm_unlock_znode()). Release ++ reference acquired by zget(). ++ */ ++ zput(active); ++ if (unlikely(h->result != 0)) ++ goto fail_or_restart; ++ ++ setdk = 0; ++ /* if @active is accessed for the first time, setup delimiting keys on ++ it. Delimiting keys are taken from the parent node. See ++ setup_delimiting_keys() for details. ++ */ ++ if (h->flags & CBK_DKSET) { ++ setdk = setup_delimiting_keys(h); ++ h->flags &= ~CBK_DKSET; ++ } else { ++ znode *parent; ++ ++ parent = h->parent_lh->node; ++ h->result = zload(parent); ++ if (unlikely(h->result != 0)) ++ goto fail_or_restart; ++ ++ if (!ZF_ISSET(active, JNODE_DKSET)) ++ setdk = set_child_delimiting_keys(parent, ++ h->coord, active); ++ else { ++ read_lock_dk(h->tree); ++ find_child_delimiting_keys(parent, h->coord, &ldkey, ++ &key); ++ read_unlock_dk(h->tree); ++ ldkeyset = 1; ++ } ++ zrelse(parent); ++ } ++ ++ /* this is ugly kludge. Reminder: this is necessary, because ++ ->lookup() method returns coord with ->between field probably set ++ to something different from AT_UNIT. 
++ */ ++ h->coord->between = AT_UNIT; ++ ++ if (znode_just_created(active) && (h->coord->node != NULL)) { ++ write_lock_tree(h->tree); ++ /* if we are going to load znode right now, setup ++ ->in_parent: coord where pointer to this node is stored in ++ parent. ++ */ ++ coord_to_parent_coord(h->coord, &active->in_parent); ++ write_unlock_tree(h->tree); ++ } ++ ++ /* check connectedness without holding tree lock---false negatives ++ * will be re-checked by connect_znode(), and false positives are ++ * impossible---@active cannot suddenly turn into unconnected ++ * state. */ ++ if (!znode_is_connected(active)) { ++ h->result = connect_znode(h->coord, active); ++ if (unlikely(h->result != 0)) { ++ put_parent(h); ++ goto fail_or_restart; ++ } ++ } ++ ++ jload_prefetch(ZJNODE(active)); ++ ++ if (setdk) ++ update_stale_dk(h->tree, active); ++ ++ /* put_parent() cannot be called earlier, because connect_znode() ++ assumes parent node is referenced; */ ++ put_parent(h); ++ ++ if ((!znode_contains_key_lock(active, h->key) && ++ (h->flags & CBK_TRUST_DK)) ++ || ZF_ISSET(active, JNODE_HEARD_BANSHEE)) { ++ /* 1. key was moved out of this node while this thread was ++ waiting for the lock. Restart. More elaborate solution is ++ to determine where key moved (to the left, or to the right) ++ and try to follow it through sibling pointers. ++ ++ 2. or, node itself is going to be removed from the ++ tree. Release lock and restart. ++ */ ++ h->result = -E_REPEAT; ++ } ++ if (h->result == -E_REPEAT) ++ return LOOKUP_REST; ++ ++ h->result = zload_ra(active, h->ra_info); ++ if (h->result) { ++ return LOOKUP_DONE; ++ } ++ ++ /* sanity checks */ ++ if (sanity_check(h)) { ++ zrelse(active); ++ return LOOKUP_DONE; ++ } ++ ++ /* check that key of leftmost item in the @active is the same as in ++ * its parent */ ++ if (ldkeyset && !node_is_empty(active) && ++ !keyeq(leftmost_key_in_node(active, &key), &ldkey)) { ++ warning("vs-3533", "Keys are inconsistent. 
Fsck?"); ++ reiser4_print_key("inparent", &ldkey); ++ reiser4_print_key("inchild", &key); ++ h->result = RETERR(-EIO); ++ zrelse(active); ++ return LOOKUP_DONE; ++ } ++ ++ if (h->object != NULL) ++ handle_vroot(h->object, active); ++ ++ ret = cbk_node_lookup(h); ++ ++ /* h->active_lh->node might change, but active is yet to be zrelsed */ ++ zrelse(active); ++ ++ return ret; ++ ++ fail_or_restart: ++ if (h->result == -E_DEADLOCK) ++ return LOOKUP_REST; ++ return LOOKUP_DONE; ++} ++ ++#if REISER4_DEBUG ++/* check left and right delimiting keys of a znode */ ++void check_dkeys(znode * node) ++{ ++ znode *left; ++ znode *right; ++ ++ read_lock_tree(current_tree); ++ read_lock_dk(current_tree); ++ ++ assert("vs-1710", znode_is_any_locked(node)); ++ assert("vs-1197", ++ !keygt(znode_get_ld_key(node), znode_get_rd_key(node))); ++ ++ left = node->left; ++ right = node->right; ++ ++ if (ZF_ISSET(node, JNODE_LEFT_CONNECTED) && ZF_ISSET(node, JNODE_DKSET) ++ && left != NULL && ZF_ISSET(left, JNODE_DKSET)) ++ /* check left neighbor. Note that left neighbor is not locked, ++ so it might get wrong delimiting keys therefore */ ++ assert("vs-1198", ++ (keyeq(znode_get_rd_key(left), znode_get_ld_key(node)) ++ || ZF_ISSET(left, JNODE_HEARD_BANSHEE))); ++ ++ if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && ZF_ISSET(node, JNODE_DKSET) ++ && right != NULL && ZF_ISSET(right, JNODE_DKSET)) ++ /* check right neighbor. 
Note that right neighbor is not ++ locked, so it might get wrong delimiting keys therefore */ ++ assert("vs-1199", ++ (keyeq(znode_get_rd_key(node), znode_get_ld_key(right)) ++ || ZF_ISSET(right, JNODE_HEARD_BANSHEE))); ++ ++ read_unlock_dk(current_tree); ++ read_unlock_tree(current_tree); ++} ++#endif ++ ++/* true if @key is left delimiting key of @node */ ++static int key_is_ld(znode * node, const reiser4_key * key) ++{ ++ int ld; ++ ++ assert("nikita-1716", node != NULL); ++ assert("nikita-1758", key != NULL); ++ ++ read_lock_dk(znode_get_tree(node)); ++ assert("nikita-1759", znode_contains_key(node, key)); ++ ld = keyeq(znode_get_ld_key(node), key); ++ read_unlock_dk(znode_get_tree(node)); ++ return ld; ++} ++ ++/* Process one node during tree traversal. ++ ++ This is called by cbk_level_lookup(). */ ++static level_lookup_result cbk_node_lookup(cbk_handle * h /* search handle */ ) ++{ ++ /* node plugin of @active */ ++ node_plugin *nplug; ++ /* item plugin of item that was found */ ++ item_plugin *iplug; ++ /* search bias */ ++ lookup_bias node_bias; ++ /* node we are operating upon */ ++ znode *active; ++ /* tree we are searching in */ ++ reiser4_tree *tree; ++ /* result */ ++ int result; ++ ++ assert("nikita-379", h != NULL); ++ ++ active = h->active_lh->node; ++ tree = h->tree; ++ ++ nplug = active->nplug; ++ assert("nikita-380", nplug != NULL); ++ ++ ON_DEBUG(check_dkeys(active)); ++ ++ /* return item from "active" node with maximal key not greater than ++ "key" */ ++ node_bias = h->bias; ++ result = nplug->lookup(active, h->key, node_bias, h->coord); ++ if (unlikely(result != NS_FOUND && result != NS_NOT_FOUND)) { ++ /* error occurred */ ++ h->result = result; ++ return LOOKUP_DONE; ++ } ++ if (h->level == h->stop_level) { ++ /* welcome to the stop level */ ++ assert("nikita-381", h->coord->node == active); ++ if (result == NS_FOUND) { ++ /* success of tree lookup */ ++ if (!(h->flags & CBK_UNIQUE) ++ && key_is_ld(active, h->key)) { ++ return 
search_to_left(h); ++ } else ++ h->result = CBK_COORD_FOUND; ++ } else { ++ h->result = CBK_COORD_NOTFOUND; ++ } ++ if (!(h->flags & CBK_IN_CACHE)) ++ cbk_cache_add(active); ++ return LOOKUP_DONE; ++ } ++ ++ if (h->level > TWIG_LEVEL && result == NS_NOT_FOUND) { ++ h->error = "not found on internal node"; ++ h->result = result; ++ return LOOKUP_DONE; ++ } ++ ++ assert("vs-361", h->level > h->stop_level); ++ ++ if (handle_eottl(h, &result)) { ++ assert("vs-1674", (result == LOOKUP_DONE || ++ result == LOOKUP_REST)); ++ return result; ++ } ++ ++ /* go down to next level */ ++ check_me("vs-12", zload(h->coord->node) == 0); ++ assert("nikita-2116", item_is_internal(h->coord)); ++ iplug = item_plugin_by_coord(h->coord); ++ iplug->s.internal.down_link(h->coord, h->key, &h->block); ++ zrelse(h->coord->node); ++ --h->level; ++ return LOOKUP_CONT; /* continue */ ++} ++ ++/* scan cbk_cache slots looking for a match for @h */ ++static int cbk_cache_scan_slots(cbk_handle * h /* cbk handle */ ) ++{ ++ level_lookup_result llr; ++ znode *node; ++ reiser4_tree *tree; ++ cbk_cache_slot *slot; ++ cbk_cache *cache; ++ tree_level level; ++ int isunique; ++ const reiser4_key *key; ++ int result; ++ ++ assert("nikita-1317", h != NULL); ++ assert("nikita-1315", h->tree != NULL); ++ assert("nikita-1316", h->key != NULL); ++ ++ tree = h->tree; ++ cache = &tree->cbk_cache; ++ if (cache->nr_slots == 0) ++ /* size of cbk cache was set to 0 by mount time option. */ ++ return RETERR(-ENOENT); ++ ++ assert("nikita-2474", cbk_cache_invariant(cache)); ++ node = NULL; /* to keep gcc happy */ ++ level = h->level; ++ key = h->key; ++ isunique = h->flags & CBK_UNIQUE; ++ result = RETERR(-ENOENT); ++ ++ /* ++ * this is time-critical function and dragons had, hence, been settled ++ * here. ++ * ++ * Loop below scans cbk cache slots trying to find matching node with ++ * suitable range of delimiting keys and located at the h->level. 
++ * ++ * Scan is done under cbk cache spin lock that protects slot->node ++ * pointers. If suitable node is found we want to pin it in ++ * memory. But slot->node can point to the node with x_count 0 ++ * (unreferenced). Such node can be recycled at any moment, or can ++ * already be in the process of being recycled (within jput()). ++ * ++ * As we found node in the cbk cache, it means that jput() hasn't yet ++ * called cbk_cache_invalidate(). ++ * ++ * We acquire reference to the node without holding tree lock, and ++ * later, check node's RIP bit. This avoids races with jput(). ++ */ ++ ++ rcu_read_lock(); ++ read_lock(&((cbk_cache *)cache)->guard); ++ ++ slot = list_entry(cache->lru.next, cbk_cache_slot, lru); ++ slot = list_entry(slot->lru.prev, cbk_cache_slot, lru); ++ BUG_ON(&slot->lru != &cache->lru);/*????*/ ++ while (1) { ++ ++ slot = list_entry(slot->lru.next, cbk_cache_slot, lru); ++ ++ if (&cache->lru != &slot->lru) ++ node = slot->node; ++ else ++ node = NULL; ++ ++ if (unlikely(node == NULL)) ++ break; ++ ++ /* ++ * this is (hopefully) the only place in the code where we are ++ * working with delimiting keys without holding dk lock. This ++ * is fine here, because this is only "guess" anyway---keys ++ * are rechecked under dk lock below. 
++ */ ++ if (znode_get_level(node) == level && ++ /* reiser4_min_key < key < reiser4_max_key */ ++ znode_contains_key_strict(node, key, isunique)) { ++ zref(node); ++ result = 0; ++ spin_lock_prefetch(&tree->tree_lock); ++ break; ++ } ++ } ++ read_unlock(&((cbk_cache *)cache)->guard); ++ ++ assert("nikita-2475", cbk_cache_invariant(cache)); ++ ++ if (unlikely(result == 0 && ZF_ISSET(node, JNODE_RIP))) ++ result = -ENOENT; ++ ++ rcu_read_unlock(); ++ ++ if (result != 0) { ++ h->result = CBK_COORD_NOTFOUND; ++ return RETERR(-ENOENT); ++ } ++ ++ result = ++ longterm_lock_znode(h->active_lh, node, cbk_lock_mode(level, h), ++ ZNODE_LOCK_LOPRI); ++ zput(node); ++ if (result != 0) ++ return result; ++ result = zload(node); ++ if (result != 0) ++ return result; ++ ++ /* recheck keys */ ++ read_lock_dk(tree); ++ result = (znode_contains_key_strict(node, key, isunique) && ++ !ZF_ISSET(node, JNODE_HEARD_BANSHEE)); ++ read_unlock_dk(tree); ++ if (result) { ++ /* do lookup inside node */ ++ llr = cbk_node_lookup(h); ++ /* if cbk_node_lookup() wandered to another node (due to eottl ++ or non-unique keys), adjust @node */ ++ /*node = h->active_lh->node; */ ++ ++ if (llr != LOOKUP_DONE) { ++ /* restart or continue on the next level */ ++ result = RETERR(-ENOENT); ++ } else if (IS_CBKERR(h->result)) ++ /* io or oom */ ++ result = RETERR(-ENOENT); ++ else { ++ /* good. Either item found or definitely not found. */ ++ result = 0; ++ ++ write_lock(&(cache->guard)); ++ if (slot->node == h->active_lh->node /*node */ ) { ++ /* if this node is still in cbk cache---move ++ its slot to the head of the LRU list. */ ++ list_move(&slot->lru, &cache->lru); ++ } ++ write_unlock(&(cache->guard)); ++ } ++ } else { ++ /* race. While this thread was waiting for the lock, node was ++ rebalanced and item we are looking for, shifted out of it ++ (if it ever was here). 
++ ++ Continuing scanning is almost hopeless: node key range was ++ moved to, is almost certainly at the beginning of the LRU ++ list at this time, because it's hot, but restarting ++ scanning from the very beginning is complex. Just return, ++ so that cbk() will be performed. This is not that ++ important, because such races should be rare. Are they? ++ */ ++ result = RETERR(-ENOENT); /* -ERAUGHT */ ++ } ++ zrelse(node); ++ assert("nikita-2476", cbk_cache_invariant(cache)); ++ return result; ++} ++ ++/* look for item with given key in the coord cache ++ ++ This function, called by coord_by_key(), scans "coord cache" (&cbk_cache) ++ which is a small LRU list of znodes accessed lately. For each znode in ++ znode in this list, it checks whether key we are looking for fits into key ++ range covered by this node. If so, and in addition, node lies at allowed ++ level (this is to handle extents on a twig level), node is locked, and ++ lookup inside it is performed. ++ ++ we need a measurement of the cost of this cache search compared to the cost ++ of coord_by_key. ++ ++*/ ++static int cbk_cache_search(cbk_handle * h /* cbk handle */ ) ++{ ++ int result = 0; ++ tree_level level; ++ ++ /* add CBK_IN_CACHE to the handle flags. This means that ++ * cbk_node_lookup() assumes that cbk_cache is scanned and would add ++ * found node to the cache. */ ++ h->flags |= CBK_IN_CACHE; ++ for (level = h->stop_level; level <= h->lock_level; ++level) { ++ h->level = level; ++ result = cbk_cache_scan_slots(h); ++ if (result != 0) { ++ done_lh(h->active_lh); ++ done_lh(h->parent_lh); ++ } else { ++ assert("nikita-1319", !IS_CBKERR(h->result)); ++ break; ++ } ++ } ++ h->flags &= ~CBK_IN_CACHE; ++ return result; ++} ++ ++/* type of lock we want to obtain during tree traversal. On stop level ++ we want type of lock user asked for, on upper levels: read lock. 
*/ ++znode_lock_mode cbk_lock_mode(tree_level level, cbk_handle * h) ++{ ++ assert("nikita-382", h != NULL); ++ ++ return (level <= h->lock_level) ? h->lock_mode : ZNODE_READ_LOCK; ++} ++ ++/* update outdated delimiting keys */ ++static void stale_dk(reiser4_tree * tree, znode * node) ++{ ++ znode *right; ++ ++ read_lock_tree(tree); ++ write_lock_dk(tree); ++ right = node->right; ++ ++ if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && ++ right && ZF_ISSET(right, JNODE_DKSET) && ++ !keyeq(znode_get_rd_key(node), znode_get_ld_key(right))) ++ znode_set_rd_key(node, znode_get_ld_key(right)); ++ ++ write_unlock_dk(tree); ++ read_unlock_tree(tree); ++} ++ ++/* check for possibly outdated delimiting keys, and update them if ++ * necessary. */ ++static void update_stale_dk(reiser4_tree * tree, znode * node) ++{ ++ znode *right; ++ reiser4_key rd; ++ ++ read_lock_tree(tree); ++ read_lock_dk(tree); ++ rd = *znode_get_rd_key(node); ++ right = node->right; ++ if (unlikely(ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && ++ right && ZF_ISSET(right, JNODE_DKSET) && ++ !keyeq(&rd, znode_get_ld_key(right)))) { ++ assert("nikita-38211", ZF_ISSET(node, JNODE_DKSET)); ++ read_unlock_dk(tree); ++ read_unlock_tree(tree); ++ stale_dk(tree, node); ++ return; ++ } ++ read_unlock_dk(tree); ++ read_unlock_tree(tree); ++} ++ ++/* ++ * handle searches a the non-unique key. ++ * ++ * Suppose that we are looking for an item with possibly non-unique key 100. ++ * ++ * Root node contains two pointers: one to a node with left delimiting key 0, ++ * and another to a node with left delimiting key 100. Item we interested in ++ * may well happen in the sub-tree rooted at the first pointer. ++ * ++ * To handle this search_to_left() is called when search reaches stop ++ * level. 
This function checks it is _possible_ that item we are looking for ++ * is in the left neighbor (this can be done by comparing delimiting keys) and ++ * if so, tries to lock left neighbor (this is low priority lock, so it can ++ * deadlock, tree traversal is just restarted if it did) and then checks ++ * whether left neighbor actually contains items with our key. ++ * ++ * Note that this is done on the stop level only. It is possible to try such ++ * left-check on each level, but as duplicate keys are supposed to be rare ++ * (very unlikely that more than one node is completely filled with items with ++ * duplicate keys), it sis cheaper to scan to the left on the stop level once. ++ * ++ */ ++static level_lookup_result search_to_left(cbk_handle * h /* search handle */ ) ++{ ++ level_lookup_result result; ++ coord_t *coord; ++ znode *node; ++ znode *neighbor; ++ ++ lock_handle lh; ++ ++ assert("nikita-1761", h != NULL); ++ assert("nikita-1762", h->level == h->stop_level); ++ ++ init_lh(&lh); ++ coord = h->coord; ++ node = h->active_lh->node; ++ assert("nikita-1763", coord_is_leftmost_unit(coord)); ++ ++ h->result = ++ reiser4_get_left_neighbor(&lh, node, (int)h->lock_mode, ++ GN_CAN_USE_UPPER_LEVELS); ++ neighbor = NULL; ++ switch (h->result) { ++ case -E_DEADLOCK: ++ result = LOOKUP_REST; ++ break; ++ case 0:{ ++ node_plugin *nplug; ++ coord_t crd; ++ lookup_bias bias; ++ ++ neighbor = lh.node; ++ h->result = zload(neighbor); ++ if (h->result != 0) { ++ result = LOOKUP_DONE; ++ break; ++ } ++ ++ nplug = neighbor->nplug; ++ ++ coord_init_zero(&crd); ++ bias = h->bias; ++ h->bias = FIND_EXACT; ++ h->result = ++ nplug->lookup(neighbor, h->key, h->bias, &crd); ++ h->bias = bias; ++ ++ if (h->result == NS_NOT_FOUND) { ++ case -E_NO_NEIGHBOR: ++ h->result = CBK_COORD_FOUND; ++ if (!(h->flags & CBK_IN_CACHE)) ++ cbk_cache_add(node); ++ default: /* some other error */ ++ result = LOOKUP_DONE; ++ } else if (h->result == NS_FOUND) { ++ read_lock_dk(znode_get_tree(neighbor)); 
++ h->rd_key = *znode_get_ld_key(node); ++ leftmost_key_in_node(neighbor, &h->ld_key); ++ read_unlock_dk(znode_get_tree(neighbor)); ++ h->flags |= CBK_DKSET; ++ ++ h->block = *znode_get_block(neighbor); ++ /* clear coord -> node so that cbk_level_lookup() ++ wouldn't overwrite parent hint in neighbor. ++ ++ Parent hint was set up by ++ reiser4_get_left_neighbor() ++ */ ++ /* FIXME: why do we have to spinlock here? */ ++ write_lock_tree(znode_get_tree(neighbor)); ++ h->coord->node = NULL; ++ write_unlock_tree(znode_get_tree(neighbor)); ++ result = LOOKUP_CONT; ++ } else { ++ result = LOOKUP_DONE; ++ } ++ if (neighbor != NULL) ++ zrelse(neighbor); ++ } ++ } ++ done_lh(&lh); ++ return result; ++} ++ ++/* debugging aid: return symbolic name of search bias */ ++static const char *bias_name(lookup_bias bias /* bias to get name of */ ) ++{ ++ if (bias == FIND_EXACT) ++ return "exact"; ++ else if (bias == FIND_MAX_NOT_MORE_THAN) ++ return "left-slant"; ++/* else if( bias == RIGHT_SLANT_BIAS ) */ ++/* return "right-bias"; */ ++ else { ++ static char buf[30]; ++ ++ sprintf(buf, "unknown: %i", bias); ++ return buf; ++ } ++} ++ ++#if REISER4_DEBUG ++/* debugging aid: print human readable information about @p */ ++void print_coord_content(const char *prefix /* prefix to print */ , ++ coord_t * p /* coord to print */ ) ++{ ++ reiser4_key key; ++ ++ if (p == NULL) { ++ printk("%s: null\n", prefix); ++ return; ++ } ++ if ((p->node != NULL) && znode_is_loaded(p->node) ++ && coord_is_existing_item(p)) ++ printk("%s: data: %p, length: %i\n", prefix, ++ item_body_by_coord(p), item_length_by_coord(p)); ++ if (znode_is_loaded(p->node)) { ++ item_key_by_coord(p, &key); ++ reiser4_print_key(prefix, &key); ++ } ++} ++ ++/* debugging aid: print human readable information about @block */ ++void reiser4_print_address(const char *prefix /* prefix to print */ , ++ const reiser4_block_nr * block /* block number to print */ ) ++{ ++ printk("%s: %s\n", prefix, sprint_address(block)); ++} ++#endif 
++ ++/* return string containing human readable representation of @block */ ++char *sprint_address(const reiser4_block_nr * ++ block /* block number to print */ ) ++{ ++ static char address[30]; ++ ++ if (block == NULL) ++ sprintf(address, "null"); ++ else if (reiser4_blocknr_is_fake(block)) ++ sprintf(address, "%llx", (unsigned long long)(*block)); ++ else ++ sprintf(address, "%llu", (unsigned long long)(*block)); ++ return address; ++} ++ ++/* release parent node during traversal */ ++static void put_parent(cbk_handle * h /* search handle */ ) ++{ ++ assert("nikita-383", h != NULL); ++ if (h->parent_lh->node != NULL) { ++ longterm_unlock_znode(h->parent_lh); ++ } ++} ++ ++/* helper function used by coord_by_key(): release reference to parent znode ++ stored in handle before processing its child. */ ++static void hput(cbk_handle * h /* search handle */ ) ++{ ++ assert("nikita-385", h != NULL); ++ done_lh(h->parent_lh); ++ done_lh(h->active_lh); ++} ++ ++/* Helper function used by cbk(): update delimiting keys of child node (stored ++ in h->active_lh->node) using key taken from parent on the parent level. */ ++static int setup_delimiting_keys(cbk_handle * h /* search handle */ ) ++{ ++ znode *active; ++ reiser4_tree *tree; ++ ++ assert("nikita-1088", h != NULL); ++ ++ active = h->active_lh->node; ++ ++ /* fast check without taking dk lock. This is safe, because ++ * JNODE_DKSET is never cleared once set. */ ++ if (!ZF_ISSET(active, JNODE_DKSET)) { ++ tree = znode_get_tree(active); ++ write_lock_dk(tree); ++ if (!ZF_ISSET(active, JNODE_DKSET)) { ++ znode_set_ld_key(active, &h->ld_key); ++ znode_set_rd_key(active, &h->rd_key); ++ ZF_SET(active, JNODE_DKSET); ++ } ++ write_unlock_dk(tree); ++ return 1; ++ } ++ return 0; ++} ++ ++/* true if @block makes sense for the @tree. 
Used to detect corrupted node ++ * pointers */ ++static int ++block_nr_is_correct(reiser4_block_nr * block /* block number to check */ , ++ reiser4_tree * tree /* tree to check against */ ) ++{ ++ assert("nikita-757", block != NULL); ++ assert("nikita-758", tree != NULL); ++ ++ /* check to see if it exceeds the size of the device. */ ++ return reiser4_blocknr_is_sane_for(tree->super, block); ++} ++ ++/* check consistency of fields */ ++static int sanity_check(cbk_handle * h /* search handle */ ) ++{ ++ assert("nikita-384", h != NULL); ++ ++ if (h->level < h->stop_level) { ++ h->error = "Buried under leaves"; ++ h->result = RETERR(-EIO); ++ return LOOKUP_DONE; ++ } else if (!block_nr_is_correct(&h->block, h->tree)) { ++ h->error = "bad block number"; ++ h->result = RETERR(-EIO); ++ return LOOKUP_DONE; ++ } else ++ return 0; ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/status_flags.c b/fs/reiser4/status_flags.c +new file mode 100644 +index 0000000..b32f89a +--- /dev/null ++++ b/fs/reiser4/status_flags.c +@@ -0,0 +1,175 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Functions that deal with reiser4 status block, query status and update it, if needed */ ++ ++#include ++#include ++#include ++#include ++#include "debug.h" ++#include "dformat.h" ++#include "status_flags.h" ++#include "super.h" ++ ++/* This is our end I/O handler that marks page uptodate if IO was successful. It also ++ unconditionally unlocks the page, so we can see that io was done. ++ We do not free bio, because we hope to reuse that. 
*/ ++static int reiser4_status_endio(struct bio *bio, unsigned int bytes_done, ++ int err) ++{ ++ if (bio->bi_size) ++ return 1; ++ ++ if (test_bit(BIO_UPTODATE, &bio->bi_flags)) { ++ SetPageUptodate(bio->bi_io_vec->bv_page); ++ } else { ++ ClearPageUptodate(bio->bi_io_vec->bv_page); ++ SetPageError(bio->bi_io_vec->bv_page); ++ } ++ unlock_page(bio->bi_io_vec->bv_page); ++ return 0; ++} ++ ++/* Initialise status code. This is expected to be called from the disk format ++ code. block paremeter is where status block lives. */ ++int reiser4_status_init(reiser4_block_nr block) ++{ ++ struct super_block *sb = reiser4_get_current_sb(); ++ struct reiser4_status *statuspage; ++ struct bio *bio; ++ struct page *page; ++ ++ get_super_private(sb)->status_page = NULL; ++ get_super_private(sb)->status_bio = NULL; ++ ++ page = alloc_pages(reiser4_ctx_gfp_mask_get(), 0); ++ if (!page) ++ return -ENOMEM; ++ ++ bio = bio_alloc(reiser4_ctx_gfp_mask_get(), 1); ++ if (bio != NULL) { ++ bio->bi_sector = block * (sb->s_blocksize >> 9); ++ bio->bi_bdev = sb->s_bdev; ++ bio->bi_io_vec[0].bv_page = page; ++ bio->bi_io_vec[0].bv_len = sb->s_blocksize; ++ bio->bi_io_vec[0].bv_offset = 0; ++ bio->bi_vcnt = 1; ++ bio->bi_size = sb->s_blocksize; ++ bio->bi_end_io = reiser4_status_endio; ++ } else { ++ __free_pages(page, 0); ++ return -ENOMEM; ++ } ++ lock_page(page); ++ submit_bio(READ, bio); ++ blk_run_address_space(reiser4_get_super_fake(sb)->i_mapping); ++ wait_on_page_locked(page); ++ if (!PageUptodate(page)) { ++ warning("green-2007", ++ "I/O error while tried to read status page\n"); ++ return -EIO; ++ } ++ ++ statuspage = (struct reiser4_status *)kmap_atomic(page, KM_USER0); ++ if (memcmp ++ (statuspage->magic, REISER4_STATUS_MAGIC, ++ sizeof(REISER4_STATUS_MAGIC))) { ++ /* Magic does not match. 
*/ ++ kunmap_atomic((char *)statuspage, KM_USER0); ++ warning("green-2008", "Wrong magic in status block\n"); ++ __free_pages(page, 0); ++ bio_put(bio); ++ return -EINVAL; ++ } ++ kunmap_atomic((char *)statuspage, KM_USER0); ++ ++ get_super_private(sb)->status_page = page; ++ get_super_private(sb)->status_bio = bio; ++ return 0; ++} ++ ++/* Query the status of fs. Returns if the FS can be safely mounted. ++ Also if "status" and "extended" parameters are given, it will fill ++ actual parts of status from disk there. */ ++int reiser4_status_query(u64 * status, u64 * extended) ++{ ++ struct super_block *sb = reiser4_get_current_sb(); ++ struct reiser4_status *statuspage; ++ int retval; ++ ++ if (!get_super_private(sb)->status_page) { // No status page? ++ return REISER4_STATUS_MOUNT_UNKNOWN; ++ } ++ statuspage = (struct reiser4_status *) ++ kmap_atomic(get_super_private(sb)->status_page, KM_USER0); ++ switch ((long)le64_to_cpu(get_unaligned(&statuspage->status))) { // FIXME: this cast is a hack for 32 bit arches to work. ++ case REISER4_STATUS_OK: ++ retval = REISER4_STATUS_MOUNT_OK; ++ break; ++ case REISER4_STATUS_CORRUPTED: ++ retval = REISER4_STATUS_MOUNT_WARN; ++ break; ++ case REISER4_STATUS_DAMAGED: ++ case REISER4_STATUS_DESTROYED: ++ case REISER4_STATUS_IOERROR: ++ retval = REISER4_STATUS_MOUNT_RO; ++ break; ++ default: ++ retval = REISER4_STATUS_MOUNT_UNKNOWN; ++ break; ++ } ++ ++ if (status) ++ *status = le64_to_cpu(get_unaligned(&statuspage->status)); ++ if (extended) ++ *extended = le64_to_cpu(get_unaligned(&statuspage->extended_status)); ++ ++ kunmap_atomic((char *)statuspage, KM_USER0); ++ return retval; ++} ++ ++/* This function should be called when something bad happens (e.g. from reiser4_panic). ++ It fills the status structure and tries to push it to disk. 
*/ ++int reiser4_status_write(__u64 status, __u64 extended_status, char *message) ++{ ++ struct super_block *sb = reiser4_get_current_sb(); ++ struct reiser4_status *statuspage; ++ struct bio *bio = get_super_private(sb)->status_bio; ++ ++ if (!get_super_private(sb)->status_page) { // No status page? ++ return -1; ++ } ++ statuspage = (struct reiser4_status *) ++ kmap_atomic(get_super_private(sb)->status_page, KM_USER0); ++ ++ put_unaligned(cpu_to_le64(status), &statuspage->status); ++ put_unaligned(cpu_to_le64(extended_status), &statuspage->extended_status); ++ strncpy(statuspage->texterror, message, REISER4_TEXTERROR_LEN); ++ ++ kunmap_atomic((char *)statuspage, KM_USER0); ++ bio->bi_bdev = sb->s_bdev; ++ bio->bi_io_vec[0].bv_page = get_super_private(sb)->status_page; ++ bio->bi_io_vec[0].bv_len = sb->s_blocksize; ++ bio->bi_io_vec[0].bv_offset = 0; ++ bio->bi_vcnt = 1; ++ bio->bi_size = sb->s_blocksize; ++ bio->bi_end_io = reiser4_status_endio; ++ lock_page(get_super_private(sb)->status_page); // Safe as nobody should touch our page. ++ /* We can block now, but we have no other choice anyway */ ++ submit_bio(WRITE, bio); ++ blk_run_address_space(reiser4_get_super_fake(sb)->i_mapping); ++ return 0; // We do not wait for io to finish. ++} ++ ++/* Frees the page with status and bio structure. 
Should be called by disk format at umount time */ ++int reiser4_status_finish(void) ++{ ++ struct super_block *sb = reiser4_get_current_sb(); ++ ++ __free_pages(get_super_private(sb)->status_page, 0); ++ get_super_private(sb)->status_page = NULL; ++ bio_put(get_super_private(sb)->status_bio); ++ get_super_private(sb)->status_bio = NULL; ++ return 0; ++} +diff --git a/fs/reiser4/status_flags.h b/fs/reiser4/status_flags.h +new file mode 100644 +index 0000000..6cfa5ad +--- /dev/null ++++ b/fs/reiser4/status_flags.h +@@ -0,0 +1,43 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Here we declare structures and flags that store reiser4 status on disk. ++ The status that helps us to find out if the filesystem is valid or if it ++ contains some critical, or not so critical errors */ ++ ++#if !defined( __REISER4_STATUS_FLAGS_H__ ) ++#define __REISER4_STATUS_FLAGS_H__ ++ ++#include "dformat.h" ++/* These are major status flags */ ++#define REISER4_STATUS_OK 0 ++#define REISER4_STATUS_CORRUPTED 0x1 ++#define REISER4_STATUS_DAMAGED 0x2 ++#define REISER4_STATUS_DESTROYED 0x4 ++#define REISER4_STATUS_IOERROR 0x8 ++ ++/* Return values for reiser4_status_query() */ ++#define REISER4_STATUS_MOUNT_OK 0 ++#define REISER4_STATUS_MOUNT_WARN 1 ++#define REISER4_STATUS_MOUNT_RO 2 ++#define REISER4_STATUS_MOUNT_UNKNOWN -1 ++ ++#define REISER4_TEXTERROR_LEN 256 ++ ++#define REISER4_STATUS_MAGIC "ReiSeR4StATusBl" ++/* We probably need to keep its size under sector size which is 512 bytes */ ++struct reiser4_status { ++ char magic[16]; ++ d64 status; /* Current FS state */ ++ d64 extended_status; /* Any additional info that might have sense in addition to "status". E.g. 
++ last sector where io error happened if status is "io error encountered" */ ++ d64 stacktrace[10]; /* Last ten functional calls made (addresses) */ ++ char texterror[REISER4_TEXTERROR_LEN]; /* Any error message if appropriate, otherwise filled with zeroes */ ++}; ++ ++int reiser4_status_init(reiser4_block_nr block); ++int reiser4_status_query(u64 * status, u64 * extended); ++int reiser4_status_write(u64 status, u64 extended_status, char *message); ++int reiser4_status_finish(void); ++ ++#endif +diff --git a/fs/reiser4/super.c b/fs/reiser4/super.c +new file mode 100644 +index 0000000..bc4113e +--- /dev/null ++++ b/fs/reiser4/super.c +@@ -0,0 +1,316 @@ ++/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Super-block manipulations. */ ++ ++#include "debug.h" ++#include "dformat.h" ++#include "key.h" ++#include "plugin/security/perm.h" ++#include "plugin/space/space_allocator.h" ++#include "plugin/plugin.h" ++#include "tree.h" ++#include "vfs_ops.h" ++#include "super.h" ++#include "reiser4.h" ++ ++#include /* for __u?? 
*/ ++#include /* for struct super_block */ ++ ++static __u64 reserved_for_gid(const struct super_block *super, gid_t gid); ++static __u64 reserved_for_uid(const struct super_block *super, uid_t uid); ++static __u64 reserved_for_root(const struct super_block *super); ++ ++/* Return reiser4-specific part of super block */ ++reiser4_super_info_data *get_super_private_nocheck(const struct super_block *super /* super block ++ * queried */ ) ++{ ++ return (reiser4_super_info_data *) super->s_fs_info; ++} ++ ++/* Return reiser4 fstype: value that is returned in ->f_type field by statfs() */ ++long reiser4_statfs_type(const struct super_block *super UNUSED_ARG) ++{ ++ assert("nikita-448", super != NULL); ++ assert("nikita-449", is_reiser4_super(super)); ++ return (long)REISER4_SUPER_MAGIC; ++} ++ ++/* functions to read/modify fields of reiser4_super_info_data */ ++ ++/* get number of blocks in file system */ ++__u64 reiser4_block_count(const struct super_block *super /* super block ++ queried */ ) ++{ ++ assert("vs-494", super != NULL); ++ assert("vs-495", is_reiser4_super(super)); ++ return get_super_private(super)->block_count; ++} ++ ++#if REISER4_DEBUG ++/* ++ * number of blocks in the current file system ++ */ ++__u64 reiser4_current_block_count(void) ++{ ++ return get_current_super_private()->block_count; ++} ++#endif /* REISER4_DEBUG */ ++ ++/* set number of block in filesystem */ ++void reiser4_set_block_count(const struct super_block *super, __u64 nr) ++{ ++ assert("vs-501", super != NULL); ++ assert("vs-502", is_reiser4_super(super)); ++ get_super_private(super)->block_count = nr; ++ /* ++ * The proper calculation of the reserved space counter (%5 of device ++ * block counter) we need a 64 bit division which is missing in Linux ++ * on i386 platform. Because we do not need a precise calculation here ++ * we can replace a div64 operation by this combination of ++ * multiplication and shift: 51. / (2^10) == .0498 . ++ * FIXME: this is a bug. 
It comes up only for very small filesystems ++ * which probably are never used. Nevertheless, it is a bug. Number of ++ * reserved blocks must be not less than maximal number of blocks which ++ * get grabbed with BA_RESERVED. ++ */ ++ get_super_private(super)->blocks_reserved = ((nr * 51) >> 10); ++} ++ ++/* amount of blocks used (allocated for data) in file system */ ++__u64 reiser4_data_blocks(const struct super_block *super /* super block ++ queried */ ) ++{ ++ assert("nikita-452", super != NULL); ++ assert("nikita-453", is_reiser4_super(super)); ++ return get_super_private(super)->blocks_used; ++} ++ ++/* set number of block used in filesystem */ ++void reiser4_set_data_blocks(const struct super_block *super, __u64 nr) ++{ ++ assert("vs-503", super != NULL); ++ assert("vs-504", is_reiser4_super(super)); ++ get_super_private(super)->blocks_used = nr; ++} ++ ++/* amount of free blocks in file system */ ++__u64 reiser4_free_blocks(const struct super_block *super /* super block ++ queried */ ) ++{ ++ assert("nikita-454", super != NULL); ++ assert("nikita-455", is_reiser4_super(super)); ++ return get_super_private(super)->blocks_free; ++} ++ ++/* set number of blocks free in filesystem */ ++void reiser4_set_free_blocks(const struct super_block *super, __u64 nr) ++{ ++ assert("vs-505", super != NULL); ++ assert("vs-506", is_reiser4_super(super)); ++ get_super_private(super)->blocks_free = nr; ++} ++ ++/* get mkfs unique identifier */ ++__u32 reiser4_mkfs_id(const struct super_block *super /* super block ++ queried */ ) ++{ ++ assert("vpf-221", super != NULL); ++ assert("vpf-222", is_reiser4_super(super)); ++ return get_super_private(super)->mkfs_id; ++} ++ ++/* amount of free blocks in file system */ ++__u64 reiser4_free_committed_blocks(const struct super_block *super) ++{ ++ assert("vs-497", super != NULL); ++ assert("vs-498", is_reiser4_super(super)); ++ return get_super_private(super)->blocks_free_committed; ++} ++ ++/* amount of blocks in the file system 
reserved for @uid and @gid */ ++long reiser4_reserved_blocks(const struct super_block *super /* super block ++ queried */ , ++ uid_t uid /* user id */ , ++ gid_t gid /* group id */ ) ++{ ++ long reserved; ++ ++ assert("nikita-456", super != NULL); ++ assert("nikita-457", is_reiser4_super(super)); ++ ++ reserved = 0; ++ if (REISER4_SUPPORT_GID_SPACE_RESERVATION) ++ reserved += reserved_for_gid(super, gid); ++ if (REISER4_SUPPORT_UID_SPACE_RESERVATION) ++ reserved += reserved_for_uid(super, uid); ++ if (REISER4_SUPPORT_ROOT_SPACE_RESERVATION && (uid == 0)) ++ reserved += reserved_for_root(super); ++ return reserved; ++} ++ ++/* get/set value of/to grabbed blocks counter */ ++__u64 reiser4_grabbed_blocks(const struct super_block * super) ++{ ++ assert("zam-512", super != NULL); ++ assert("zam-513", is_reiser4_super(super)); ++ ++ return get_super_private(super)->blocks_grabbed; ++} ++ ++__u64 reiser4_flush_reserved(const struct super_block * super) ++{ ++ assert("vpf-285", super != NULL); ++ assert("vpf-286", is_reiser4_super(super)); ++ ++ return get_super_private(super)->blocks_flush_reserved; ++} ++ ++/* get/set value of/to counter of fake allocated formatted blocks */ ++__u64 reiser4_fake_allocated(const struct super_block * super) ++{ ++ assert("zam-516", super != NULL); ++ assert("zam-517", is_reiser4_super(super)); ++ ++ return get_super_private(super)->blocks_fake_allocated; ++} ++ ++/* get/set value of/to counter of fake allocated unformatted blocks */ ++__u64 reiser4_fake_allocated_unformatted(const struct super_block * super) ++{ ++ assert("zam-516", super != NULL); ++ assert("zam-517", is_reiser4_super(super)); ++ ++ return get_super_private(super)->blocks_fake_allocated_unformatted; ++} ++ ++/* get/set value of/to counter of clustered blocks */ ++__u64 reiser4_clustered_blocks(const struct super_block * super) ++{ ++ assert("edward-601", super != NULL); ++ assert("edward-602", is_reiser4_super(super)); ++ ++ return 
get_super_private(super)->blocks_clustered; ++} ++ ++/* space allocator used by this file system */ ++reiser4_space_allocator * reiser4_get_space_allocator(const struct super_block ++ *super) ++{ ++ assert("nikita-1965", super != NULL); ++ assert("nikita-1966", is_reiser4_super(super)); ++ return &get_super_private(super)->space_allocator; ++} ++ ++/* return fake inode used to bind formatted nodes in the page cache */ ++struct inode *reiser4_get_super_fake(const struct super_block *super /* super block ++ queried */ ) ++{ ++ assert("nikita-1757", super != NULL); ++ return get_super_private(super)->fake; ++} ++ ++/* return fake inode used to bind copied on capture nodes in the page cache */ ++struct inode *reiser4_get_cc_fake(const struct super_block *super /* super block ++ queried */ ) ++{ ++ assert("nikita-1757", super != NULL); ++ return get_super_private(super)->cc; ++} ++ ++/* return fake inode used to bind bitmaps and journlal heads */ ++struct inode *reiser4_get_bitmap_fake(const struct super_block *super) ++{ ++ assert("nikita-17571", super != NULL); ++ return get_super_private(super)->bitmap; ++} ++ ++/* tree used by this file system */ ++reiser4_tree *reiser4_get_tree(const struct super_block * super /* super block ++ * queried */ ) ++{ ++ assert("nikita-460", super != NULL); ++ assert("nikita-461", is_reiser4_super(super)); ++ return &get_super_private(super)->tree; ++} ++ ++/* Check that @super is (looks like) reiser4 super block. This is mainly for ++ use in assertions. 
*/ ++int is_reiser4_super(const struct super_block *super /* super block ++ * queried */ ) ++{ ++ return ++ super != NULL && ++ get_super_private(super) != NULL && ++ super->s_op == &(get_super_private(super)->ops.super); ++} ++ ++int reiser4_is_set(const struct super_block *super, reiser4_fs_flag f) ++{ ++ return test_bit((int)f, &get_super_private(super)->fs_flags); ++} ++ ++/* amount of blocks reserved for given group in file system */ ++static __u64 reserved_for_gid(const struct super_block *super UNUSED_ARG /* super ++ * block ++ * queried */ , ++ gid_t gid UNUSED_ARG /* group id */ ) ++{ ++ return 0; ++} ++ ++/* amount of blocks reserved for given user in file system */ ++static __u64 reserved_for_uid(const struct super_block *super UNUSED_ARG /* super ++ block ++ queried */ , ++ uid_t uid UNUSED_ARG /* user id */ ) ++{ ++ return 0; ++} ++ ++/* amount of blocks reserved for super user in file system */ ++static __u64 reserved_for_root(const struct super_block *super UNUSED_ARG /* super ++ block ++ queried */ ) ++{ ++ return 0; ++} ++ ++/* ++ * true if block number @blk makes sense for the file system at @super. ++ */ ++int ++reiser4_blocknr_is_sane_for(const struct super_block *super, ++ const reiser4_block_nr * blk) ++{ ++ reiser4_super_info_data *sbinfo; ++ ++ assert("nikita-2957", super != NULL); ++ assert("nikita-2958", blk != NULL); ++ ++ if (reiser4_blocknr_is_fake(blk)) ++ return 1; ++ ++ sbinfo = get_super_private(super); ++ return *blk < sbinfo->block_count; ++} ++ ++#if REISER4_DEBUG ++/* ++ * true, if block number @blk makes sense for the current file system ++ */ ++int reiser4_blocknr_is_sane(const reiser4_block_nr * blk) ++{ ++ return reiser4_blocknr_is_sane_for(reiser4_get_current_sb(), blk); ++} ++#endif /* REISER4_DEBUG */ ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/super.h b/fs/reiser4/super.h +new file mode 100644 +index 0000000..120f021 +--- /dev/null ++++ b/fs/reiser4/super.h +@@ -0,0 +1,464 @@ ++/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Super-block functions. See super.c for details. */ ++ ++#if !defined( __REISER4_SUPER_H__ ) ++#define __REISER4_SUPER_H__ ++ ++#include "tree.h" ++#include "entd.h" ++#include "wander.h" ++#include "fsdata.h" ++#include "plugin/object.h" ++#include "plugin/space/space_allocator.h" ++ ++/* ++ * Flush algorithms parameters. ++ */ ++typedef struct { ++ unsigned relocate_threshold; ++ unsigned relocate_distance; ++ unsigned written_threshold; ++ unsigned scan_maxnodes; ++} flush_params; ++ ++typedef enum { ++ /* ++ * True if this file system doesn't support hard-links (multiple names) ++ * for directories: this is default UNIX behavior. ++ * ++ * If hard-links on directoires are not allowed, file system is Acyclic ++ * Directed Graph (modulo dot, and dotdot, of course). ++ * ++ * This is used by reiser4_link(). ++ */ ++ REISER4_ADG = 0, ++ /* ++ * set if all nodes in internal tree have the same node layout plugin. ++ * If so, znode_guess_plugin() will return tree->node_plugin in stead ++ * of guessing plugin by plugin id stored in the node. ++ */ ++ REISER4_ONE_NODE_PLUGIN = 1, ++ /* if set, bsd gid assignment is supported. */ ++ REISER4_BSD_GID = 2, ++ /* [mac]_time are 32 bit in inode */ ++ REISER4_32_BIT_TIMES = 3, ++ /* load all bitmap blocks at mount time */ ++ REISER4_DONT_LOAD_BITMAP = 5, ++ /* enforce atomicity during write(2) */ ++ REISER4_ATOMIC_WRITE = 6, ++ /* don't use write barriers in the log writer code. */ ++ REISER4_NO_WRITE_BARRIER = 7 ++} reiser4_fs_flag; ++ ++/* ++ * VFS related operation vectors. 
++ */ ++typedef struct object_ops { ++ struct super_operations super; ++ struct dentry_operations dentry; ++ struct export_operations export; ++} object_ops; ++ ++/* reiser4-specific part of super block ++ ++ Locking ++ ++ Fields immutable after mount: ++ ++ ->oid* ++ ->space* ++ ->default_[ug]id ++ ->mkfs_id ++ ->trace_flags ++ ->debug_flags ++ ->fs_flags ++ ->df_plug ++ ->optimal_io_size ++ ->plug ++ ->flush ++ ->u (bad name) ++ ->txnmgr ++ ->ra_params ++ ->fsuid ++ ->journal_header ++ ->journal_footer ++ ++ Fields protected by ->lnode_guard ++ ++ ->lnode_htable ++ ++ Fields protected by per-super block spin lock ++ ++ ->block_count ++ ->blocks_used ++ ->blocks_free ++ ->blocks_free_committed ++ ->blocks_grabbed ++ ->blocks_fake_allocated_unformatted ++ ->blocks_fake_allocated ++ ->blocks_flush_reserved ++ ->eflushed ++ ->blocknr_hint_default ++ ++ After journal replaying during mount, ++ ++ ->last_committed_tx ++ ++ is protected by ->tmgr.commit_mutex ++ ++ Invariants involving this data-type: ++ ++ [sb-block-counts] ++ [sb-grabbed] ++ [sb-fake-allocated] ++*/ ++struct reiser4_super_info_data { ++ /* ++ * guard spinlock which protects reiser4 super block fields (currently ++ * blocks_free, blocks_free_committed) ++ */ ++ spinlock_t guard; ++ ++ /* next oid that will be returned by oid_allocate() */ ++ oid_t next_to_use; ++ /* total number of used oids */ ++ oid_t oids_in_use; ++ ++ /* space manager plugin */ ++ reiser4_space_allocator space_allocator; ++ ++ /* reiser4 internal tree */ ++ reiser4_tree tree; ++ ++ /* ++ * default user id used for light-weight files without their own ++ * stat-data. ++ */ ++ uid_t default_uid; ++ ++ /* ++ * default group id used for light-weight files without their own ++ * stat-data. ++ */ ++ gid_t default_gid; ++ ++ /* mkfs identifier generated at mkfs time. 
*/ ++ __u32 mkfs_id; ++ /* amount of blocks in a file system */ ++ __u64 block_count; ++ ++ /* inviolable reserve */ ++ __u64 blocks_reserved; ++ ++ /* amount of blocks used by file system data and meta-data. */ ++ __u64 blocks_used; ++ ++ /* ++ * amount of free blocks. This is "working" free blocks counter. It is ++ * like "working" bitmap, please see block_alloc.c for description. ++ */ ++ __u64 blocks_free; ++ ++ /* ++ * free block count for fs committed state. This is "commit" version of ++ * free block counter. ++ */ ++ __u64 blocks_free_committed; ++ ++ /* ++ * number of blocks reserved for further allocation, for all ++ * threads. ++ */ ++ __u64 blocks_grabbed; ++ ++ /* number of fake allocated unformatted blocks in tree. */ ++ __u64 blocks_fake_allocated_unformatted; ++ ++ /* number of fake allocated formatted blocks in tree. */ ++ __u64 blocks_fake_allocated; ++ ++ /* number of blocks reserved for flush operations. */ ++ __u64 blocks_flush_reserved; ++ ++ /* number of blocks reserved for cluster operations. */ ++ __u64 blocks_clustered; ++ ++ /* unique file-system identifier */ ++ __u32 fsuid; ++ ++ /* On-disk format version. If does not equal to the disk_format ++ plugin version, some format updates (e.g. enlarging plugin ++ set, etc) may have place on mount. */ ++ int version; ++ ++ /* file-system wide flags. 
See reiser4_fs_flag enum */ ++ unsigned long fs_flags; ++ ++ /* transaction manager */ ++ txn_mgr tmgr; ++ ++ /* ent thread */ ++ entd_context entd; ++ ++ /* fake inode used to bind formatted nodes */ ++ struct inode *fake; ++ /* inode used to bind bitmaps (and journal heads) */ ++ struct inode *bitmap; ++ /* inode used to bind copied on capture nodes */ ++ struct inode *cc; ++ ++ /* disk layout plugin */ ++ disk_format_plugin *df_plug; ++ ++ /* disk layout specific part of reiser4 super info data */ ++ union { ++ format40_super_info format40; ++ } u; ++ ++ /* value we return in st_blksize on stat(2) */ ++ unsigned long optimal_io_size; ++ ++ /* parameters for the flush algorithm */ ++ flush_params flush; ++ ++ /* pointers to jnodes for journal header and footer */ ++ jnode *journal_header; ++ jnode *journal_footer; ++ ++ journal_location jloc; ++ ++ /* head block number of last committed transaction */ ++ __u64 last_committed_tx; ++ ++ /* ++ * we remember last written location for using as a hint for new block ++ * allocation ++ */ ++ __u64 blocknr_hint_default; ++ ++ /* committed number of files (oid allocator state variable ) */ ++ __u64 nr_files_committed; ++ ++ ra_params_t ra_params; ++ ++ /* ++ * A mutex for serializing cut tree operation if out-of-free-space: ++ * the only one cut_tree thread is allowed to grab space from reserved ++ * area (it is 5% of disk space) ++ */ ++ struct mutex delete_mutex; ++ /* task owning ->delete_mutex */ ++ struct task_struct *delete_mutex_owner; ++ ++ /* Diskmap's blocknumber */ ++ __u64 diskmap_block; ++ ++ /* What to do in case of error */ ++ int onerror; ++ ++ /* operations for objects on this file system */ ++ object_ops ops; ++ ++ /* ++ * structure to maintain d_cursors. 
See plugin/file_ops_readdir.c for ++ * more details ++ */ ++ d_cursor_info d_info; ++ ++#ifdef CONFIG_REISER4_BADBLOCKS ++ /* Alternative master superblock offset (in bytes) */ ++ unsigned long altsuper; ++#endif ++ struct repacker *repacker; ++ struct page *status_page; ++ struct bio *status_bio; ++ ++#if REISER4_DEBUG ++ /* ++ * minimum used blocks value (includes super blocks, bitmap blocks and ++ * other fs reserved areas), depends on fs format and fs size. ++ */ ++ __u64 min_blocks_used; ++ ++ /* ++ * when debugging is on, all jnodes (including znodes, bitmaps, etc.) ++ * are kept on a list anchored at sbinfo->all_jnodes. This list is ++ * protected by sbinfo->all_guard spin lock. This lock should be taken ++ * with _irq modifier, because it is also modified from interrupt ++ * contexts (by RCU). ++ */ ++ spinlock_t all_guard; ++ /* list of all jnodes */ ++ struct list_head all_jnodes; ++#endif ++ struct dentry *debugfs_root; ++}; ++ ++extern reiser4_super_info_data *get_super_private_nocheck(const struct ++ super_block *super); ++ ++/* Return reiser4-specific part of super block */ ++static inline reiser4_super_info_data *get_super_private(const struct ++ super_block *super) ++{ ++ assert("nikita-447", super != NULL); ++ ++ return (reiser4_super_info_data *) super->s_fs_info; ++} ++ ++/* get ent context for the @super */ ++static inline entd_context *get_entd_context(struct super_block *super) ++{ ++ return &get_super_private(super)->entd; ++} ++ ++/* "Current" super-block: main super block used during current system ++ call. Reference to this super block is stored in reiser4_context. */ ++static inline struct super_block *reiser4_get_current_sb(void) ++{ ++ return get_current_context()->super; ++} ++ ++/* Reiser4-specific part of "current" super-block: main super block used ++ during current system call. Reference to this super block is stored in ++ reiser4_context. 
*/ ++static inline reiser4_super_info_data *get_current_super_private(void) ++{ ++ return get_super_private(reiser4_get_current_sb()); ++} ++ ++static inline ra_params_t *get_current_super_ra_params(void) ++{ ++ return &(get_current_super_private()->ra_params); ++} ++ ++/* ++ * true, if file system on @super is read-only ++ */ ++static inline int rofs_super(struct super_block *super) ++{ ++ return super->s_flags & MS_RDONLY; ++} ++ ++/* ++ * true, if @tree represents read-only file system ++ */ ++static inline int rofs_tree(reiser4_tree * tree) ++{ ++ return rofs_super(tree->super); ++} ++ ++/* ++ * true, if file system where @inode lives on, is read-only ++ */ ++static inline int rofs_inode(struct inode *inode) ++{ ++ return rofs_super(inode->i_sb); ++} ++ ++/* ++ * true, if file system where @node lives on, is read-only ++ */ ++static inline int rofs_jnode(jnode * node) ++{ ++ return rofs_tree(jnode_get_tree(node)); ++} ++ ++extern __u64 reiser4_current_block_count(void); ++ ++extern void build_object_ops(struct super_block *super, object_ops * ops); ++ ++#define REISER4_SUPER_MAGIC 0x52345362 /* (*(__u32 *)"R4Sb"); */ ++ ++static inline void spin_lock_reiser4_super(reiser4_super_info_data *sbinfo) ++{ ++ spin_lock(&(sbinfo->guard)); ++} ++ ++static inline void spin_unlock_reiser4_super(reiser4_super_info_data *sbinfo) ++{ ++ assert_spin_locked(&(sbinfo->guard)); ++ spin_unlock(&(sbinfo->guard)); ++} ++ ++extern __u64 reiser4_flush_reserved(const struct super_block *); ++extern int reiser4_is_set(const struct super_block *super, reiser4_fs_flag f); ++extern long reiser4_statfs_type(const struct super_block *super); ++extern __u64 reiser4_block_count(const struct super_block *super); ++extern void reiser4_set_block_count(const struct super_block *super, __u64 nr); ++extern __u64 reiser4_data_blocks(const struct super_block *super); ++extern void reiser4_set_data_blocks(const struct super_block *super, __u64 nr); ++extern __u64 reiser4_free_blocks(const struct 
super_block *super); ++extern void reiser4_set_free_blocks(const struct super_block *super, __u64 nr); ++extern __u32 reiser4_mkfs_id(const struct super_block *super); ++ ++extern __u64 reiser4_free_committed_blocks(const struct super_block *super); ++ ++extern __u64 reiser4_grabbed_blocks(const struct super_block *); ++extern __u64 reiser4_fake_allocated(const struct super_block *); ++extern __u64 reiser4_fake_allocated_unformatted(const struct super_block *); ++extern __u64 reiser4_clustered_blocks(const struct super_block *); ++ ++extern long reiser4_reserved_blocks(const struct super_block *super, uid_t uid, ++ gid_t gid); ++ ++extern reiser4_space_allocator * ++reiser4_get_space_allocator(const struct super_block *super); ++extern reiser4_oid_allocator * ++reiser4_get_oid_allocator(const struct super_block *super); ++extern struct inode *reiser4_get_super_fake(const struct super_block *super); ++extern struct inode *reiser4_get_cc_fake(const struct super_block *super); ++extern struct inode *reiser4_get_bitmap_fake(const struct super_block *super); ++extern reiser4_tree *reiser4_get_tree(const struct super_block *super); ++extern int is_reiser4_super(const struct super_block *super); ++ ++extern int reiser4_blocknr_is_sane(const reiser4_block_nr * blk); ++extern int reiser4_blocknr_is_sane_for(const struct super_block *super, ++ const reiser4_block_nr * blk); ++extern int reiser4_fill_super(struct super_block *s, void *data, int silent); ++extern int reiser4_done_super(struct super_block *s); ++ ++/* step of fill super */ ++extern int reiser4_init_fs_info(struct super_block *); ++extern void reiser4_done_fs_info(struct super_block *); ++extern int reiser4_init_super_data(struct super_block *, char *opt_string); ++extern int reiser4_init_read_super(struct super_block *, int silent); ++extern int reiser4_init_root_inode(struct super_block *); ++extern reiser4_plugin *get_default_plugin(pset_member memb); ++ ++/* Maximal possible object id. 
*/ ++#define ABSOLUTE_MAX_OID ((oid_t)~0) ++ ++#define OIDS_RESERVED ( 1 << 16 ) ++int oid_init_allocator(struct super_block *, oid_t nr_files, oid_t next); ++oid_t oid_allocate(struct super_block *); ++int oid_release(struct super_block *, oid_t); ++oid_t oid_next(const struct super_block *); ++void oid_count_allocated(void); ++void oid_count_released(void); ++long oids_used(const struct super_block *); ++ ++#if REISER4_DEBUG ++void print_fs_info(const char *prefix, const struct super_block *); ++#endif ++ ++extern void destroy_reiser4_cache(struct kmem_cache **); ++ ++extern struct super_operations reiser4_super_operations; ++extern struct export_operations reiser4_export_operations; ++extern struct dentry_operations reiser4_dentry_operations; ++ ++/* __REISER4_SUPER_H__ */ ++#endif ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 120 ++ * End: ++ */ +diff --git a/fs/reiser4/super_ops.c b/fs/reiser4/super_ops.c +new file mode 100644 +index 0000000..41e9c1a +--- /dev/null ++++ b/fs/reiser4/super_ops.c +@@ -0,0 +1,730 @@ ++/* Copyright 2005 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++#include "inode.h" ++#include "page_cache.h" ++#include "ktxnmgrd.h" ++#include "flush.h" ++#include "safe_link.h" ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* slab cache for inodes */ ++static struct kmem_cache *inode_cache; ++ ++static struct dentry *reiser4_debugfs_root = NULL; ++ ++/** ++ * init_once - constructor for reiser4 inodes ++ * @obj: inode to be initialized ++ * @cache: cache @obj belongs to ++ * @flags: SLAB flags ++ * ++ * Initialization function to be called when new page is allocated by reiser4 ++ * inode cache. It is set on inode cache creation. 
++ */ ++static void init_once(void *obj, struct kmem_cache *cache, unsigned long flags) ++{ ++ reiser4_inode_object *info; ++ ++ info = obj; ++ ++ if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == ++ SLAB_CTOR_CONSTRUCTOR) { ++ /* initialize vfs inode */ ++ inode_init_once(&info->vfs_inode); ++ ++ /* ++ * initialize reiser4 specific part fo inode. ++ * NOTE-NIKITA add here initializations for locks, list heads, ++ * etc. that will be added to our private inode part. ++ */ ++ INIT_LIST_HEAD(get_readdir_list(&info->vfs_inode)); ++ init_rwsem(&info->p.conv_sem); ++ /* init semaphore which is used during inode loading */ ++ loading_init_once(&info->p); ++ INIT_RADIX_TREE(jnode_tree_by_reiser4_inode(&info->p), ++ GFP_ATOMIC); ++#if REISER4_DEBUG ++ info->p.nr_jnodes = 0; ++#endif ++ } ++} ++ ++/** ++ * init_inodes - create znode cache ++ * ++ * Initializes slab cache of inodes. It is part of reiser4 module initialization. ++ */ ++static int init_inodes(void) ++{ ++ inode_cache = kmem_cache_create("reiser4_inode", ++ sizeof(reiser4_inode_object), ++ 0, ++ SLAB_HWCACHE_ALIGN | ++ SLAB_RECLAIM_ACCOUNT, init_once, NULL); ++ if (inode_cache == NULL) ++ return RETERR(-ENOMEM); ++ return 0; ++} ++ ++/** ++ * done_inodes - delete inode cache ++ * ++ * This is called on reiser4 module unloading or system shutdown. ++ */ ++static void done_inodes(void) ++{ ++ destroy_reiser4_cache(&inode_cache); ++} ++ ++/** ++ * reiser4_alloc_inode - alloc_inode of super operations ++ * @super: super block new inode is allocated for ++ * ++ * Allocates new inode, initializes reiser4 specific part of it. 
++ */ ++static struct inode *reiser4_alloc_inode(struct super_block *super) ++{ ++ reiser4_inode_object *obj; ++ ++ assert("nikita-1696", super != NULL); ++ obj = kmem_cache_alloc(inode_cache, reiser4_ctx_gfp_mask_get()); ++ if (obj != NULL) { ++ reiser4_inode *info; ++ ++ info = &obj->p; ++ ++ info->pset = plugin_set_get_empty(); ++ info->hset = plugin_set_get_empty(); ++ info->extmask = 0; ++ info->locality_id = 0ull; ++ info->plugin_mask = 0; ++ info->heir_mask = 0; ++#if !REISER4_INO_IS_OID ++ info->oid_hi = 0; ++#endif ++ reiser4_seal_init(&info->sd_seal, NULL, NULL); ++ coord_init_invalid(&info->sd_coord, NULL); ++ info->flags = 0; ++ spin_lock_init(&info->guard); ++ /* this deals with info's loading semaphore */ ++ loading_alloc(info); ++ info->vroot = UBER_TREE_ADDR; ++ return &obj->vfs_inode; ++ } else ++ return NULL; ++} ++ ++/** ++ * reiser4_destroy_inode - destroy_inode of super operations ++ * @inode: inode being destroyed ++ * ++ * Puts reiser4 specific portion of inode, frees memory occupied by inode. ++ */ ++static void reiser4_destroy_inode(struct inode *inode) ++{ ++ reiser4_inode *info; ++ ++ info = reiser4_inode_data(inode); ++ ++ assert("vs-1220", inode_has_no_jnodes(info)); ++ ++ if (!is_bad_inode(inode) && is_inode_loaded(inode)) { ++ file_plugin *fplug = inode_file_plugin(inode); ++ if (fplug->destroy_inode != NULL) ++ fplug->destroy_inode(inode); ++ } ++ reiser4_dispose_cursors(inode); ++ if (info->pset) ++ plugin_set_put(info->pset); ++ if (info->hset) ++ plugin_set_put(info->hset); ++ ++ /* ++ * cannot add similar assertion about ->i_list as prune_icache return ++ * inode into slab with dangling ->list.{next,prev}. This is safe, ++ * because they are re-initialized in the new_inode(). 
++ */ ++ assert("nikita-2895", list_empty(&inode->i_dentry)); ++ assert("nikita-2896", hlist_unhashed(&inode->i_hash)); ++ assert("nikita-2898", list_empty_careful(get_readdir_list(inode))); ++ ++ /* this deals with info's loading semaphore */ ++ loading_destroy(info); ++ ++ kmem_cache_free(inode_cache, ++ container_of(info, reiser4_inode_object, p)); ++} ++ ++/** ++ * reiser4_dirty_inode - dirty_inode of super operations ++ * @inode: inode being dirtied ++ * ++ * Updates stat data. ++ */ ++static void reiser4_dirty_inode(struct inode *inode) ++{ ++ int result; ++ ++ if (!is_in_reiser4_context()) ++ return; ++ assert("", !IS_RDONLY(inode)); ++ assert("", (inode_file_plugin(inode)->estimate.update(inode) <= ++ get_current_context()->grabbed_blocks)); ++ ++ result = reiser4_update_sd(inode); ++ if (result) ++ warning("", "failed to dirty inode for %llu: %d", ++ get_inode_oid(inode), result); ++} ++ ++/** ++ * reiser4_delete_inode - delete_inode of super operations ++ * @inode: inode to delete ++ * ++ * Calls file plugin's delete_object method to delete object items from ++ * filesystem tree and calls clear_inode. ++ */ ++static void reiser4_delete_inode(struct inode *inode) ++{ ++ reiser4_context *ctx; ++ file_plugin *fplug; ++ ++ ctx = reiser4_init_context(inode->i_sb); ++ if (IS_ERR(ctx)) { ++ warning("vs-15", "failed to init context"); ++ return; ++ } ++ ++ if (is_inode_loaded(inode)) { ++ fplug = inode_file_plugin(inode); ++ if (fplug != NULL && fplug->delete_object != NULL) ++ fplug->delete_object(inode); ++ } ++ ++ truncate_inode_pages(&inode->i_data, 0); ++ inode->i_blocks = 0; ++ clear_inode(inode); ++ reiser4_exit_context(ctx); ++} ++ ++/** ++ * reiser4_put_super - put_super of super operations ++ * @super: super block to free ++ * ++ * Stops daemons, release resources, umounts in short. 
++ */ ++static void reiser4_put_super(struct super_block *super) ++{ ++ reiser4_super_info_data *sbinfo; ++ reiser4_context *ctx; ++ ++ sbinfo = get_super_private(super); ++ assert("vs-1699", sbinfo); ++ ++ debugfs_remove(sbinfo->tmgr.debugfs_atom_count); ++ debugfs_remove(sbinfo->tmgr.debugfs_id_count); ++ debugfs_remove(sbinfo->debugfs_root); ++ ++ ctx = reiser4_init_context(super); ++ if (IS_ERR(ctx)) { ++ warning("vs-17", "failed to init context"); ++ return; ++ } ++ ++ /* have disk format plugin to free its resources */ ++ if (get_super_private(super)->df_plug->release) ++ get_super_private(super)->df_plug->release(super); ++ ++ reiser4_done_formatted_fake(super); ++ ++ /* stop daemons: ktxnmgr and entd */ ++ reiser4_done_entd(super); ++ reiser4_done_ktxnmgrd(super); ++ reiser4_done_txnmgr(&sbinfo->tmgr); ++ ++ reiser4_done_fs_info(super); ++ reiser4_exit_context(ctx); ++} ++ ++/** ++ * reiser4_write_super - write_super of super operations ++ * @super: super block to write ++ * ++ * Captures znode associated with super block, comit all transactions. ++ */ ++static void reiser4_write_super(struct super_block *super) ++{ ++ int ret; ++ reiser4_context *ctx; ++ ++ assert("vs-1700", !rofs_super(super)); ++ ++ ctx = reiser4_init_context(super); ++ if (IS_ERR(ctx)) { ++ warning("vs-16", "failed to init context"); ++ return; ++ } ++ ++ ret = reiser4_capture_super_block(super); ++ if (ret != 0) ++ warning("vs-1701", ++ "reiser4_capture_super_block failed in write_super: %d", ++ ret); ++ ret = txnmgr_force_commit_all(super, 0); ++ if (ret != 0) ++ warning("jmacd-77113", ++ "txn_force failed in write_super: %d", ret); ++ ++ super->s_dirt = 0; ++ ++ reiser4_exit_context(ctx); ++} ++ ++/** ++ * reiser4_statfs - statfs of super operations ++ * @super: super block of file system in queried ++ * @stafs: buffer to fill with statistics ++ * ++ * Returns information about filesystem. 
++ */ ++static int reiser4_statfs(struct dentry *dentry, struct kstatfs *statfs) ++{ ++ sector_t total; ++ sector_t reserved; ++ sector_t free; ++ sector_t forroot; ++ sector_t deleted; ++ reiser4_context *ctx; ++ struct super_block *super = dentry->d_sb; ++ ++ assert("nikita-408", super != NULL); ++ assert("nikita-409", statfs != NULL); ++ ++ ctx = reiser4_init_context(super); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ statfs->f_type = reiser4_statfs_type(super); ++ statfs->f_bsize = super->s_blocksize; ++ ++ /* ++ * 5% of total block space is reserved. This is needed for flush and ++ * for truncates (so that we are able to perform truncate/unlink even ++ * on the otherwise completely full file system). If this reservation ++ * is hidden from statfs(2), users will mistakenly guess that they ++ * have enough free space to complete some operation, which is ++ * frustrating. ++ * ++ * Another possible solution is to subtract ->blocks_reserved from ++ * ->f_bfree, but changing available space seems less intrusive than ++ * letting user to see 5% of disk space to be used directly after ++ * mkfs. ++ */ ++ total = reiser4_block_count(super); ++ reserved = get_super_private(super)->blocks_reserved; ++ deleted = txnmgr_count_deleted_blocks(); ++ free = reiser4_free_blocks(super) + deleted; ++ forroot = reiser4_reserved_blocks(super, 0, 0); ++ ++ /* ++ * These counters may be in inconsistent state because we take the ++ * values without keeping any global spinlock. Here we do a sanity ++ * check that free block counter does not exceed the number of all ++ * blocks. 
++ */ ++ if (free > total) ++ free = total; ++ statfs->f_blocks = total - reserved; ++ /* make sure statfs->f_bfree is never larger than statfs->f_blocks */ ++ if (free > reserved) ++ free -= reserved; ++ else ++ free = 0; ++ statfs->f_bfree = free; ++ ++ if (free > forroot) ++ free -= forroot; ++ else ++ free = 0; ++ statfs->f_bavail = free; ++ ++ statfs->f_files = 0; ++ statfs->f_ffree = 0; ++ ++ /* maximal acceptable name length depends on directory plugin. */ ++ assert("nikita-3351", super->s_root->d_inode != NULL); ++ statfs->f_namelen = reiser4_max_filename_len(super->s_root->d_inode); ++ reiser4_exit_context(ctx); ++ return 0; ++} ++ ++/** ++ * reiser4_clear_inode - clear_inode of super operation ++ * @inode: inode about to destroy ++ * ++ * Does sanity checks: being destroyed should have all jnodes detached. ++ */ ++static void reiser4_clear_inode(struct inode *inode) ++{ ++#if REISER4_DEBUG ++ reiser4_inode *r4_inode; ++ ++ r4_inode = reiser4_inode_data(inode); ++ if (!inode_has_no_jnodes(r4_inode)) ++ warning("vs-1732", "reiser4 inode has %ld jnodes\n", ++ r4_inode->nr_jnodes); ++#endif ++} ++ ++/** ++ * reiser4_sync_inodes - sync_inodes of super operations ++ * @super: ++ * @wbc: ++ * ++ * This method is called by background and non-backgound writeback. Reiser4's ++ * implementation uses generic_sync_sb_inodes to call reiser4_writepages for ++ * each of dirty inodes. Reiser4_writepages handles pages dirtied via shared ++ * mapping - dirty pages get into atoms. Writeout is called to flush some ++ * atoms. 
++ */ ++static void reiser4_sync_inodes(struct super_block *super, ++ struct writeback_control *wbc) ++{ ++ reiser4_context *ctx; ++ long to_write; ++ ++ if (wbc->for_kupdate) ++ /* reiser4 has its own means of periodical write-out */ ++ return; ++ ++ to_write = wbc->nr_to_write; ++ assert("vs-49", wbc->older_than_this == NULL); ++ ++ ctx = reiser4_init_context(super); ++ if (IS_ERR(ctx)) { ++ warning("vs-13", "failed to init context"); ++ return; ++ } ++ ++ /* ++ * call reiser4_writepages for each of dirty inodes to turn dirty pages ++ * into transactions if they were not yet. ++ */ ++ generic_sync_sb_inodes(super, wbc); ++ ++ /* flush goes here */ ++ wbc->nr_to_write = to_write; ++ reiser4_writeout(super, wbc); ++ ++ /* avoid recursive calls to ->sync_inodes */ ++ context_set_commit_async(ctx); ++ reiser4_exit_context(ctx); ++} ++ ++/** ++ * reiser4_show_options - show_options of super operations ++ * @m: file where to write information ++ * @mnt: mount structure ++ * ++ * Makes reiser4 mount options visible in /proc/mounts. 
++ */ ++static int reiser4_show_options(struct seq_file *m, struct vfsmount *mnt) ++{ ++ struct super_block *super; ++ reiser4_super_info_data *sbinfo; ++ ++ super = mnt->mnt_sb; ++ sbinfo = get_super_private(super); ++ ++ seq_printf(m, ",atom_max_size=0x%x", sbinfo->tmgr.atom_max_size); ++ seq_printf(m, ",atom_max_age=0x%x", sbinfo->tmgr.atom_max_age); ++ seq_printf(m, ",atom_min_size=0x%x", sbinfo->tmgr.atom_min_size); ++ seq_printf(m, ",atom_max_flushers=0x%x", ++ sbinfo->tmgr.atom_max_flushers); ++ seq_printf(m, ",cbk_cache_slots=0x%x", ++ sbinfo->tree.cbk_cache.nr_slots); ++ ++ return 0; ++} ++ ++struct super_operations reiser4_super_operations = { ++ .alloc_inode = reiser4_alloc_inode, ++ .destroy_inode = reiser4_destroy_inode, ++ .dirty_inode = reiser4_dirty_inode, ++ .delete_inode = reiser4_delete_inode, ++ .put_super = reiser4_put_super, ++ .write_super = reiser4_write_super, ++ .statfs = reiser4_statfs, ++ .clear_inode = reiser4_clear_inode, ++ .sync_inodes = reiser4_sync_inodes, ++ .show_options = reiser4_show_options ++}; ++ ++/** ++ * fill_super - initialize super block on mount ++ * @super: super block to fill ++ * @data: reiser4 specific mount option ++ * @silent: ++ * ++ * This is to be called by reiser4_get_sb. Mounts filesystem. 
++ */ ++static int fill_super(struct super_block *super, void *data, int silent) ++{ ++ reiser4_context ctx; ++ int result; ++ reiser4_super_info_data *sbinfo; ++ ++ assert("zam-989", super != NULL); ++ ++ super->s_op = NULL; ++ init_stack_context(&ctx, super); ++ ++ /* allocate reiser4 specific super block */ ++ if ((result = reiser4_init_fs_info(super)) != 0) ++ goto failed_init_sinfo; ++ ++ sbinfo = get_super_private(super); ++ /* initialize various reiser4 parameters, parse mount options */ ++ if ((result = reiser4_init_super_data(super, data)) != 0) ++ goto failed_init_super_data; ++ ++ /* read reiser4 master super block, initialize disk format plugin */ ++ if ((result = reiser4_init_read_super(super, silent)) != 0) ++ goto failed_init_read_super; ++ ++ /* initialize transaction manager */ ++ reiser4_init_txnmgr(&sbinfo->tmgr); ++ ++ /* initialize ktxnmgrd context and start kernel thread ktxnmrgd */ ++ if ((result = reiser4_init_ktxnmgrd(super)) != 0) ++ goto failed_init_ktxnmgrd; ++ ++ /* initialize entd context and start kernel thread entd */ ++ if ((result = reiser4_init_entd(super)) != 0) ++ goto failed_init_entd; ++ ++ /* initialize address spaces for formatted nodes and bitmaps */ ++ if ((result = reiser4_init_formatted_fake(super)) != 0) ++ goto failed_init_formatted_fake; ++ ++ /* initialize disk format plugin */ ++ if ((result = get_super_private(super)->df_plug->init_format(super, data)) != 0 ) ++ goto failed_init_disk_format; ++ ++ /* ++ * There are some 'committed' versions of reiser4 super block counters, ++ * which correspond to reiser4 on-disk state. 
These counters are ++ * initialized here ++ */ ++ sbinfo->blocks_free_committed = sbinfo->blocks_free; ++ sbinfo->nr_files_committed = oids_used(super); ++ ++ /* get inode of root directory */ ++ if ((result = reiser4_init_root_inode(super)) != 0) ++ goto failed_init_root_inode; ++ ++ if ((result = get_super_private(super)->df_plug->version_update(super)) != 0 ) ++ goto failed_update_format_version; ++ ++ process_safelinks(super); ++ reiser4_exit_context(&ctx); ++ ++ sbinfo->debugfs_root = debugfs_create_dir(super->s_id, ++ reiser4_debugfs_root); ++ if (sbinfo->debugfs_root) { ++ sbinfo->tmgr.debugfs_atom_count = ++ debugfs_create_u32("atom_count", S_IFREG|S_IRUSR, ++ sbinfo->debugfs_root, ++ &sbinfo->tmgr.atom_count); ++ sbinfo->tmgr.debugfs_id_count = ++ debugfs_create_u32("id_count", S_IFREG|S_IRUSR, ++ sbinfo->debugfs_root, ++ &sbinfo->tmgr.id_count); ++ } ++ return 0; ++ ++ failed_update_format_version: ++ failed_init_root_inode: ++ if (sbinfo->df_plug->release) ++ sbinfo->df_plug->release(super); ++ failed_init_disk_format: ++ reiser4_done_formatted_fake(super); ++ failed_init_formatted_fake: ++ reiser4_done_entd(super); ++ failed_init_entd: ++ reiser4_done_ktxnmgrd(super); ++ failed_init_ktxnmgrd: ++ reiser4_done_txnmgr(&sbinfo->tmgr); ++ failed_init_read_super: ++ failed_init_super_data: ++ reiser4_done_fs_info(super); ++ failed_init_sinfo: ++ reiser4_exit_context(&ctx); ++ return result; ++} ++ ++/** ++ * reiser4_get_sb - get_sb of file_system_type operations ++ * @fs_type: ++ * @flags: mount flags MS_RDONLY, MS_VERBOSE, etc ++ * @dev_name: block device file name ++ * @data: specific mount options ++ * ++ * Reiser4 mount entry. 
++ */ ++static int reiser4_get_sb(struct file_system_type *fs_type, ++ int flags, ++ const char *dev_name, ++ void *data, ++ struct vfsmount *mnt) ++{ ++ return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); ++} ++ ++/* structure describing the reiser4 filesystem implementation */ ++static struct file_system_type reiser4_fs_type = { ++ .owner = THIS_MODULE, ++ .name = "reiser4", ++ .fs_flags = FS_REQUIRES_DEV, ++ .get_sb = reiser4_get_sb, ++ .kill_sb = kill_block_super, ++ .next = NULL ++}; ++ ++void destroy_reiser4_cache(struct kmem_cache **cachep) ++{ ++ kmem_cache_destroy(*cachep); ++ *cachep = NULL; ++} ++ ++/** ++ * init_reiser4 - reiser4 initialization entry point ++ * ++ * Initializes reiser4 slabs, registers reiser4 filesystem type. It is called ++ * on kernel initialization or during reiser4 module load. ++ */ ++static int __init init_reiser4(void) ++{ ++ int result; ++ ++ printk(KERN_INFO ++ "Loading Reiser4. " ++ "See www.namesys.com for a description of Reiser4.\n"); ++ ++ /* initialize slab cache of inodes */ ++ if ((result = init_inodes()) != 0) ++ goto failed_inode_cache; ++ ++ /* initialize cache of znodes */ ++ if ((result = init_znodes()) != 0) ++ goto failed_init_znodes; ++ ++ /* initialize all plugins */ ++ if ((result = init_plugins()) != 0) ++ goto failed_init_plugins; ++ ++ /* initialize cache of plugin_set-s and plugin_set's hash table */ ++ if ((result = init_plugin_set()) != 0) ++ goto failed_init_plugin_set; ++ ++ /* initialize caches of txn_atom-s and txn_handle-s */ ++ if ((result = init_txnmgr_static()) != 0) ++ goto failed_init_txnmgr_static; ++ ++ /* initialize cache of jnodes */ ++ if ((result = init_jnodes()) != 0) ++ goto failed_init_jnodes; ++ ++ /* initialize cache of flush queues */ ++ if ((result = reiser4_init_fqs()) != 0) ++ goto failed_init_fqs; ++ ++ /* initialize cache of structures attached to dentry->d_fsdata */ ++ if ((result = reiser4_init_dentry_fsdata()) != 0) ++ goto failed_init_dentry_fsdata; ++ ++ 
/* initialize cache of structures attached to file->private_data */ ++ if ((result = reiser4_init_file_fsdata()) != 0) ++ goto failed_init_file_fsdata; ++ ++ /* ++ * initialize cache of d_cursors. See plugin/file_ops_readdir.c for ++ * more details ++ */ ++ if ((result = reiser4_init_d_cursor()) != 0) ++ goto failed_init_d_cursor; ++ ++ if ((result = register_filesystem(&reiser4_fs_type)) == 0) { ++ reiser4_debugfs_root = debugfs_create_dir("reiser4", NULL); ++ return 0; ++ } ++ ++ reiser4_done_d_cursor(); ++ failed_init_d_cursor: ++ reiser4_done_file_fsdata(); ++ failed_init_file_fsdata: ++ reiser4_done_dentry_fsdata(); ++ failed_init_dentry_fsdata: ++ reiser4_done_fqs(); ++ failed_init_fqs: ++ done_jnodes(); ++ failed_init_jnodes: ++ done_txnmgr_static(); ++ failed_init_txnmgr_static: ++ done_plugin_set(); ++ failed_init_plugin_set: ++ failed_init_plugins: ++ done_znodes(); ++ failed_init_znodes: ++ done_inodes(); ++ failed_inode_cache: ++ return result; ++} ++ ++/** ++ * done_reiser4 - reiser4 exit entry point ++ * ++ * Unregister reiser4 filesystem type, deletes caches. It is called on shutdown ++ * or at module unload. 
++ */ ++static void __exit done_reiser4(void) ++{ ++ int result; ++ ++ debugfs_remove(reiser4_debugfs_root); ++ result = unregister_filesystem(&reiser4_fs_type); ++ BUG_ON(result != 0); ++ reiser4_done_d_cursor(); ++ reiser4_done_file_fsdata(); ++ reiser4_done_dentry_fsdata(); ++ reiser4_done_fqs(); ++ done_jnodes(); ++ done_txnmgr_static(); ++ done_plugin_set(); ++ done_znodes(); ++ destroy_reiser4_cache(&inode_cache); ++} ++ ++module_init(init_reiser4); ++module_exit(done_reiser4); ++ ++MODULE_DESCRIPTION("Reiser4 filesystem"); ++MODULE_AUTHOR("Hans Reiser "); ++ ++MODULE_LICENSE("GPL"); ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * End: ++ */ +diff --git a/fs/reiser4/tap.c b/fs/reiser4/tap.c +new file mode 100644 +index 0000000..cfa5179 +--- /dev/null ++++ b/fs/reiser4/tap.c +@@ -0,0 +1,377 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* ++ Tree Access Pointer (tap). ++ ++ tap is data structure combining coord and lock handle (mostly). It is ++ useful when one has to scan tree nodes (for example, in readdir, or flush), ++ for tap functions allow to move tap in either direction transparently ++ crossing unit/item/node borders. ++ ++ Tap doesn't provide automatic synchronization of its fields as it is ++ supposed to be per-thread object. 
++*/ ++ ++#include "forward.h" ++#include "debug.h" ++#include "coord.h" ++#include "tree.h" ++#include "context.h" ++#include "tap.h" ++#include "znode.h" ++#include "tree_walk.h" ++ ++#if REISER4_DEBUG ++static int tap_invariant(const tap_t * tap); ++static void tap_check(const tap_t * tap); ++#else ++#define tap_check(tap) noop ++#endif ++ ++/** load node tap is pointing to, if not loaded already */ ++int reiser4_tap_load(tap_t * tap) ++{ ++ tap_check(tap); ++ if (tap->loaded == 0) { ++ int result; ++ ++ result = zload_ra(tap->coord->node, &tap->ra_info); ++ if (result != 0) ++ return result; ++ coord_clear_iplug(tap->coord); ++ } ++ ++tap->loaded; ++ tap_check(tap); ++ return 0; ++} ++ ++/** release node tap is pointing to. Dual to tap_load() */ ++void reiser4_tap_relse(tap_t * tap) ++{ ++ tap_check(tap); ++ if (tap->loaded > 0) { ++ --tap->loaded; ++ if (tap->loaded == 0) { ++ zrelse(tap->coord->node); ++ } ++ } ++ tap_check(tap); ++} ++ ++/** ++ * init tap to consist of @coord and @lh. Locks on nodes will be acquired with ++ * @mode ++ */ ++void reiser4_tap_init(tap_t * tap, coord_t * coord, lock_handle * lh, ++ znode_lock_mode mode) ++{ ++ tap->coord = coord; ++ tap->lh = lh; ++ tap->mode = mode; ++ tap->loaded = 0; ++ INIT_LIST_HEAD(&tap->linkage); ++ reiser4_init_ra_info(&tap->ra_info); ++} ++ ++/** add @tap to the per-thread list of all taps */ ++void reiser4_tap_monitor(tap_t * tap) ++{ ++ assert("nikita-2623", tap != NULL); ++ tap_check(tap); ++ list_add(&tap->linkage, reiser4_taps_list()); ++ tap_check(tap); ++} ++ ++/* duplicate @src into @dst. Copy lock handle. @dst is not initially ++ * loaded. 
*/ ++void reiser4_tap_copy(tap_t * dst, tap_t * src) ++{ ++ assert("nikita-3193", src != NULL); ++ assert("nikita-3194", dst != NULL); ++ ++ *dst->coord = *src->coord; ++ if (src->lh->node) ++ copy_lh(dst->lh, src->lh); ++ dst->mode = src->mode; ++ dst->loaded = 0; ++ INIT_LIST_HEAD(&dst->linkage); ++ dst->ra_info = src->ra_info; ++} ++ ++/** finish with @tap */ ++void reiser4_tap_done(tap_t * tap) ++{ ++ assert("nikita-2565", tap != NULL); ++ tap_check(tap); ++ if (tap->loaded > 0) ++ zrelse(tap->coord->node); ++ done_lh(tap->lh); ++ tap->loaded = 0; ++ list_del_init(&tap->linkage); ++ tap->coord->node = NULL; ++} ++ ++/** ++ * move @tap to the new node, locked with @target. Load @target, if @tap was ++ * already loaded. ++ */ ++int reiser4_tap_move(tap_t * tap, lock_handle * target) ++{ ++ int result = 0; ++ ++ assert("nikita-2567", tap != NULL); ++ assert("nikita-2568", target != NULL); ++ assert("nikita-2570", target->node != NULL); ++ assert("nikita-2569", tap->coord->node == tap->lh->node); ++ ++ tap_check(tap); ++ if (tap->loaded > 0) ++ result = zload_ra(target->node, &tap->ra_info); ++ ++ if (result == 0) { ++ if (tap->loaded > 0) ++ zrelse(tap->coord->node); ++ done_lh(tap->lh); ++ copy_lh(tap->lh, target); ++ tap->coord->node = target->node; ++ coord_clear_iplug(tap->coord); ++ } ++ tap_check(tap); ++ return result; ++} ++ ++/** ++ * move @tap to @target. Acquire lock on @target, if @tap was already ++ * loaded. 
/**
 * move @tap to @target. Acquire lock on @target, if @tap was already
 * loaded.
 */
static int tap_to(tap_t * tap, znode * target)
{
	int result;

	assert("nikita-2624", tap != NULL);
	assert("nikita-2625", target != NULL);

	tap_check(tap);
	result = 0;
	/* nothing to do when the tap already sits on @target */
	if (tap->coord->node != target) {
		lock_handle here;

		init_lh(&here);
		result = longterm_lock_znode(&here, target,
					     tap->mode, ZNODE_LOCK_HIPRI);
		if (result == 0) {
			result = reiser4_tap_move(tap, &here);
			done_lh(&here);
		}
	}
	tap_check(tap);
	return result;
}

/**
 * move @tap to given @target, loading and locking @target->node if
 * necessary
 */
int tap_to_coord(tap_t * tap, coord_t * target)
{
	int result;

	tap_check(tap);
	result = tap_to(tap, target->node);
	if (result == 0)
		coord_dup(tap->coord, target);
	tap_check(tap);
	return result;
}

/** return list of all taps of the current reiser4 context */
struct list_head *reiser4_taps_list(void)
{
	return &get_current_context()->taps;
}

/** helper function for go_{next,prev}_{item,unit,node}() */
int go_dir_el(tap_t * tap, sideof dir, int units_p)
{
	coord_t dup;
	coord_t *coord;
	int result;

	/* direction-dependent primitives, selected once below */
	int (*coord_dir) (coord_t *);
	int (*get_dir_neighbor) (lock_handle *, znode *, int, int);
	void (*coord_init) (coord_t *, const znode *);
	ON_DEBUG(int (*coord_check) (const coord_t *));

	assert("nikita-2556", tap != NULL);
	assert("nikita-2557", tap->coord != NULL);
	assert("nikita-2558", tap->lh != NULL);
	assert("nikita-2559", tap->coord->node != NULL);

	tap_check(tap);
	if (dir == LEFT_SIDE) {
		coord_dir = units_p ? coord_prev_unit : coord_prev_item;
		get_dir_neighbor = reiser4_get_left_neighbor;
		coord_init = coord_init_last_unit;
	} else {
		coord_dir = units_p ? coord_next_unit : coord_next_item;
		get_dir_neighbor = reiser4_get_right_neighbor;
		coord_init = coord_init_first_unit;
	}
	ON_DEBUG(coord_check =
		 units_p ? coord_is_existing_unit : coord_is_existing_item);
	assert("nikita-2560", coord_check(tap->coord));

	coord = tap->coord;
	coord_dup(&dup, coord);
	/* coord_dir() != 0 means we cannot step within this node and must
	 * move to a neighboring node instead */
	if (coord_dir(&dup) != 0) {
		do {
			/* move to the neighboring node */
			/* NOTE(review): this inner lock_handle shadows the
			 * outer coord_t of the same name */
			lock_handle dup;

			init_lh(&dup);
			result =
			    get_dir_neighbor(&dup, coord->node, (int)tap->mode,
					     GN_CAN_USE_UPPER_LEVELS);
			if (result == 0) {
				result = reiser4_tap_move(tap, &dup);
				if (result == 0)
					coord_init(tap->coord, dup.node);
				done_lh(&dup);
			}
			/* skip empty nodes */
		} while ((result == 0) && node_is_empty(coord->node));
	} else {
		result = 0;
		coord_dup(coord, &dup);
	}
	assert("nikita-2564", ergo(!result, coord_check(tap->coord)));
	tap_check(tap);
	return result;
}

/**
 * move @tap to the next unit, transparently crossing item and node
 * boundaries
 */
int go_next_unit(tap_t * tap)
{
	return go_dir_el(tap, RIGHT_SIDE, 1);
}

/**
 * move @tap to the previous unit, transparently crossing item and node
 * boundaries
 */
int go_prev_unit(tap_t * tap)
{
	return go_dir_el(tap, LEFT_SIDE, 1);
}
/**
 * @shift times apply @actor to the @tap. This is used to move @tap by
 * @shift units (or items, or nodes) in either direction.
 */
static int rewind_to(tap_t * tap, go_actor_t actor, int shift)
{
	int result;

	assert("nikita-2555", shift >= 0);
	assert("nikita-2562", tap->coord->node == tap->lh->node);

	tap_check(tap);
	/* keep the node pinned for the whole walk */
	result = reiser4_tap_load(tap);
	if (result != 0)
		return result;

	for (; shift > 0; --shift) {
		result = actor(tap);
		assert("nikita-2563", tap->coord->node == tap->lh->node);
		if (result != 0)
			break;
	}
	reiser4_tap_relse(tap);
	tap_check(tap);
	return result;
}

/** move @tap @shift units rightward */
int rewind_right(tap_t * tap, int shift)
{
	return rewind_to(tap, go_next_unit, shift);
}

/** move @tap @shift units leftward */
int rewind_left(tap_t * tap, int shift)
{
	return rewind_to(tap, go_prev_unit, shift);
}

#if REISER4_DEBUG
/** debugging function: print @tap content in human readable form */
static void print_tap(const char *prefix, const tap_t * tap)
{
	if (tap == NULL) {
		printk("%s: null tap\n", prefix);
		return;
	}
	/* NOTE(review): the "in-list" value is 1 when ->linkage points to
	 * itself, i.e. when the tap is NOT linked into the taps list */
	printk("%s: loaded: %i, in-list: %i, node: %p, mode: %s\n", prefix,
	       tap->loaded, (&tap->linkage == tap->linkage.next &&
			     &tap->linkage == tap->linkage.prev),
	       tap->lh->node,
	       lock_mode_name(tap->mode));
	print_coord("\tcoord", tap->coord, 0);
}

/** check [tap-sane] invariant; returns 0 if sane, ordinal of the first
 * violated clause otherwise */
static int tap_invariant(const tap_t * tap)
{
	/* [tap-sane] invariant */

	if (tap == NULL)
		return 1;
	/* tap->mode is one of
	 *
	 * {ZNODE_NO_LOCK, ZNODE_READ_LOCK, ZNODE_WRITE_LOCK}, and
	 */
	if (tap->mode != ZNODE_NO_LOCK &&
	    tap->mode != ZNODE_READ_LOCK && tap->mode != ZNODE_WRITE_LOCK)
		return 2;
	/* tap->coord != NULL, and */
	if (tap->coord == NULL)
		return 3;
	/* tap->lh != NULL, and */
	if (tap->lh == NULL)
		return 4;
	/* tap->loaded > 0 => znode_is_loaded(tap->coord->node), and */
	if (!ergo(tap->loaded, znode_is_loaded(tap->coord->node)))
		return 5;
	/* tap->coord->node == tap->lh->node if tap->lh->node is not 0 */
	if (tap->lh->node != NULL && tap->coord->node != tap->lh->node)
		return 6;
	return 0;
}

/** debugging function: check internal @tap consistency */
static void tap_check(const tap_t * tap)
{
	int result;

	result = tap_invariant(tap);
	if (result != 0) {
		print_tap("broken", tap);
		reiser4_panic("nikita-2831", "tap broken: %i\n", result);
	}
}
#endif

/* Make Linus happy.
   Local variables:
   c-indentation-style: "K&R"
   mode-name: "LC"
   c-basic-offset: 8
   tab-width: 8
   fill-column: 120
   scroll-step: 1
   End:
*/
diff --git a/fs/reiser4/tap.h b/fs/reiser4/tap.h
new file mode 100644
index 0000000..1416729
--- /dev/null
+++ b/fs/reiser4/tap.h
@@ -0,0 +1,70 @@
/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */

/* Tree Access Pointers. See tap.c for more details. */

#if !defined( __REISER4_TAP_H__ )
#define __REISER4_TAP_H__

#include "forward.h"
#include "readahead.h"

/**
    tree_access_pointer aka tap. Data structure combining coord_t and lock
    handle.
    Invariants involving this data-type, see doc/lock-ordering for details:

      [tap-sane]
 */
struct tree_access_pointer {
	/* coord tap is at */
	coord_t *coord;
	/* lock handle on ->coord->node */
	lock_handle *lh;
	/* mode of lock acquired by this tap */
	znode_lock_mode mode;
	/* incremented by reiser4_tap_load().
	   Decremented by reiser4_tap_relse(). */
	int loaded;
	/* list of taps */
	struct list_head linkage;
	/* read-ahead hint */
	ra_info_t ra_info;
};

typedef int (*go_actor_t) (tap_t * tap);

extern int reiser4_tap_load(tap_t * tap);
extern void reiser4_tap_relse(tap_t * tap);
extern void reiser4_tap_init(tap_t * tap, coord_t * coord, lock_handle * lh,
			     znode_lock_mode mode);
extern void reiser4_tap_monitor(tap_t * tap);
extern void reiser4_tap_copy(tap_t * dst, tap_t * src);
extern void reiser4_tap_done(tap_t * tap);
extern int reiser4_tap_move(tap_t * tap, lock_handle * target);
extern int tap_to_coord(tap_t * tap, coord_t * target);

extern int go_dir_el(tap_t * tap, sideof dir, int units_p);
extern int go_next_unit(tap_t * tap);
extern int go_prev_unit(tap_t * tap);
extern int rewind_right(tap_t * tap, int shift);
extern int rewind_left(tap_t * tap, int shift);

extern struct list_head *reiser4_taps_list(void);

/* iterate over all taps of the current context; not removal-safe */
#define for_all_taps(tap)						       \
	for (tap = list_entry(reiser4_taps_list()->next, tap_t, linkage);      \
	     reiser4_taps_list() != &tap->linkage;			       \
	     tap = list_entry(tap->linkage.next, tap_t, linkage))

/* __REISER4_TAP_H__ */
#endif
/* Make Linus happy.
   Local variables:
   c-indentation-style: "K&R"
   mode-name: "LC"
   c-basic-offset: 8
   tab-width: 8
   fill-column: 120
   scroll-step: 1
   End:
*/
diff --git a/fs/reiser4/tree.c b/fs/reiser4/tree.c
new file mode 100644
index 0000000..32548d2
--- /dev/null
+++ b/fs/reiser4/tree.c
@@ -0,0 +1,1876 @@
/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
 * reiser4/README */

/*
 * KEYS IN A TREE.
 *
 * The tree consists of nodes located on the disk. Node in the tree is either
 * formatted or unformatted. Formatted node is one that has structure
 * understood by the tree balancing and traversal code. Formatted nodes are
 * further classified into leaf and internal nodes.
Latter distinctions is ++ * (almost) of only historical importance: general structure of leaves and ++ * internal nodes is the same in Reiser4. Unformatted nodes contain raw data ++ * that are part of bodies of ordinary files and attributes. ++ * ++ * Each node in the tree spawns some interval in the key space. Key ranges for ++ * all nodes in the tree are disjoint. Actually, this only holds in some weak ++ * sense, because of the non-unique keys: intersection of key ranges for ++ * different nodes is either empty, or consists of exactly one key. ++ * ++ * Formatted node consists of a sequence of items. Each item spawns some ++ * interval in key space. Key ranges for all items in a tree are disjoint, ++ * modulo non-unique keys again. Items within nodes are ordered in the key ++ * order of the smallest key in a item. ++ * ++ * Particular type of item can be further split into units. Unit is piece of ++ * item that can be cut from item and moved into another item of the same ++ * time. Units are used by balancing code to repack data during balancing. ++ * ++ * Unit can be further split into smaller entities (for example, extent unit ++ * represents several pages, and it is natural for extent code to operate on ++ * particular pages and even bytes within one unit), but this is of no ++ * relevance to the generic balancing and lookup code. ++ * ++ * Although item is said to "spawn" range or interval of keys, it is not ++ * necessary that item contains piece of data addressable by each and every ++ * key in this range. For example, compound directory item, consisting of ++ * units corresponding to directory entries and keyed by hashes of file names, ++ * looks more as having "discrete spectrum": only some disjoint keys inside ++ * range occupied by this item really address data. ++ * ++ * No than less, each item always has well-defined least (minimal) key, that ++ * is recorded in item header, stored in the node this item is in. 
Also, item ++ * plugin can optionally define method ->max_key_inside() returning maximal ++ * key that can _possibly_ be located within this item. This method is used ++ * (mainly) to determine when given piece of data should be merged into ++ * existing item, in stead of creating new one. Because of this, even though ++ * ->max_key_inside() can be larger that any key actually located in the item, ++ * intervals ++ * ++ * [ reiser4_min_key( item ), ->max_key_inside( item ) ] ++ * ++ * are still disjoint for all items within the _same_ node. ++ * ++ * In memory node is represented by znode. It plays several roles: ++ * ++ * . something locks are taken on ++ * ++ * . something tracked by transaction manager (this is going to change) ++ * ++ * . something used to access node data ++ * ++ * . something used to maintain tree structure in memory: sibling and ++ * parental linkage. ++ * ++ * . something used to organize nodes into "slums" ++ * ++ * More on znodes see in znode.[ch] ++ * ++ * DELIMITING KEYS ++ * ++ * To simplify balancing, allow some flexibility in locking and speed up ++ * important coord cache optimization, we keep delimiting keys of nodes in ++ * memory. Depending on disk format (implemented by appropriate node plugin) ++ * node on disk can record both left and right delimiting key, only one of ++ * them, or none. Still, our balancing and tree traversal code keep both ++ * delimiting keys for a node that is in memory stored in the znode. When ++ * node is first brought into memory during tree traversal, its left ++ * delimiting key is taken from its parent, and its right delimiting key is ++ * either next key in its parent, or is right delimiting key of parent if ++ * node is the rightmost child of parent. ++ * ++ * Physical consistency of delimiting key is protected by special dk ++ * read-write lock. That is, delimiting keys can only be inspected or ++ * modified under this lock. 
But dk lock is only sufficient for fast ++ * "pessimistic" check, because to simplify code and to decrease lock ++ * contention, balancing (carry) only updates delimiting keys right before ++ * unlocking all locked nodes on the given tree level. For example, ++ * coord-by-key cache scans LRU list of recently accessed znodes. For each ++ * node it first does fast check under dk spin lock. If key looked for is ++ * not between delimiting keys for this node, next node is inspected and so ++ * on. If key is inside of the key range, long term lock is taken on node ++ * and key range is rechecked. ++ * ++ * COORDINATES ++ * ++ * To find something in the tree, you supply a key, and the key is resolved ++ * by coord_by_key() into a coord (coordinate) that is valid as long as the ++ * node the coord points to remains locked. As mentioned above trees ++ * consist of nodes that consist of items that consist of units. A unit is ++ * the smallest and indivisible piece of tree as far as balancing and tree ++ * search are concerned. Each node, item, and unit can be addressed by ++ * giving its level in the tree and the key occupied by this entity. A node ++ * knows what the key ranges are of the items within it, and how to find its ++ * items and invoke their item handlers, but it does not know how to access ++ * individual units within its items except through the item handlers. ++ * coord is a structure containing a pointer to the node, the ordinal number ++ * of the item within this node (a sort of item offset), and the ordinal ++ * number of the unit within this item. ++ * ++ * TREE LOOKUP ++ * ++ * There are two types of access to the tree: lookup and modification. ++ * ++ * Lookup is a search for the key in the tree. Search can look for either ++ * exactly the key given to it, or for the largest key that is not greater ++ * than the key given to it. This distinction is determined by "bias" ++ * parameter of search routine (coord_by_key()). 
coord_by_key() either ++ * returns error (key is not in the tree, or some kind of external error ++ * occurred), or successfully resolves key into coord. ++ * ++ * This resolution is done by traversing tree top-to-bottom from root level ++ * to the desired level. On levels above twig level (level one above the ++ * leaf level) nodes consist exclusively of internal items. Internal item is ++ * nothing more than pointer to the tree node on the child level. On twig ++ * level nodes consist of internal items intermixed with extent ++ * items. Internal items form normal search tree structure used by traversal ++ * to descent through the tree. ++ * ++ * TREE LOOKUP OPTIMIZATIONS ++ * ++ * Tree lookup described above is expensive even if all nodes traversed are ++ * already in the memory: for each node binary search within it has to be ++ * performed and binary searches are CPU consuming and tend to destroy CPU ++ * caches. ++ * ++ * Several optimizations are used to work around this: ++ * ++ * . cbk_cache (look-aside cache for tree traversals, see search.c for ++ * details) ++ * ++ * . seals (see seal.[ch]) ++ * ++ * . 
vroot (see search.c) ++ * ++ * General search-by-key is layered thusly: ++ * ++ * [check seal, if any] --ok--> done ++ * | ++ * failed ++ * | ++ * V ++ * [vroot defined] --no--> node = tree_root ++ * | | ++ * yes | ++ * | | ++ * V | ++ * node = vroot | ++ * | | ++ * | | ++ * | | ++ * V V ++ * [check cbk_cache for key] --ok--> done ++ * | ++ * failed ++ * | ++ * V ++ * [start tree traversal from node] ++ * ++ */ ++ ++#include "forward.h" ++#include "debug.h" ++#include "dformat.h" ++#include "key.h" ++#include "coord.h" ++#include "plugin/item/static_stat.h" ++#include "plugin/item/item.h" ++#include "plugin/node/node.h" ++#include "plugin/plugin.h" ++#include "txnmgr.h" ++#include "jnode.h" ++#include "znode.h" ++#include "block_alloc.h" ++#include "tree_walk.h" ++#include "carry.h" ++#include "carry_ops.h" ++#include "tap.h" ++#include "tree.h" ++#include "vfs_ops.h" ++#include "page_cache.h" ++#include "super.h" ++#include "reiser4.h" ++#include "inode.h" ++ ++#include /* for struct super_block */ ++#include ++ ++/* Disk address (block number) never ever used for any real tree node. This is ++ used as block number of "uber" znode. ++ ++ Invalid block addresses are 0 by tradition. ++ ++*/ ++const reiser4_block_nr UBER_TREE_ADDR = 0ull; ++ ++#define CUT_TREE_MIN_ITERATIONS 64 ++ ++static int find_child_by_addr(znode * parent, znode * child, coord_t * result); ++ ++/* return node plugin of coord->node */ ++node_plugin *node_plugin_by_coord(const coord_t * coord) ++{ ++ assert("vs-1", coord != NULL); ++ assert("vs-2", coord->node != NULL); ++ ++ return coord->node->nplug; ++} ++ ++/* insert item into tree. Fields of @coord are updated so that they can be ++ * used by consequent insert operation. 
/* insert item into tree. Fields of @coord are updated so that they can be
 * used by consequent insert operation. */
insert_result insert_by_key(reiser4_tree * tree	/* tree to insert new item
						 * into */ ,
			    const reiser4_key * key /* key of new item */ ,
			    reiser4_item_data * data /* parameters for item
						     * creation */ ,
			    coord_t * coord /* resulting insertion coord */ ,
			    lock_handle * lh /* resulting lock
					      * handle */ ,
			    tree_level stop_level /* level where to insert */ ,
			    __u32 flags /* insertion flags */ )
{
	int result;

	assert("nikita-358", tree != NULL);
	assert("nikita-360", coord != NULL);

	/* resolve @key to the insertion point first */
	result = coord_by_key(tree, key, coord, lh, ZNODE_WRITE_LOCK,
			      FIND_EXACT, stop_level, stop_level,
			      flags | CBK_FOR_INSERT, NULL /*ra_info */ );
	switch (result) {
	default:
		break;
	case CBK_COORD_FOUND:
		/* an item with this key is already there */
		result = IBK_ALREADY_EXISTS;
		break;
	case CBK_COORD_NOTFOUND:
		assert("nikita-2017", coord->node != NULL);
		result = insert_by_coord(coord, data, key, lh, 0 /*flags */ );
		break;
	}
	return result;
}

/* insert item by calling carry. Helper function called if short-cut
   insertion failed */
static insert_result insert_with_carry_by_coord(coord_t * coord,	/* coord where to insert */
						lock_handle * lh,	/* lock handle of insertion
									 * node */
						reiser4_item_data * data,	/* parameters of new
										 * item */
						const reiser4_key * key,	/* key of new item */
						carry_opcode cop,	/* carry operation to perform */
						cop_insert_flag flags
						/* carry flags */ )
{
	int result;
	carry_pool *pool;
	carry_level *lowest_level;
	carry_insert_data *cdata;
	carry_op *op;

	assert("umka-314", coord != NULL);

	/* allocate carry_pool and 3 carry_level-s */
	pool =
	    init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
			    sizeof(*cdata));
	if (IS_ERR(pool))
		return PTR_ERR(pool);
	/* the levels live immediately after the pool header ... */
	lowest_level = (carry_level *) (pool + 1);
	init_carry_level(lowest_level, pool);

	op = reiser4_post_carry(lowest_level, cop, coord->node, 0);
	if (IS_ERR(op) || (op == NULL)) {
		done_carry_pool(pool);
		return RETERR(op ? PTR_ERR(op) : -EIO);
	}
	/* ... and cdata after the three carry levels (see allocation above) */
	cdata = (carry_insert_data *) (lowest_level + 3);
	cdata->coord = coord;
	cdata->data = data;
	cdata->key = key;
	op->u.insert.d = cdata;
	if (flags == 0)
		flags = znode_get_tree(coord->node)->carry.insert_flags;
	op->u.insert.flags = flags;
	op->u.insert.type = COPT_ITEM_DATA;
	op->u.insert.child = NULL;
	if (lh != NULL) {
		assert("nikita-3245", lh->node == coord->node);
		/* have carry re-target @lh if the insertion point moves */
		lowest_level->track_type = CARRY_TRACK_CHANGE;
		lowest_level->tracked = lh;
	}

	result = reiser4_carry(lowest_level, NULL);
	done_carry_pool(pool);

	return result;
}
/* form carry queue to perform paste of @data with @key at @coord, and launch
   its execution by calling carry().

   Instruct carry to update @lh if, after balancing, the insertion coord
   moves into a different block.

*/
static int paste_with_carry(coord_t * coord,	/* coord of paste */
			    lock_handle * lh,	/* lock handle of node
						 * where item is
						 * pasted */
			    reiser4_item_data * data,	/* parameters of new
							 * item */
			    const reiser4_key * key,	/* key of new item */
			    unsigned flags /* paste flags */ )
{
	int result;
	carry_pool *pool;
	carry_level *lowest_level;
	carry_insert_data *cdata;
	carry_op *op;

	assert("umka-315", coord != NULL);
	assert("umka-316", key != NULL);

	/* pool layout: header, 3 carry levels, then carry_insert_data */
	pool =
	    init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
			    sizeof(*cdata));
	if (IS_ERR(pool))
		return PTR_ERR(pool);
	lowest_level = (carry_level *) (pool + 1);
	init_carry_level(lowest_level, pool);

	op = reiser4_post_carry(lowest_level, COP_PASTE, coord->node, 0);
	if (IS_ERR(op) || (op == NULL)) {
		done_carry_pool(pool);
		return RETERR(op ? PTR_ERR(op) : -EIO);
	}
	cdata = (carry_insert_data *) (lowest_level + 3);
	cdata->coord = coord;
	cdata->data = data;
	cdata->key = key;
	op->u.paste.d = cdata;
	if (flags == 0)
		flags = znode_get_tree(coord->node)->carry.paste_flags;
	op->u.paste.flags = flags;
	op->u.paste.type = COPT_ITEM_DATA;
	if (lh != NULL) {
		lowest_level->track_type = CARRY_TRACK_CHANGE;
		lowest_level->tracked = lh;
	}

	result = reiser4_carry(lowest_level, NULL);
	done_carry_pool(pool);

	return result;
}

/* insert item at the given coord.

   First try to skip carry by directly calling ->create_item() method of node
   plugin. If this is impossible (there is not enough free space in the node,
   or leftmost item in the node is created), call insert_with_carry_by_coord()
   that will do full carry().

*/
insert_result insert_by_coord(coord_t * coord	/* coord where to
						 * insert. coord->node has
						 * to be write locked by
						 * caller */ ,
			      reiser4_item_data * data	/* data to be
							 * inserted */ ,
			      const reiser4_key * key /* key of new item */ ,
			      lock_handle * lh	/* lock handle of write
						 * lock on node */ ,
			      __u32 flags /* insertion flags */ )
{
	unsigned item_size;
	int result;
	znode *node;

	assert("vs-247", coord != NULL);
	assert("vs-248", data != NULL);
	assert("vs-249", data->length >= 0);
	assert("nikita-1191", znode_is_write_locked(coord->node));

	node = coord->node;
	coord_clear_iplug(coord);
	result = zload(node);
	if (result != 0)
		return result;

	item_size = space_needed(node, NULL, data, 1);
	if (item_size > znode_free_space(node) &&
	    (flags & COPI_DONT_SHIFT_LEFT) && (flags & COPI_DONT_SHIFT_RIGHT)
	    && (flags & COPI_DONT_ALLOCATE)) {
		/* we are forced to use free space of coord->node and new item
		   does not fit into it.

		   Currently we get here only when we allocate and copy units
		   of extent item from a node to its left neighbor during
		   "squalloc"-ing. If @node (this is left neighbor) does not
		   have enough free space - we do not want to attempt any
		   shifting and allocations because we are in squeezing and
		   everything to the left of @node is tightly packed.
		 */
		result = -E_NODE_FULL;
	} else if ((item_size <= znode_free_space(node)) &&
		   !coord_is_before_leftmost(coord) &&
		   (node_plugin_by_node(node)->fast_insert != NULL)
		   && node_plugin_by_node(node)->fast_insert(coord)) {
		/* shortcut insertion without carry() overhead.

		   Only possible if:

		   - there is enough free space

		   - insertion is not into the leftmost position in a node
		   (otherwise it would require updating of delimiting key in a
		   parent)

		   - node plugin agrees with this

		 */
		result =
		    node_plugin_by_node(node)->create_item(coord, key, data,
							   NULL);
		znode_make_dirty(node);
	} else {
		/* otherwise do full-fledged carry(). */
		result =
		    insert_with_carry_by_coord(coord, lh, data, key, COP_INSERT,
					       flags);
	}
	zrelse(node);
	return result;
}

/* @coord is set to leaf level and @data is to be inserted to twig level */
insert_result
insert_extent_by_coord(coord_t *
		       coord
		       /* coord where to insert. coord->node has to be write
		        * locked by caller */
		       ,
		       reiser4_item_data * data /* data to be inserted */ ,
		       const reiser4_key * key /* key of new item */ ,
		       lock_handle *
		       lh /* lock handle of write lock on node */ )
{
	assert("vs-405", coord != NULL);
	assert("vs-406", data != NULL);
	assert("vs-407", data->length > 0);
	assert("vs-408", znode_is_write_locked(coord->node));
	assert("vs-409", znode_get_level(coord->node) == LEAF_LEVEL);

	return insert_with_carry_by_coord(coord, lh, data, key, COP_EXTENT,
					  0 /*flags */ );
}
/* Insert into the item at the given coord.

   First try to skip carry by directly calling ->paste() method of item
   plugin. If this is impossible (there is not enough free space in the node,
   or we are pasting into leftmost position in the node), call
   paste_with_carry() that will do full carry().

*/
/* paste_into_item */
int insert_into_item(coord_t * coord /* coord of pasting */ ,
		     lock_handle * lh /* lock handle on node involved */ ,
		     const reiser4_key * key /* key of unit being pasted */ ,
		     reiser4_item_data * data /* parameters for new unit */ ,
		     unsigned flags /* insert/paste flags */ )
{
	int result;
	int size_change;
	node_plugin *nplug;
	item_plugin *iplug;

	assert("umka-317", coord != NULL);
	assert("umka-318", key != NULL);

	iplug = item_plugin_by_coord(coord);
	nplug = node_plugin_by_coord(coord);

	assert("nikita-1480", iplug == data->iplug);

	size_change = space_needed(coord->node, coord, data, 0);
	if (size_change > (int)znode_free_space(coord->node) &&
	    (flags & COPI_DONT_SHIFT_LEFT) && (flags & COPI_DONT_SHIFT_RIGHT)
	    && (flags & COPI_DONT_ALLOCATE)) {
		/* we are forced to use free space of coord->node and new data
		   does not fit into it. */
		return -E_NODE_FULL;
	}

	/* shortcut paste without carry() overhead.

	   Only possible if:

	   - there is enough free space

	   - paste is not into the leftmost unit in a node (otherwise
	   it would require updating of delimiting key in a parent)

	   - node plugin agrees with this

	   - item plugin agrees with us
	 */
	/* NOTE(review): the trailing "coord->unit_pos != 0" conjunct makes
	 * part of the preceding disjunction redundant -- presumably kept to
	 * match the "not leftmost unit" rule; confirm before simplifying */
	if (size_change <= (int)znode_free_space(coord->node) &&
	    (coord->item_pos != 0 ||
	     coord->unit_pos != 0 || coord->between == AFTER_UNIT) &&
	    coord->unit_pos != 0 && nplug->fast_paste != NULL &&
	    nplug->fast_paste(coord) &&
	    iplug->b.fast_paste != NULL && iplug->b.fast_paste(coord)) {
		if (size_change > 0)
			nplug->change_item_size(coord, size_change);
		/* NOTE-NIKITA: huh? where @key is used? */
		result = iplug->b.paste(coord, data, NULL);
		if (size_change < 0)
			nplug->change_item_size(coord, size_change);
		znode_make_dirty(coord->node);
	} else
		/* otherwise do full-fledged carry(). */
		result = paste_with_carry(coord, lh, data, key, flags);
	return result;
}

/* this either appends or truncates item @coord */
int reiser4_resize_item(coord_t * coord /* coord of item being resized */ ,
			reiser4_item_data * data /* parameters of resize */ ,
			reiser4_key * key /* key of new unit */ ,
			lock_handle * lh	/* lock handle of node
						 * being modified */ ,
			cop_insert_flag flags /* carry flags */ )
{
	int result;
	znode *node;

	assert("nikita-362", coord != NULL);
	assert("nikita-363", data != NULL);
	assert("vs-245", data->length != 0);

	node = coord->node;
	coord_clear_iplug(coord);
	result = zload(node);
	if (result != 0)
		return result;

	/* negative length means truncation, positive means append */
	if (data->length < 0)
		result = node_plugin_by_coord(coord)->shrink_item(coord,
								  -data->length);
	else
		result = insert_into_item(coord, lh, key, data, flags);

	zrelse(node);
	return result;
}
/* insert flow @f */
int reiser4_insert_flow(coord_t * coord, lock_handle * lh, flow_t * f)
{
	int result;
	carry_pool *pool;
	carry_level *lowest_level;
	reiser4_item_data *data;
	carry_op *op;

	/* pool layout: header, 3 carry levels, then reiser4_item_data */
	pool =
	    init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
			    sizeof(*data));
	if (IS_ERR(pool))
		return PTR_ERR(pool);
	lowest_level = (carry_level *) (pool + 1);
	init_carry_level(lowest_level, pool);

	op = reiser4_post_carry(lowest_level, COP_INSERT_FLOW, coord->node,
				0 /* operate directly on coord -> node */ );
	if (IS_ERR(op) || (op == NULL)) {
		done_carry_pool(pool);
		return RETERR(op ? PTR_ERR(op) : -EIO);
	}

	/* these are permanent during insert_flow */
	data = (reiser4_item_data *) (lowest_level + 3);
	data->user = 1;
	data->iplug = item_plugin_by_id(FORMATTING_ID);
	data->arg = NULL;
	/* data.length and data.data will be set before calling paste or
	   insert */
	data->length = 0;
	data->data = NULL;

	op->u.insert_flow.flags = 0;
	op->u.insert_flow.insert_point = coord;
	op->u.insert_flow.flow = f;
	op->u.insert_flow.data = data;
	op->u.insert_flow.new_nodes = 0;

	/* track @lh across balancing so the caller keeps a valid handle */
	lowest_level->track_type = CARRY_TRACK_CHANGE;
	lowest_level->tracked = lh;

	result = reiser4_carry(lowest_level, NULL);
	done_carry_pool(pool);

	return result;
}

/* Given a coord in parent node, obtain a znode for the corresponding child */
znode *child_znode(const coord_t * parent_coord	/* coord of pointer to
						 * child */ ,
		   znode * parent /* parent of child */ ,
		   int incore_p	/* if !0 only return child if already in
				 * memory */ ,
		   int setup_dkeys_p	/* if !0 update delimiting keys of
					 * child */ )
{
	znode *child;

	assert("nikita-1374", parent_coord != NULL);
	assert("nikita-1482", parent != NULL);
#if REISER4_DEBUG
	if (setup_dkeys_p)
		assert_rw_not_locked(&(znode_get_tree(parent)->dk_lock));
#endif
	assert("nikita-2947", znode_is_any_locked(parent));

	if (znode_get_level(parent) <= LEAF_LEVEL) {
		/* trying to get child of leaf node */
		warning("nikita-1217", "Child of maize?");
		return ERR_PTR(RETERR(-EIO));
	}
	if (item_is_internal(parent_coord)) {
		reiser4_block_nr addr;
		item_plugin *iplug;
		reiser4_tree *tree;

		/* extract the child's block number from the internal item */
		iplug = item_plugin_by_coord(parent_coord);
		assert("vs-512", iplug->s.internal.down_link);
		iplug->s.internal.down_link(parent_coord, NULL, &addr);

		tree = znode_get_tree(parent);
		/* zlook() only finds in-memory znodes; zget() may read */
		if (incore_p)
			child = zlook(tree, &addr);
		else
			child =
			    zget(tree, &addr, parent,
				 znode_get_level(parent) - 1,
				 reiser4_ctx_gfp_mask_get());
		if ((child != NULL) && !IS_ERR(child) && setup_dkeys_p)
			set_child_delimiting_keys(parent, parent_coord, child);
	} else {
		warning("nikita-1483", "Internal item expected");
		child = ERR_PTR(RETERR(-EIO));
	}
	return child;
}
++ */ ++ spin_lock_znode(node); ++ page = znode_page(node); ++ if (likely(page != NULL)) { ++ /* ++ * reiser4_uncapture_page() can only be called when we are sure ++ * that znode is pinned in memory, which we are, because ++ * forget_znode() is only called from longterm_unlock_znode(). ++ */ ++ page_cache_get(page); ++ spin_unlock_znode(node); ++ lock_page(page); ++ reiser4_uncapture_page(page); ++ unlock_page(page); ++ page_cache_release(page); ++ } else { ++ txn_atom *atom; ++ ++ /* handle "flush queued" znodes */ ++ while (1) { ++ atom = jnode_get_atom(ZJNODE(node)); ++ assert("zam-943", atom != NULL); ++ ++ if (!ZF_ISSET(node, JNODE_FLUSH_QUEUED) ++ || !atom->nr_running_queues) ++ break; ++ ++ spin_unlock_znode(node); ++ reiser4_atom_wait_event(atom); ++ spin_lock_znode(node); ++ } ++ ++ reiser4_uncapture_block(ZJNODE(node)); ++ spin_unlock_atom(atom); ++ zput(node); ++ } ++} ++ ++/* This is called from longterm_unlock_znode() when last lock is released from ++ the node that has been removed from the tree. At this point node is removed ++ from sibling list and its lock is invalidated. */ ++void forget_znode(lock_handle * handle) ++{ ++ znode *node; ++ reiser4_tree *tree; ++ ++ assert("umka-319", handle != NULL); ++ ++ node = handle->node; ++ tree = znode_get_tree(node); ++ ++ assert("vs-164", znode_is_write_locked(node)); ++ assert("nikita-1280", ZF_ISSET(node, JNODE_HEARD_BANSHEE)); ++ assert_rw_locked(&(node->lock.guard)); ++ ++ /* We assume that this node was detached from its parent before ++ * unlocking, it gives no way to reach this node from parent through a ++ * down link. The node should have no children and, thereby, can't be ++ * reached from them by their parent pointers. The only way to obtain a ++ * reference to the node is to use sibling pointers from its left and ++ * right neighbors. In the next several lines we remove the node from ++ * the sibling list. 
*/ ++ ++ write_lock_tree(tree); ++ sibling_list_remove(node); ++ znode_remove(node, tree); ++ write_unlock_tree(tree); ++ ++ /* Here we set JNODE_DYING and cancel all pending lock requests. It ++ * forces all lock requestor threads to repeat iterations of getting ++ * lock on a child, neighbor or parent node. But, those threads can't ++ * come to this node again, because this node is no longer a child, ++ * neighbor or parent of any other node. This order of znode ++ * invalidation does not allow other threads to waste cpu time is a busy ++ * loop, trying to lock dying object. The exception is in the flush ++ * code when we take node directly from atom's capture list.*/ ++ reiser4_invalidate_lock(handle); ++ uncapture_znode(node); ++} ++ ++/* Check that internal item at @pointer really contains pointer to @child. */ ++int check_tree_pointer(const coord_t * pointer /* would-be pointer to ++ * @child */ , ++ const znode * child /* child znode */ ) ++{ ++ assert("nikita-1016", pointer != NULL); ++ assert("nikita-1017", child != NULL); ++ assert("nikita-1018", pointer->node != NULL); ++ ++ assert("nikita-1325", znode_is_any_locked(pointer->node)); ++ ++ assert("nikita-2985", ++ znode_get_level(pointer->node) == znode_get_level(child) + 1); ++ ++ coord_clear_iplug((coord_t *) pointer); ++ ++ if (coord_is_existing_unit(pointer)) { ++ item_plugin *iplug; ++ reiser4_block_nr addr; ++ ++ if (item_is_internal(pointer)) { ++ iplug = item_plugin_by_coord(pointer); ++ assert("vs-513", iplug->s.internal.down_link); ++ iplug->s.internal.down_link(pointer, NULL, &addr); ++ /* check that cached value is correct */ ++ if (disk_addr_eq(&addr, znode_get_block(child))) { ++ return NS_FOUND; ++ } ++ } ++ } ++ /* warning ("jmacd-1002", "tree pointer incorrect"); */ ++ return NS_NOT_FOUND; ++} ++ ++/* find coord of pointer to new @child in @parent. ++ ++ Find the &coord_t in the @parent where pointer to a given @child will ++ be in. 
++ ++*/ ++int find_new_child_ptr(znode * parent /* parent znode, passed locked */ , ++ znode * ++ child UNUSED_ARG /* child znode, passed locked */ , ++ znode * left /* left brother of new node */ , ++ coord_t * result /* where result is stored in */ ) ++{ ++ int ret; ++ ++ assert("nikita-1486", parent != NULL); ++ assert("nikita-1487", child != NULL); ++ assert("nikita-1488", result != NULL); ++ ++ ret = find_child_ptr(parent, left, result); ++ if (ret != NS_FOUND) { ++ warning("nikita-1489", "Cannot find brother position: %i", ret); ++ return RETERR(-EIO); ++ } else { ++ result->between = AFTER_UNIT; ++ return RETERR(NS_NOT_FOUND); ++ } ++} ++ ++/* find coord of pointer to @child in @parent. ++ ++ Find the &coord_t in the @parent where pointer to a given @child is in. ++ ++*/ ++int find_child_ptr(znode * parent /* parent znode, passed locked */ , ++ znode * child /* child znode, passed locked */ , ++ coord_t * result /* where result is stored in */ ) ++{ ++ int lookup_res; ++ node_plugin *nplug; ++ /* left delimiting key of a child */ ++ reiser4_key ld; ++ reiser4_tree *tree; ++ ++ assert("nikita-934", parent != NULL); ++ assert("nikita-935", child != NULL); ++ assert("nikita-936", result != NULL); ++ assert("zam-356", znode_is_loaded(parent)); ++ ++ coord_init_zero(result); ++ result->node = parent; ++ ++ nplug = parent->nplug; ++ assert("nikita-939", nplug != NULL); ++ ++ tree = znode_get_tree(parent); ++ /* NOTE-NIKITA taking read-lock on tree here assumes that @result is ++ * not aliased to ->in_parent of some znode. Otherwise, ++ * parent_coord_to_coord() below would modify data protected by tree ++ * lock. */ ++ read_lock_tree(tree); ++ /* fast path. Try to use cached value. Lock tree to keep ++ node->pos_in_parent and pos->*_blocknr consistent. 
*/ ++ if (child->in_parent.item_pos + 1 != 0) { ++ parent_coord_to_coord(&child->in_parent, result); ++ if (check_tree_pointer(result, child) == NS_FOUND) { ++ read_unlock_tree(tree); ++ return NS_FOUND; ++ } ++ ++ child->in_parent.item_pos = (unsigned short)~0; ++ } ++ read_unlock_tree(tree); ++ ++ /* is above failed, find some key from @child. We are looking for the ++ least key in a child. */ ++ read_lock_dk(tree); ++ ld = *znode_get_ld_key(child); ++ read_unlock_dk(tree); ++ /* ++ * now, lookup parent with key just found. Note, that left delimiting ++ * key doesn't identify node uniquely, because (in extremely rare ++ * case) two nodes can have equal left delimiting keys, if one of them ++ * is completely filled with directory entries that all happened to be ++ * hash collision. But, we check block number in check_tree_pointer() ++ * and, so, are safe. ++ */ ++ lookup_res = nplug->lookup(parent, &ld, FIND_EXACT, result); ++ /* update cached pos_in_node */ ++ if (lookup_res == NS_FOUND) { ++ write_lock_tree(tree); ++ coord_to_parent_coord(result, &child->in_parent); ++ write_unlock_tree(tree); ++ lookup_res = check_tree_pointer(result, child); ++ } ++ if (lookup_res == NS_NOT_FOUND) ++ lookup_res = find_child_by_addr(parent, child, result); ++ return lookup_res; ++} ++ ++/* find coord of pointer to @child in @parent by scanning ++ ++ Find the &coord_t in the @parent where pointer to a given @child ++ is in by scanning all internal items in @parent and comparing block ++ numbers in them with that of @child. 
++ ++*/ ++static int find_child_by_addr(znode * parent /* parent znode, passed locked */ , ++ znode * child /* child znode, passed locked */ , ++ coord_t * result /* where result is stored in */ ) ++{ ++ int ret; ++ ++ assert("nikita-1320", parent != NULL); ++ assert("nikita-1321", child != NULL); ++ assert("nikita-1322", result != NULL); ++ ++ ret = NS_NOT_FOUND; ++ ++ for_all_units(result, parent) { ++ if (check_tree_pointer(result, child) == NS_FOUND) { ++ write_lock_tree(znode_get_tree(parent)); ++ coord_to_parent_coord(result, &child->in_parent); ++ write_unlock_tree(znode_get_tree(parent)); ++ ret = NS_FOUND; ++ break; ++ } ++ } ++ return ret; ++} ++ ++/* true, if @addr is "unallocated block number", which is just address, with ++ highest bit set. */ ++int is_disk_addr_unallocated(const reiser4_block_nr * addr /* address to ++ * check */ ) ++{ ++ assert("nikita-1766", addr != NULL); ++ cassert(sizeof(reiser4_block_nr) == 8); ++ return (*addr & REISER4_BLOCKNR_STATUS_BIT_MASK) == ++ REISER4_UNALLOCATED_STATUS_VALUE; ++} ++ ++/* returns true if removing bytes of given range of key [from_key, to_key] ++ causes removing of whole item @from */ ++static int ++item_removed_completely(coord_t * from, const reiser4_key * from_key, ++ const reiser4_key * to_key) ++{ ++ item_plugin *iplug; ++ reiser4_key key_in_item; ++ ++ assert("umka-325", from != NULL); ++ assert("", item_is_extent(from)); ++ ++ /* check first key just for case */ ++ item_key_by_coord(from, &key_in_item); ++ if (keygt(from_key, &key_in_item)) ++ return 0; ++ ++ /* check last key */ ++ iplug = item_plugin_by_coord(from); ++ assert("vs-611", iplug && iplug->s.file.append_key); ++ ++ iplug->s.file.append_key(from, &key_in_item); ++ set_key_offset(&key_in_item, get_key_offset(&key_in_item) - 1); ++ ++ if (keylt(to_key, &key_in_item)) ++ /* last byte is not removed */ ++ return 0; ++ return 1; ++} ++ ++/* helper function for prepare_twig_kill(): @left and @right are formatted ++ * neighbors of extent item 
being completely removed. Load and lock neighbors ++ * and store lock handles into @cdata for later use by kill_hook_extent() */ ++static int ++prepare_children(znode * left, znode * right, carry_kill_data * kdata) ++{ ++ int result; ++ int left_loaded; ++ int right_loaded; ++ ++ result = 0; ++ left_loaded = right_loaded = 0; ++ ++ if (left != NULL) { ++ result = zload(left); ++ if (result == 0) { ++ left_loaded = 1; ++ result = longterm_lock_znode(kdata->left, left, ++ ZNODE_READ_LOCK, ++ ZNODE_LOCK_LOPRI); ++ } ++ } ++ if (result == 0 && right != NULL) { ++ result = zload(right); ++ if (result == 0) { ++ right_loaded = 1; ++ result = longterm_lock_znode(kdata->right, right, ++ ZNODE_READ_LOCK, ++ ZNODE_LOCK_HIPRI | ++ ZNODE_LOCK_NONBLOCK); ++ } ++ } ++ if (result != 0) { ++ done_lh(kdata->left); ++ done_lh(kdata->right); ++ if (left_loaded != 0) ++ zrelse(left); ++ if (right_loaded != 0) ++ zrelse(right); ++ } ++ return result; ++} ++ ++static void done_children(carry_kill_data * kdata) ++{ ++ if (kdata->left != NULL && kdata->left->node != NULL) { ++ zrelse(kdata->left->node); ++ done_lh(kdata->left); ++ } ++ if (kdata->right != NULL && kdata->right->node != NULL) { ++ zrelse(kdata->right->node); ++ done_lh(kdata->right); ++ } ++} ++ ++/* part of cut_node. It is called when cut_node is called to remove or cut part ++ of extent item. When head of that item is removed - we have to update right ++ delimiting of left neighbor of extent. When item is removed completely - we ++ have to set sibling link between left and right neighbor of removed ++ extent. This may return -E_DEADLOCK because of trying to get left neighbor ++ locked. 
So, caller should repeat an attempt ++*/ ++/* Audited by: umka (2002.06.16) */ ++static int ++prepare_twig_kill(carry_kill_data * kdata, znode * locked_left_neighbor) ++{ ++ int result; ++ reiser4_key key; ++ lock_handle left_lh; ++ lock_handle right_lh; ++ coord_t left_coord; ++ coord_t *from; ++ znode *left_child; ++ znode *right_child; ++ reiser4_tree *tree; ++ int left_zloaded_here, right_zloaded_here; ++ ++ from = kdata->params.from; ++ assert("umka-326", from != NULL); ++ assert("umka-327", kdata->params.to != NULL); ++ ++ /* for one extent item only yet */ ++ assert("vs-591", item_is_extent(from)); ++ assert("vs-592", from->item_pos == kdata->params.to->item_pos); ++ ++ if ((kdata->params.from_key ++ && keygt(kdata->params.from_key, item_key_by_coord(from, &key))) ++ || from->unit_pos != 0) { ++ /* head of item @from is not removed, there is nothing to ++ worry about */ ++ return 0; ++ } ++ ++ result = 0; ++ left_zloaded_here = 0; ++ right_zloaded_here = 0; ++ ++ left_child = right_child = NULL; ++ ++ coord_dup(&left_coord, from); ++ init_lh(&left_lh); ++ init_lh(&right_lh); ++ if (coord_prev_unit(&left_coord)) { ++ /* @from is leftmost item in its node */ ++ if (!locked_left_neighbor) { ++ result = ++ reiser4_get_left_neighbor(&left_lh, from->node, ++ ZNODE_READ_LOCK, ++ GN_CAN_USE_UPPER_LEVELS); ++ switch (result) { ++ case 0: ++ break; ++ case -E_NO_NEIGHBOR: ++ /* there is no formatted node to the left of ++ from->node */ ++ warning("vs-605", ++ "extent item has smallest key in " ++ "the tree and it is about to be removed"); ++ return 0; ++ case -E_DEADLOCK: ++ /* need to restart */ ++ default: ++ return result; ++ } ++ ++ /* we have acquired left neighbor of from->node */ ++ result = zload(left_lh.node); ++ if (result) ++ goto done; ++ ++ locked_left_neighbor = left_lh.node; ++ } else { ++ /* squalloc_right_twig_cut should have supplied locked ++ * left neighbor */ ++ assert("vs-834", ++ znode_is_write_locked(locked_left_neighbor)); ++ result = 
zload(locked_left_neighbor); ++ if (result) ++ return result; ++ } ++ ++ left_zloaded_here = 1; ++ coord_init_last_unit(&left_coord, locked_left_neighbor); ++ } ++ ++ if (!item_is_internal(&left_coord)) { ++ /* what else but extent can be on twig level */ ++ assert("vs-606", item_is_extent(&left_coord)); ++ ++ /* there is no left formatted child */ ++ if (left_zloaded_here) ++ zrelse(locked_left_neighbor); ++ done_lh(&left_lh); ++ return 0; ++ } ++ ++ tree = znode_get_tree(left_coord.node); ++ left_child = child_znode(&left_coord, left_coord.node, 1, 0); ++ ++ if (IS_ERR(left_child)) { ++ result = PTR_ERR(left_child); ++ goto done; ++ } ++ ++ /* left child is acquired, calculate new right delimiting key for it ++ and get right child if it is necessary */ ++ if (item_removed_completely ++ (from, kdata->params.from_key, kdata->params.to_key)) { ++ /* try to get right child of removed item */ ++ coord_t right_coord; ++ ++ assert("vs-607", ++ kdata->params.to->unit_pos == ++ coord_last_unit_pos(kdata->params.to)); ++ coord_dup(&right_coord, kdata->params.to); ++ if (coord_next_unit(&right_coord)) { ++ /* @to is rightmost unit in the node */ ++ result = ++ reiser4_get_right_neighbor(&right_lh, from->node, ++ ZNODE_READ_LOCK, ++ GN_CAN_USE_UPPER_LEVELS); ++ switch (result) { ++ case 0: ++ result = zload(right_lh.node); ++ if (result) ++ goto done; ++ ++ right_zloaded_here = 1; ++ coord_init_first_unit(&right_coord, ++ right_lh.node); ++ item_key_by_coord(&right_coord, &key); ++ break; ++ ++ case -E_NO_NEIGHBOR: ++ /* there is no formatted node to the right of ++ from->node */ ++ read_lock_dk(tree); ++ key = *znode_get_rd_key(from->node); ++ read_unlock_dk(tree); ++ right_coord.node = NULL; ++ result = 0; ++ break; ++ default: ++ /* real error */ ++ goto done; ++ } ++ } else { ++ /* there is an item to the right of @from - take its key */ ++ item_key_by_coord(&right_coord, &key); ++ } ++ ++ /* try to get right child of @from */ ++ if (right_coord.node && /* there is right 
neighbor of @from */ ++ item_is_internal(&right_coord)) { /* it is internal item */ ++ right_child = child_znode(&right_coord, ++ right_coord.node, 1, 0); ++ ++ if (IS_ERR(right_child)) { ++ result = PTR_ERR(right_child); ++ goto done; ++ } ++ ++ } ++ /* whole extent is removed between znodes left_child and right_child. Prepare them for linking and ++ update of right delimiting key of left_child */ ++ result = prepare_children(left_child, right_child, kdata); ++ } else { ++ /* head of item @to is removed. left_child has to get right delimting key update. Prepare it for that */ ++ result = prepare_children(left_child, NULL, kdata); ++ } ++ ++ done: ++ if (right_child) ++ zput(right_child); ++ if (right_zloaded_here) ++ zrelse(right_lh.node); ++ done_lh(&right_lh); ++ ++ if (left_child) ++ zput(left_child); ++ if (left_zloaded_here) ++ zrelse(locked_left_neighbor); ++ done_lh(&left_lh); ++ return result; ++} ++ ++/* this is used to remove part of node content between coordinates @from and @to. 
Units to which @from and @to are set ++ are to be cut completely */ ++/* for try_to_merge_with_left, delete_copied, reiser4_delete_node */ ++int cut_node_content(coord_t * from, coord_t * to, const reiser4_key * from_key, /* first key to be removed */ ++ const reiser4_key * to_key, /* last key to be removed */ ++ reiser4_key * ++ smallest_removed /* smallest key actually removed */ ) ++{ ++ int result; ++ carry_pool *pool; ++ carry_level *lowest_level; ++ carry_cut_data *cut_data; ++ carry_op *op; ++ ++ assert("vs-1715", coord_compare(from, to) != COORD_CMP_ON_RIGHT); ++ ++ pool = ++ init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) + ++ sizeof(*cut_data)); ++ if (IS_ERR(pool)) ++ return PTR_ERR(pool); ++ lowest_level = (carry_level *) (pool + 1); ++ init_carry_level(lowest_level, pool); ++ ++ op = reiser4_post_carry(lowest_level, COP_CUT, from->node, 0); ++ assert("vs-1509", op != 0); ++ if (IS_ERR(op)) { ++ done_carry_pool(pool); ++ return PTR_ERR(op); ++ } ++ ++ cut_data = (carry_cut_data *) (lowest_level + 3); ++ cut_data->params.from = from; ++ cut_data->params.to = to; ++ cut_data->params.from_key = from_key; ++ cut_data->params.to_key = to_key; ++ cut_data->params.smallest_removed = smallest_removed; ++ ++ op->u.cut_or_kill.is_cut = 1; ++ op->u.cut_or_kill.u.cut = cut_data; ++ ++ result = reiser4_carry(lowest_level, NULL); ++ done_carry_pool(pool); ++ ++ return result; ++} ++ ++/* cut part of the node ++ ++ Cut part or whole content of node. ++ ++ cut data between @from and @to of @from->node and call carry() to make ++ corresponding changes in the tree. @from->node may become empty. If so - ++ pointer to it will be removed. Neighboring nodes are not changed. 
Smallest ++ removed key is stored in @smallest_removed ++ ++*/ ++int kill_node_content(coord_t * from, /* coord of the first unit/item that will be eliminated */ ++ coord_t * to, /* coord of the last unit/item that will be eliminated */ ++ const reiser4_key * from_key, /* first key to be removed */ ++ const reiser4_key * to_key, /* last key to be removed */ ++ reiser4_key * smallest_removed, /* smallest key actually removed */ ++ znode * locked_left_neighbor, /* this is set when kill_node_content is called with left neighbor ++ * locked (in squalloc_right_twig_cut, namely) */ ++ struct inode *inode, /* inode of file whose item (or its part) is to be killed. This is necessary to ++ invalidate pages together with item pointing to them */ ++ int truncate) ++{ /* this call is made for file truncate) */ ++ int result; ++ carry_pool *pool; ++ carry_level *lowest_level; ++ carry_kill_data *kdata; ++ lock_handle *left_child; ++ lock_handle *right_child; ++ carry_op *op; ++ ++ assert("umka-328", from != NULL); ++ assert("vs-316", !node_is_empty(from->node)); ++ assert("nikita-1812", coord_is_existing_unit(from) ++ && coord_is_existing_unit(to)); ++ ++ /* allocate carry_pool, 3 carry_level-s, carry_kill_data and structures for kill_hook_extent */ ++ pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) + ++ sizeof(carry_kill_data) + ++ 2 * sizeof(lock_handle) + ++ 5 * sizeof(reiser4_key) + 2 * sizeof(coord_t)); ++ if (IS_ERR(pool)) ++ return PTR_ERR(pool); ++ ++ lowest_level = (carry_level *) (pool + 1); ++ init_carry_level(lowest_level, pool); ++ ++ kdata = (carry_kill_data *) (lowest_level + 3); ++ left_child = (lock_handle *) (kdata + 1); ++ right_child = left_child + 1; ++ ++ init_lh(left_child); ++ init_lh(right_child); ++ ++ kdata->params.from = from; ++ kdata->params.to = to; ++ kdata->params.from_key = from_key; ++ kdata->params.to_key = to_key; ++ kdata->params.smallest_removed = smallest_removed; ++ kdata->params.truncate = truncate; ++ kdata->flags = 0; 
++ kdata->inode = inode; ++ kdata->left = left_child; ++ kdata->right = right_child; ++ /* memory for 5 reiser4_key and 2 coord_t will be used in kill_hook_extent */ ++ kdata->buf = (char *)(right_child + 1); ++ ++ if (znode_get_level(from->node) == TWIG_LEVEL && item_is_extent(from)) { ++ /* left child of extent item may have to get updated right ++ delimiting key and to get linked with right child of extent ++ @from if it will be removed completely */ ++ result = prepare_twig_kill(kdata, locked_left_neighbor); ++ if (result) { ++ done_children(kdata); ++ done_carry_pool(pool); ++ return result; ++ } ++ } ++ ++ op = reiser4_post_carry(lowest_level, COP_CUT, from->node, 0); ++ if (IS_ERR(op) || (op == NULL)) { ++ done_children(kdata); ++ done_carry_pool(pool); ++ return RETERR(op ? PTR_ERR(op) : -EIO); ++ } ++ ++ op->u.cut_or_kill.is_cut = 0; ++ op->u.cut_or_kill.u.kill = kdata; ++ ++ result = reiser4_carry(lowest_level, NULL); ++ ++ done_children(kdata); ++ done_carry_pool(pool); ++ return result; ++} ++ ++void ++fake_kill_hook_tail(struct inode *inode, loff_t start, loff_t end, int truncate) ++{ ++ if (reiser4_inode_get_flag(inode, REISER4_HAS_MMAP)) { ++ pgoff_t start_pg, end_pg; ++ ++ start_pg = start >> PAGE_CACHE_SHIFT; ++ end_pg = (end - 1) >> PAGE_CACHE_SHIFT; ++ ++ if ((start & (PAGE_CACHE_SIZE - 1)) == 0) { ++ /* ++ * kill up to the page boundary. ++ */ ++ assert("vs-123456", start_pg == end_pg); ++ reiser4_invalidate_pages(inode->i_mapping, start_pg, 1, ++ truncate); ++ } else if (start_pg != end_pg) { ++ /* ++ * page boundary is within killed portion of node. ++ */ ++ assert("vs-654321", end_pg - start_pg == 1); ++ reiser4_invalidate_pages(inode->i_mapping, end_pg, ++ end_pg - start_pg, 1); ++ } ++ } ++ inode_sub_bytes(inode, end - start); ++} ++ ++/** ++ * Delete whole @node from the reiser4 tree without loading it. 
++ * ++ * @left: locked left neighbor, ++ * @node: node to be deleted, ++ * @smallest_removed: leftmost key of deleted node, ++ * @object: inode pointer, if we truncate a file body. ++ * @truncate: true if called for file truncate. ++ * ++ * @return: 0 if success, error code otherwise. ++ * ++ * NOTE: if @object!=NULL we assume that @smallest_removed != NULL and it ++ * contains the right value of the smallest removed key from the previous ++ * cut_worker() iteration. This is needed for proper accounting of ++ * "i_blocks" and "i_bytes" fields of the @object. ++ */ ++int reiser4_delete_node(znode * node, reiser4_key * smallest_removed, ++ struct inode *object, int truncate) ++{ ++ lock_handle parent_lock; ++ coord_t cut_from; ++ coord_t cut_to; ++ reiser4_tree *tree; ++ int ret; ++ ++ assert("zam-937", node != NULL); ++ assert("zam-933", znode_is_write_locked(node)); ++ assert("zam-999", smallest_removed != NULL); ++ ++ init_lh(&parent_lock); ++ ++ ret = reiser4_get_parent(&parent_lock, node, ZNODE_WRITE_LOCK); ++ if (ret) ++ return ret; ++ ++ assert("zam-934", !znode_above_root(parent_lock.node)); ++ ++ ret = zload(parent_lock.node); ++ if (ret) ++ goto failed_nozrelse; ++ ++ ret = find_child_ptr(parent_lock.node, node, &cut_from); ++ if (ret) ++ goto failed; ++ ++ /* decrement child counter and set parent pointer to NULL before ++ deleting the list from parent node because of checks in ++ internal_kill_item_hook (we can delete the last item from the parent ++ node, the parent node is going to be deleted and its c_count should ++ be zero). */ ++ ++ tree = znode_get_tree(node); ++ write_lock_tree(tree); ++ init_parent_coord(&node->in_parent, NULL); ++ --parent_lock.node->c_count; ++ write_unlock_tree(tree); ++ ++ assert("zam-989", item_is_internal(&cut_from)); ++ ++ /* @node should be deleted after unlocking. */ ++ ZF_SET(node, JNODE_HEARD_BANSHEE); ++ ++ /* remove a pointer from the parent node to the node being deleted. 
*/ ++ coord_dup(&cut_to, &cut_from); ++ /* FIXME: shouldn't this be kill_node_content */ ++ ret = cut_node_content(&cut_from, &cut_to, NULL, NULL, NULL); ++ if (ret) ++ /* FIXME(Zam): Should we re-connect the node to its parent if ++ * cut_node fails? */ ++ goto failed; ++ ++ { ++ reiser4_tree *tree = current_tree; ++ __u64 start_offset = 0, end_offset = 0; ++ ++ read_lock_tree(tree); ++ write_lock_dk(tree); ++ if (object) { ++ /* We use @smallest_removed and the left delimiting of ++ * the current node for @object->i_blocks, i_bytes ++ * calculation. We assume that the items after the ++ * *@smallest_removed key have been deleted from the ++ * file body. */ ++ start_offset = get_key_offset(znode_get_ld_key(node)); ++ end_offset = get_key_offset(smallest_removed); ++ } ++ ++ assert("zam-1021", znode_is_connected(node)); ++ if (node->left) ++ znode_set_rd_key(node->left, znode_get_rd_key(node)); ++ ++ *smallest_removed = *znode_get_ld_key(node); ++ ++ write_unlock_dk(tree); ++ read_unlock_tree(tree); ++ ++ if (object) { ++ /* we used to perform actions which are to be performed on items on their removal from tree in ++ special item method - kill_hook. Here for optimization reasons we avoid reading node ++ containing item we remove and can not call item's kill hook. Instead we call function which ++ does exactly the same things as tail kill hook in assumption that node we avoid reading ++ contains only one item and that item is a tail one. */ ++ fake_kill_hook_tail(object, start_offset, end_offset, ++ truncate); ++ } ++ } ++ failed: ++ zrelse(parent_lock.node); ++ failed_nozrelse: ++ done_lh(&parent_lock); ++ ++ return ret; ++} ++ ++static int can_delete(const reiser4_key *key, znode *node) ++{ ++ int result; ++ ++ read_lock_dk(current_tree); ++ result = keyle(key, znode_get_ld_key(node)); ++ read_unlock_dk(current_tree); ++ return result; ++} ++ ++/** ++ * This subroutine is not optimal but implementation seems to ++ * be easier). 
++ * ++ * @tap: the point deletion process begins from, ++ * @from_key: the beginning of the deleted key range, ++ * @to_key: the end of the deleted key range, ++ * @smallest_removed: the smallest removed key, ++ * @truncate: true if called for file truncate. ++ * @progress: return true if a progress in file items deletions was made, ++ * @smallest_removed value is actual in that case. ++ * ++ * @return: 0 if success, error code otherwise, -E_REPEAT means that long ++ * reiser4_cut_tree operation was interrupted for allowing atom commit. ++ */ ++int ++cut_tree_worker_common(tap_t * tap, const reiser4_key * from_key, ++ const reiser4_key * to_key, ++ reiser4_key * smallest_removed, struct inode *object, ++ int truncate, int *progress) ++{ ++ lock_handle next_node_lock; ++ coord_t left_coord; ++ int result; ++ ++ assert("zam-931", tap->coord->node != NULL); ++ assert("zam-932", znode_is_write_locked(tap->coord->node)); ++ ++ *progress = 0; ++ init_lh(&next_node_lock); ++ ++ while (1) { ++ znode *node; /* node from which items are cut */ ++ node_plugin *nplug; /* node plugin for @node */ ++ ++ node = tap->coord->node; ++ ++ /* Move next_node_lock to the next node on the left. */ ++ result = ++ reiser4_get_left_neighbor(&next_node_lock, node, ++ ZNODE_WRITE_LOCK, ++ GN_CAN_USE_UPPER_LEVELS); ++ if (result != 0 && result != -E_NO_NEIGHBOR) ++ break; ++ /* Check can we delete the node as a whole. 
*/ ++ if (*progress && znode_get_level(node) == LEAF_LEVEL && ++ can_delete(from_key, node)) { ++ result = reiser4_delete_node(node, smallest_removed, ++ object, truncate); ++ } else { ++ result = reiser4_tap_load(tap); ++ if (result) ++ return result; ++ ++ /* Prepare the second (right) point for cut_node() */ ++ if (*progress) ++ coord_init_last_unit(tap->coord, node); ++ ++ else if (item_plugin_by_coord(tap->coord)->b.lookup == ++ NULL) ++ /* set rightmost unit for the items without lookup method */ ++ tap->coord->unit_pos = ++ coord_last_unit_pos(tap->coord); ++ ++ nplug = node->nplug; ++ ++ assert("vs-686", nplug); ++ assert("vs-687", nplug->lookup); ++ ++ /* left_coord is leftmost unit cut from @node */ ++ result = nplug->lookup(node, from_key, ++ FIND_MAX_NOT_MORE_THAN, ++ &left_coord); ++ ++ if (IS_CBKERR(result)) ++ break; ++ ++ /* adjust coordinates so that they are set to existing units */ ++ if (coord_set_to_right(&left_coord) ++ || coord_set_to_left(tap->coord)) { ++ result = 0; ++ break; ++ } ++ ++ if (coord_compare(&left_coord, tap->coord) == ++ COORD_CMP_ON_RIGHT) { ++ /* keys from @from_key to @to_key are not in the tree */ ++ result = 0; ++ break; ++ } ++ ++ if (left_coord.item_pos != tap->coord->item_pos) { ++ /* do not allow to cut more than one item. It is added to solve problem of truncating ++ partially converted files. If file is partially converted there may exist a twig node ++ containing both internal item or items pointing to leaf nodes with formatting items ++ and extent item. 
We do not want to kill internal items being at twig node here ++ because cut_tree_worker assumes killing them from level level */ ++ coord_dup(&left_coord, tap->coord); ++ assert("vs-1652", ++ coord_is_existing_unit(&left_coord)); ++ left_coord.unit_pos = 0; ++ } ++ ++ /* cut data from one node */ ++ // *smallest_removed = *reiser4_min_key(); ++ result = ++ kill_node_content(&left_coord, tap->coord, from_key, ++ to_key, smallest_removed, ++ next_node_lock.node, object, ++ truncate); ++ reiser4_tap_relse(tap); ++ } ++ if (result) ++ break; ++ ++ ++(*progress); ++ ++ /* Check whether all items with keys >= from_key were removed ++ * from the tree. */ ++ if (keyle(smallest_removed, from_key)) ++ /* result = 0; */ ++ break; ++ ++ if (next_node_lock.node == NULL) ++ break; ++ ++ result = reiser4_tap_move(tap, &next_node_lock); ++ done_lh(&next_node_lock); ++ if (result) ++ break; ++ ++ /* Break long reiser4_cut_tree operation (deletion of a large ++ file) if atom requires commit. */ ++ if (*progress > CUT_TREE_MIN_ITERATIONS ++ && current_atom_should_commit()) { ++ result = -E_REPEAT; ++ break; ++ } ++ } ++ done_lh(&next_node_lock); ++ // assert("vs-301", !keyeq(&smallest_removed, reiser4_min_key())); ++ return result; ++} ++ ++/* there is a fundamental problem with optimizing deletes: VFS does it ++ one file at a time. Another problem is that if an item can be ++ anything, then deleting items must be done one at a time. It just ++ seems clean to writes this to specify a from and a to key, and cut ++ everything between them though. */ ++ ++/* use this function with care if deleting more than what is part of a single file. */ ++/* do not use this when cutting a single item, it is suboptimal for that */ ++ ++/* You are encouraged to write plugin specific versions of this. It ++ cannot be optimal for all plugins because it works item at a time, ++ and some plugins could sometimes work node at a time. 
Regular files ++ however are not optimizable to work node at a time because of ++ extents needing to free the blocks they point to. ++ ++ Optimizations compared to v3 code: ++ ++ It does not balance (that task is left to memory pressure code). ++ ++ Nodes are deleted only if empty. ++ ++ Uses extents. ++ ++ Performs read-ahead of formatted nodes whose contents are part of ++ the deletion. ++*/ ++ ++/** ++ * Delete everything from the reiser4 tree between two keys: @from_key and ++ * @to_key. ++ * ++ * @from_key: the beginning of the deleted key range, ++ * @to_key: the end of the deleted key range, ++ * @smallest_removed: the smallest removed key, ++ * @object: owner of cutting items. ++ * @truncate: true if called for file truncate. ++ * @progress: return true if a progress in file items deletions was made, ++ * @smallest_removed value is actual in that case. ++ * ++ * @return: 0 if success, error code otherwise, -E_REPEAT means that long cut_tree ++ * operation was interrupted for allowing atom commit . ++ */ ++ ++int reiser4_cut_tree_object(reiser4_tree * tree, const reiser4_key * from_key, ++ const reiser4_key * to_key, ++ reiser4_key * smallest_removed_p, ++ struct inode *object, int truncate, int *progress) ++{ ++ lock_handle lock; ++ int result; ++ tap_t tap; ++ coord_t right_coord; ++ reiser4_key smallest_removed; ++ int (*cut_tree_worker) (tap_t *, const reiser4_key *, ++ const reiser4_key *, reiser4_key *, ++ struct inode *, int, int *); ++ STORE_COUNTERS; ++ ++ assert("umka-329", tree != NULL); ++ assert("umka-330", from_key != NULL); ++ assert("umka-331", to_key != NULL); ++ assert("zam-936", keyle(from_key, to_key)); ++ ++ if (smallest_removed_p == NULL) ++ smallest_removed_p = &smallest_removed; ++ ++ init_lh(&lock); ++ ++ do { ++ /* Find rightmost item to cut away from the tree. 
*/ ++ result = reiser4_object_lookup(object, to_key, &right_coord, ++ &lock, ZNODE_WRITE_LOCK, ++ FIND_MAX_NOT_MORE_THAN, ++ TWIG_LEVEL, LEAF_LEVEL, ++ CBK_UNIQUE, NULL /*ra_info */); ++ if (result != CBK_COORD_FOUND) ++ break; ++ if (object == NULL ++ || inode_file_plugin(object)->cut_tree_worker == NULL) ++ cut_tree_worker = cut_tree_worker_common; ++ else ++ cut_tree_worker = ++ inode_file_plugin(object)->cut_tree_worker; ++ reiser4_tap_init(&tap, &right_coord, &lock, ZNODE_WRITE_LOCK); ++ result = ++ cut_tree_worker(&tap, from_key, to_key, smallest_removed_p, ++ object, truncate, progress); ++ reiser4_tap_done(&tap); ++ ++ reiser4_preempt_point(); ++ ++ } while (0); ++ ++ done_lh(&lock); ++ ++ if (result) { ++ switch (result) { ++ case -E_NO_NEIGHBOR: ++ result = 0; ++ break; ++ case -E_DEADLOCK: ++ result = -E_REPEAT; ++ case -E_REPEAT: ++ case -ENOMEM: ++ case -ENOENT: ++ break; ++ default: ++ warning("nikita-2861", "failure: %i", result); ++ } ++ } ++ ++ CHECK_COUNTERS; ++ return result; ++} ++ ++/* repeat reiser4_cut_tree_object until everything is deleted. ++ * unlike cut_file_items, it does not end current transaction if -E_REPEAT ++ * is returned by cut_tree_object. 
*/ ++int reiser4_cut_tree(reiser4_tree * tree, const reiser4_key * from, ++ const reiser4_key * to, struct inode *inode, int truncate) ++{ ++ int result; ++ int progress; ++ ++ do { ++ result = reiser4_cut_tree_object(tree, from, to, NULL, ++ inode, truncate, &progress); ++ } while (result == -E_REPEAT); ++ ++ return result; ++} ++ ++/* finishing reiser4 initialization */ ++int reiser4_init_tree(reiser4_tree * tree /* pointer to structure being ++ * initialized */ , ++ const reiser4_block_nr * root_block /* address of a root block ++ * on a disk */ , ++ tree_level height /* height of a tree */ , ++ node_plugin * nplug /* default node plugin */ ) ++{ ++ int result; ++ ++ assert("nikita-306", tree != NULL); ++ assert("nikita-307", root_block != NULL); ++ assert("nikita-308", height > 0); ++ assert("nikita-309", nplug != NULL); ++ assert("zam-587", tree->super != NULL); ++ ++ tree->root_block = *root_block; ++ tree->height = height; ++ tree->estimate_one_insert = calc_estimate_one_insert(height); ++ tree->nplug = nplug; ++ ++ tree->znode_epoch = 1ull; ++ ++ cbk_cache_init(&tree->cbk_cache); ++ ++ result = znodes_tree_init(tree); ++ if (result == 0) ++ result = jnodes_tree_init(tree); ++ if (result == 0) { ++ tree->uber = zget(tree, &UBER_TREE_ADDR, NULL, 0, ++ reiser4_ctx_gfp_mask_get()); ++ if (IS_ERR(tree->uber)) { ++ result = PTR_ERR(tree->uber); ++ tree->uber = NULL; ++ } ++ } ++ return result; ++} ++ ++/* release resources associated with @tree */ ++void reiser4_done_tree(reiser4_tree * tree /* tree to release */ ) ++{ ++ if (tree == NULL) ++ return; ++ ++ if (tree->uber != NULL) { ++ zput(tree->uber); ++ tree->uber = NULL; ++ } ++ znodes_tree_done(tree); ++ jnodes_tree_done(tree); ++ cbk_cache_done(&tree->cbk_cache); ++} ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/tree.h b/fs/reiser4/tree.h +new file mode 100644 +index 0000000..73aa70a +--- /dev/null ++++ b/fs/reiser4/tree.h +@@ -0,0 +1,577 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Tree operations. See fs/reiser4/tree.c for comments */ ++ ++#if !defined( __REISER4_TREE_H__ ) ++#define __REISER4_TREE_H__ ++ ++#include "forward.h" ++#include "debug.h" ++#include "dformat.h" ++#include "plugin/node/node.h" ++#include "plugin/plugin.h" ++#include "znode.h" ++#include "tap.h" ++ ++#include /* for __u?? */ ++#include /* for struct super_block */ ++#include ++#include /* for struct task_struct */ ++ ++/* fictive block number never actually used */ ++extern const reiser4_block_nr UBER_TREE_ADDR; ++ ++/* &cbk_cache_slot - entry in a coord cache. ++ ++ This is entry in a coord_by_key (cbk) cache, represented by ++ &cbk_cache. ++ ++*/ ++typedef struct cbk_cache_slot { ++ /* cached node */ ++ znode *node; ++ /* linkage to the next cbk cache slot in a LRU order */ ++ struct list_head lru; ++} cbk_cache_slot; ++ ++/* &cbk_cache - coord cache. This is part of reiser4_tree. ++ ++ cbk_cache is supposed to speed up tree lookups by caching results of recent ++ successful lookups (we don't cache negative results as dentry cache ++ does). Cache consists of relatively small number of entries kept in a LRU ++ order. Each entry (&cbk_cache_slot) contains a pointer to znode, from ++ which we can obtain a range of keys that covered by this znode. Before ++ embarking into real tree traversal we scan cbk_cache slot by slot and for ++ each slot check whether key we are looking for is between minimal and ++ maximal keys for node pointed to by this slot. 
If no match is found, real ++ tree traversal is performed and if result is successful, appropriate entry ++ is inserted into cache, possibly pulling least recently used entry out of ++ it. ++ ++ Tree spin lock is used to protect coord cache. If contention for this ++ lock proves to be too high, more finer grained locking can be added. ++ ++ Invariants involving parts of this data-type: ++ ++ [cbk-cache-invariant] ++*/ ++typedef struct cbk_cache { ++ /* serializator */ ++ rwlock_t guard; ++ int nr_slots; ++ /* head of LRU list of cache slots */ ++ struct list_head lru; ++ /* actual array of slots */ ++ cbk_cache_slot *slot; ++} cbk_cache; ++ ++/* level_lookup_result - possible outcome of looking up key at some level. ++ This is used by coord_by_key when traversing tree downward. */ ++typedef enum { ++ /* continue to the next level */ ++ LOOKUP_CONT, ++ /* done. Either required item was found, or we can prove it ++ doesn't exist, or some error occurred. */ ++ LOOKUP_DONE, ++ /* restart traversal from the root. Infamous "repetition". */ ++ LOOKUP_REST ++} level_lookup_result; ++ ++/* This is representation of internal reiser4 tree where all file-system ++ data and meta-data are stored. This structure is passed to all tree ++ manipulation functions. It's different from the super block because: ++ we don't want to limit ourselves to strictly one to one mapping ++ between super blocks and trees, and, because they are logically ++ different: there are things in a super block that have no relation to ++ the tree (bitmaps, journalling area, mount options, etc.) and there ++ are things in a tree that bear no relation to the super block, like ++ tree of znodes. ++ ++ At this time, there is only one tree ++ per filesystem, and this struct is part of the super block. We only ++ call the super block the super block for historical reasons (most ++ other filesystems call the per filesystem metadata the super block). 
++*/ ++ ++struct reiser4_tree { ++ /* block_nr == 0 is fake znode. Write lock it, while changing ++ tree height. */ ++ /* disk address of root node of a tree */ ++ reiser4_block_nr root_block; ++ ++ /* level of the root node. If this is 1, tree consists of root ++ node only */ ++ tree_level height; ++ ++ /* ++ * this is cached here avoid calling plugins through function ++ * dereference all the time. ++ */ ++ __u64 estimate_one_insert; ++ ++ /* cache of recent tree lookup results */ ++ cbk_cache cbk_cache; ++ ++ /* hash table to look up znodes by block number. */ ++ z_hash_table zhash_table; ++ z_hash_table zfake_table; ++ /* hash table to look up jnodes by inode and offset. */ ++ j_hash_table jhash_table; ++ ++ /* lock protecting: ++ - parent pointers, ++ - sibling pointers, ++ - znode hash table ++ - coord cache ++ */ ++ /* NOTE: The "giant" tree lock can be replaced by more spin locks, ++ hoping they will be less contented. We can use one spin lock per one ++ znode hash bucket. With adding of some code complexity, sibling ++ pointers can be protected by both znode spin locks. However it looks ++ more SMP scalable we should test this locking change on n-ways (n > ++ 4) SMP machines. Current 4-ways machine test does not show that tree ++ lock is contented and it is a bottleneck (2003.07.25). */ ++ ++ rwlock_t tree_lock; ++ ++ /* lock protecting delimiting keys */ ++ rwlock_t dk_lock; ++ ++ /* spin lock protecting znode_epoch */ ++ spinlock_t epoch_lock; ++ /* version stamp used to mark znode updates. See seal.[ch] for more ++ * information. 
*/ ++ __u64 znode_epoch; ++ ++ znode *uber; ++ node_plugin *nplug; ++ struct super_block *super; ++ struct { ++ /* carry flags used for insertion of new nodes */ ++ __u32 new_node_flags; ++ /* carry flags used for insertion of new extents */ ++ __u32 new_extent_flags; ++ /* carry flags used for paste operations */ ++ __u32 paste_flags; ++ /* carry flags used for insert operations */ ++ __u32 insert_flags; ++ } carry; ++}; ++ ++extern int reiser4_init_tree(reiser4_tree * tree, ++ const reiser4_block_nr * root_block, ++ tree_level height, node_plugin * default_plugin); ++extern void reiser4_done_tree(reiser4_tree * tree); ++ ++/* cbk flags: options for coord_by_key() */ ++typedef enum { ++ /* coord_by_key() is called for insertion. This is necessary because ++ of extents being located at the twig level. For explanation, see ++ comment just above is_next_item_internal(). ++ */ ++ CBK_FOR_INSERT = (1 << 0), ++ /* coord_by_key() is called with key that is known to be unique */ ++ CBK_UNIQUE = (1 << 1), ++ /* coord_by_key() can trust delimiting keys. This options is not user ++ accessible. coord_by_key() will set it automatically. It will be ++ only cleared by special-case in extents-on-the-twig-level handling ++ where it is necessary to insert item with a key smaller than ++ leftmost key in a node. This is necessary because of extents being ++ located at the twig level. For explanation, see comment just above ++ is_next_item_internal(). ++ */ ++ CBK_TRUST_DK = (1 << 2), ++ CBK_READA = (1 << 3), /* original: readahead leaves which contain items of certain file */ ++ CBK_READDIR_RA = (1 << 4), /* readdir: readahead whole directory and all its stat datas */ ++ CBK_DKSET = (1 << 5), ++ CBK_EXTENDED_COORD = (1 << 6), /* coord_t is actually */ ++ CBK_IN_CACHE = (1 << 7), /* node is already in cache */ ++ CBK_USE_CRABLOCK = (1 << 8) /* use crab_lock in stead of long term ++ * lock */ ++} cbk_flags; ++ ++/* insertion outcome. 
IBK = insert by key */ ++typedef enum { ++ IBK_INSERT_OK = 0, ++ IBK_ALREADY_EXISTS = -EEXIST, ++ IBK_IO_ERROR = -EIO, ++ IBK_NO_SPACE = -E_NODE_FULL, ++ IBK_OOM = -ENOMEM ++} insert_result; ++ ++#define IS_CBKERR(err) ((err) != CBK_COORD_FOUND && (err) != CBK_COORD_NOTFOUND) ++ ++typedef int (*tree_iterate_actor_t) (reiser4_tree * tree, coord_t * coord, ++ lock_handle * lh, void *arg); ++extern int reiser4_iterate_tree(reiser4_tree * tree, coord_t * coord, ++ lock_handle * lh, ++ tree_iterate_actor_t actor, void *arg, ++ znode_lock_mode mode, int through_units_p); ++extern int get_uber_znode(reiser4_tree * tree, znode_lock_mode mode, ++ znode_lock_request pri, lock_handle * lh); ++ ++/* return node plugin of @node */ ++static inline node_plugin *node_plugin_by_node(const znode * ++ node /* node to query */ ) ++{ ++ assert("vs-213", node != NULL); ++ assert("vs-214", znode_is_loaded(node)); ++ ++ return node->nplug; ++} ++ ++/* number of items in @node */ ++static inline pos_in_node_t node_num_items(const znode * node) ++{ ++ assert("nikita-2754", znode_is_loaded(node)); ++ assert("nikita-2468", ++ node_plugin_by_node(node)->num_of_items(node) == node->nr_items); ++ ++ return node->nr_items; ++} ++ ++/* Return the number of items at the present node. Asserts coord->node != ++ NULL. 
*/ ++static inline unsigned coord_num_items(const coord_t * coord) ++{ ++ assert("jmacd-9805", coord->node != NULL); ++ ++ return node_num_items(coord->node); ++} ++ ++/* true if @node is empty */ ++static inline int node_is_empty(const znode * node) ++{ ++ return node_num_items(node) == 0; ++} ++ ++typedef enum { ++ SHIFTED_SOMETHING = 0, ++ SHIFT_NO_SPACE = -E_NODE_FULL, ++ SHIFT_IO_ERROR = -EIO, ++ SHIFT_OOM = -ENOMEM, ++} shift_result; ++ ++extern node_plugin *node_plugin_by_coord(const coord_t * coord); ++extern int is_coord_in_node(const coord_t * coord); ++extern int key_in_node(const reiser4_key *, const coord_t *); ++extern void coord_item_move_to(coord_t * coord, int items); ++extern void coord_unit_move_to(coord_t * coord, int units); ++ ++/* there are two types of repetitive accesses (ra): intra-syscall ++ (local) and inter-syscall (global). Local ra is used when ++ during single syscall we add/delete several items and units in the ++ same place in a tree. Note that plan-A fragments local ra by ++ separating stat-data and file body in key-space. Global ra is ++ used when user does repetitive modifications in the same place in a ++ tree. ++ ++ Our ra implementation serves following purposes: ++ 1 it affects balancing decisions so that next operation in a row ++ can be performed faster; ++ 2 it affects lower-level read-ahead in page-cache; ++ 3 it allows to avoid unnecessary lookups by maintaining some state ++ across several operations (this is only for local ra); ++ 4 it leaves room for lazy-micro-balancing: when we start a sequence of ++ operations they are performed without actually doing any intra-node ++ shifts, until we finish sequence or scope of sequence leaves ++ current node, only then we really pack node (local ra only). ++*/ ++ ++/* another thing that can be useful is to keep per-tree and/or ++ per-process cache of recent lookups. 
This cache can be organised as a ++ list of block numbers of formatted nodes sorted by starting key in ++ this node. Balancings should invalidate appropriate parts of this ++ cache. ++*/ ++ ++lookup_result coord_by_key(reiser4_tree * tree, const reiser4_key * key, ++ coord_t * coord, lock_handle * handle, ++ znode_lock_mode lock, lookup_bias bias, ++ tree_level lock_level, tree_level stop_level, ++ __u32 flags, ra_info_t *); ++ ++lookup_result reiser4_object_lookup(struct inode *object, ++ const reiser4_key * key, ++ coord_t * coord, ++ lock_handle * lh, ++ znode_lock_mode lock_mode, ++ lookup_bias bias, ++ tree_level lock_level, ++ tree_level stop_level, ++ __u32 flags, ra_info_t * info); ++ ++insert_result insert_by_key(reiser4_tree * tree, const reiser4_key * key, ++ reiser4_item_data * data, coord_t * coord, ++ lock_handle * lh, ++ tree_level stop_level, __u32 flags); ++insert_result insert_by_coord(coord_t * coord, ++ reiser4_item_data * data, const reiser4_key * key, ++ lock_handle * lh, __u32); ++insert_result insert_extent_by_coord(coord_t * coord, ++ reiser4_item_data * data, ++ const reiser4_key * key, lock_handle * lh); ++int cut_node_content(coord_t * from, coord_t * to, const reiser4_key * from_key, ++ const reiser4_key * to_key, ++ reiser4_key * smallest_removed); ++int kill_node_content(coord_t * from, coord_t * to, ++ const reiser4_key * from_key, const reiser4_key * to_key, ++ reiser4_key * smallest_removed, ++ znode * locked_left_neighbor, struct inode *inode, ++ int truncate); ++ ++int reiser4_resize_item(coord_t * coord, reiser4_item_data * data, ++ reiser4_key * key, lock_handle * lh, cop_insert_flag); ++int insert_into_item(coord_t * coord, lock_handle * lh, const reiser4_key * key, ++ reiser4_item_data * data, unsigned); ++int reiser4_insert_flow(coord_t * coord, lock_handle * lh, flow_t * f); ++int find_new_child_ptr(znode * parent, znode * child, znode * left, ++ coord_t * result); ++ ++int shift_right_of_but_excluding_insert_coord(coord_t 
* insert_coord); ++int shift_left_of_and_including_insert_coord(coord_t * insert_coord); ++ ++void fake_kill_hook_tail(struct inode *, loff_t start, loff_t end, int); ++ ++extern int cut_tree_worker_common(tap_t *, const reiser4_key *, ++ const reiser4_key *, reiser4_key *, ++ struct inode *, int, int *); ++extern int reiser4_cut_tree_object(reiser4_tree *, const reiser4_key *, ++ const reiser4_key *, reiser4_key *, ++ struct inode *, int, int *); ++extern int reiser4_cut_tree(reiser4_tree * tree, const reiser4_key * from, ++ const reiser4_key * to, struct inode *, int); ++ ++extern int reiser4_delete_node(znode *, reiser4_key *, struct inode *, int); ++extern int check_tree_pointer(const coord_t * pointer, const znode * child); ++extern int find_new_child_ptr(znode * parent, znode * child UNUSED_ARG, ++ znode * left, coord_t * result); ++extern int find_child_ptr(znode * parent, znode * child, coord_t * result); ++extern int set_child_delimiting_keys(znode * parent, const coord_t * in_parent, ++ znode * child); ++extern znode *child_znode(const coord_t * in_parent, znode * parent, ++ int incore_p, int setup_dkeys_p); ++ ++extern int cbk_cache_init(cbk_cache * cache); ++extern void cbk_cache_done(cbk_cache * cache); ++extern void cbk_cache_invalidate(const znode * node, reiser4_tree * tree); ++ ++extern char *sprint_address(const reiser4_block_nr * block); ++ ++#if REISER4_DEBUG ++extern void print_coord_content(const char *prefix, coord_t * p); ++extern void reiser4_print_address(const char *prefix, ++ const reiser4_block_nr * block); ++extern void print_tree_rec(const char *prefix, reiser4_tree * tree, ++ __u32 flags); ++extern void check_dkeys(znode *node); ++#else ++#define print_coord_content(p, c) noop ++#define reiser4_print_address(p, b) noop ++#endif ++ ++extern void forget_znode(lock_handle * handle); ++extern int deallocate_znode(znode * node); ++ ++extern int is_disk_addr_unallocated(const reiser4_block_nr * addr); ++ ++/* struct used internally to pack 
all numerous arguments of tree lookup. ++ Used to avoid passing a lot of arguments to helper functions. */ ++typedef struct cbk_handle { ++ /* tree we are in */ ++ reiser4_tree *tree; ++ /* key we are going after */ ++ const reiser4_key *key; ++ /* coord we will store result in */ ++ coord_t *coord; ++ /* type of lock to take on target node */ ++ znode_lock_mode lock_mode; ++ /* lookup bias. See comments at the declaration of lookup_bias */ ++ lookup_bias bias; ++ /* lock level: level starting from which tree traversal starts taking ++ * write locks. */ ++ tree_level lock_level; ++ /* level where search will stop. Either item will be found between ++ lock_level and stop_level, or CBK_COORD_NOTFOUND will be ++ returned. ++ */ ++ tree_level stop_level; ++ /* level we are currently at */ ++ tree_level level; ++ /* block number of @active node. Tree traversal operates on two ++ nodes: active and parent. */ ++ reiser4_block_nr block; ++ /* put here error message to be printed by caller */ ++ const char *error; ++ /* result passed back to caller */ ++ lookup_result result; ++ /* lock handles for active and parent */ ++ lock_handle *parent_lh; ++ lock_handle *active_lh; ++ reiser4_key ld_key; ++ reiser4_key rd_key; ++ /* flags, passed to the cbk routine. Bits of this bitmask are defined ++ in tree.h:cbk_flags enum. 
*/ ++ __u32 flags; ++ ra_info_t *ra_info; ++ struct inode *object; ++} cbk_handle; ++ ++extern znode_lock_mode cbk_lock_mode(tree_level level, cbk_handle * h); ++ ++/* eottl.c */ ++extern int handle_eottl(cbk_handle *h, int *outcome); ++ ++int lookup_multikey(cbk_handle * handle, int nr_keys); ++int lookup_couple(reiser4_tree * tree, ++ const reiser4_key * key1, const reiser4_key * key2, ++ coord_t * coord1, coord_t * coord2, ++ lock_handle * lh1, lock_handle * lh2, ++ znode_lock_mode lock_mode, lookup_bias bias, ++ tree_level lock_level, tree_level stop_level, __u32 flags, ++ int *result1, int *result2); ++ ++static inline void read_lock_tree(reiser4_tree *tree) ++{ ++ /* check that tree is not locked */ ++ assert("", (LOCK_CNT_NIL(rw_locked_tree) && ++ LOCK_CNT_NIL(read_locked_tree) && ++ LOCK_CNT_NIL(write_locked_tree))); ++ /* check that spinlocks of lower priorities are not held */ ++ assert("", (LOCK_CNT_NIL(spin_locked_txnh) && ++ LOCK_CNT_NIL(rw_locked_dk) && ++ LOCK_CNT_NIL(spin_locked_stack))); ++ ++ read_lock(&(tree->tree_lock)); ++ ++ LOCK_CNT_INC(read_locked_tree); ++ LOCK_CNT_INC(rw_locked_tree); ++ LOCK_CNT_INC(spin_locked); ++} ++ ++static inline void read_unlock_tree(reiser4_tree *tree) ++{ ++ assert("nikita-1375", LOCK_CNT_GTZ(read_locked_tree)); ++ assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_tree)); ++ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); ++ ++ LOCK_CNT_DEC(read_locked_tree); ++ LOCK_CNT_DEC(rw_locked_tree); ++ LOCK_CNT_DEC(spin_locked); ++ ++ read_unlock(&(tree->tree_lock)); ++} ++ ++static inline void write_lock_tree(reiser4_tree *tree) ++{ ++ /* check that tree is not locked */ ++ assert("", (LOCK_CNT_NIL(rw_locked_tree) && ++ LOCK_CNT_NIL(read_locked_tree) && ++ LOCK_CNT_NIL(write_locked_tree))); ++ /* check that spinlocks of lower priorities are not held */ ++ assert("", (LOCK_CNT_NIL(spin_locked_txnh) && ++ LOCK_CNT_NIL(rw_locked_dk) && ++ LOCK_CNT_NIL(spin_locked_stack))); ++ ++ write_lock(&(tree->tree_lock)); ++ ++ 
LOCK_CNT_INC(write_locked_tree); ++ LOCK_CNT_INC(rw_locked_tree); ++ LOCK_CNT_INC(spin_locked); ++} ++ ++static inline void write_unlock_tree(reiser4_tree *tree) ++{ ++ assert("nikita-1375", LOCK_CNT_GTZ(write_locked_tree)); ++ assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_tree)); ++ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); ++ ++ LOCK_CNT_DEC(write_locked_tree); ++ LOCK_CNT_DEC(rw_locked_tree); ++ LOCK_CNT_DEC(spin_locked); ++ ++ write_unlock(&(tree->tree_lock)); ++} ++ ++static inline void read_lock_dk(reiser4_tree *tree) ++{ ++ /* check that dk is not locked */ ++ assert("", (LOCK_CNT_NIL(rw_locked_dk) && ++ LOCK_CNT_NIL(read_locked_dk) && ++ LOCK_CNT_NIL(write_locked_dk))); ++ /* check that spinlocks of lower priorities are not held */ ++ assert("", LOCK_CNT_NIL(spin_locked_stack)); ++ ++ read_lock(&((tree)->dk_lock)); ++ ++ LOCK_CNT_INC(read_locked_dk); ++ LOCK_CNT_INC(rw_locked_dk); ++ LOCK_CNT_INC(spin_locked); ++} ++ ++static inline void read_unlock_dk(reiser4_tree *tree) ++{ ++ assert("nikita-1375", LOCK_CNT_GTZ(read_locked_dk)); ++ assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_dk)); ++ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); ++ ++ LOCK_CNT_DEC(read_locked_dk); ++ LOCK_CNT_DEC(rw_locked_dk); ++ LOCK_CNT_DEC(spin_locked); ++ ++ read_unlock(&(tree->dk_lock)); ++} ++ ++static inline void write_lock_dk(reiser4_tree *tree) ++{ ++ /* check that dk is not locked */ ++ assert("", (LOCK_CNT_NIL(rw_locked_dk) && ++ LOCK_CNT_NIL(read_locked_dk) && ++ LOCK_CNT_NIL(write_locked_dk))); ++ /* check that spinlocks of lower priorities are not held */ ++ assert("", LOCK_CNT_NIL(spin_locked_stack)); ++ ++ write_lock(&((tree)->dk_lock)); ++ ++ LOCK_CNT_INC(write_locked_dk); ++ LOCK_CNT_INC(rw_locked_dk); ++ LOCK_CNT_INC(spin_locked); ++} ++ ++static inline void write_unlock_dk(reiser4_tree *tree) ++{ ++ assert("nikita-1375", LOCK_CNT_GTZ(write_locked_dk)); ++ assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_dk)); ++ assert("nikita-1376", 
LOCK_CNT_GTZ(spin_locked)); ++ ++ LOCK_CNT_DEC(write_locked_dk); ++ LOCK_CNT_DEC(rw_locked_dk); ++ LOCK_CNT_DEC(spin_locked); ++ ++ write_unlock(&(tree->dk_lock)); ++} ++ ++/* estimate api. Implementation is in estimate.c */ ++reiser4_block_nr estimate_one_insert_item(reiser4_tree *); ++reiser4_block_nr estimate_one_insert_into_item(reiser4_tree *); ++reiser4_block_nr estimate_insert_flow(tree_level); ++reiser4_block_nr estimate_one_item_removal(reiser4_tree *); ++reiser4_block_nr calc_estimate_one_insert(tree_level); ++reiser4_block_nr estimate_dirty_cluster(struct inode *); ++reiser4_block_nr estimate_insert_cluster(struct inode *); ++reiser4_block_nr estimate_update_cluster(struct inode *); ++ ++/* __REISER4_TREE_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/tree_mod.c b/fs/reiser4/tree_mod.c +new file mode 100644 +index 0000000..bcc6548 +--- /dev/null ++++ b/fs/reiser4/tree_mod.c +@@ -0,0 +1,386 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* ++ * Functions to add/delete new nodes to/from the tree. ++ * ++ * Functions from this file are used by carry (see carry*) to handle: ++ * ++ * . insertion of new formatted node into tree ++ * ++ * . addition of new tree root, increasing tree height ++ * ++ * . removing tree root, decreasing tree height ++ * ++ */ ++ ++#include "forward.h" ++#include "debug.h" ++#include "dformat.h" ++#include "key.h" ++#include "coord.h" ++#include "plugin/plugin.h" ++#include "jnode.h" ++#include "znode.h" ++#include "tree_mod.h" ++#include "block_alloc.h" ++#include "tree_walk.h" ++#include "tree.h" ++#include "super.h" ++ ++#include ++ ++static int add_child_ptr(znode * parent, znode * child); ++/* warning only issued if error is not -E_REPEAT */ ++#define ewarning( error, ... 
) \ ++ if( ( error ) != -E_REPEAT ) \ ++ warning( __VA_ARGS__ ) ++ ++/* allocate new node on the @level and immediately on the right of @brother. */ ++znode * reiser4_new_node(znode * brother /* existing left neighbor ++ * of new node */, ++ tree_level level /* tree level at which new node is to ++ * be allocated */) ++{ ++ znode *result; ++ int retcode; ++ reiser4_block_nr blocknr; ++ ++ assert("nikita-930", brother != NULL); ++ assert("umka-264", level < REAL_MAX_ZTREE_HEIGHT); ++ ++ retcode = assign_fake_blocknr_formatted(&blocknr); ++ if (retcode == 0) { ++ result = ++ zget(znode_get_tree(brother), &blocknr, NULL, level, ++ reiser4_ctx_gfp_mask_get()); ++ if (IS_ERR(result)) { ++ ewarning(PTR_ERR(result), "nikita-929", ++ "Cannot allocate znode for carry: %li", ++ PTR_ERR(result)); ++ return result; ++ } ++ /* cheap test, can be executed even when debugging is off */ ++ if (!znode_just_created(result)) { ++ warning("nikita-2213", ++ "Allocated already existing block: %llu", ++ (unsigned long long)blocknr); ++ zput(result); ++ return ERR_PTR(RETERR(-EIO)); ++ } ++ ++ assert("nikita-931", result != NULL); ++ result->nplug = znode_get_tree(brother)->nplug; ++ assert("nikita-933", result->nplug != NULL); ++ ++ retcode = zinit_new(result, reiser4_ctx_gfp_mask_get()); ++ if (retcode == 0) { ++ ZF_SET(result, JNODE_CREATED); ++ zrelse(result); ++ } else { ++ zput(result); ++ result = ERR_PTR(retcode); ++ } ++ } else { ++ /* failure to allocate new node during balancing. ++ This should never happen. Ever. Returning -E_REPEAT ++ is not viable solution, because "out of disk space" ++ is not transient error that will go away by itself. ++ */ ++ ewarning(retcode, "nikita-928", ++ "Cannot allocate block for carry: %i", retcode); ++ result = ERR_PTR(retcode); ++ } ++ assert("nikita-1071", result != NULL); ++ return result; ++} ++ ++/* allocate new root and add it to the tree ++ ++ This helper function is called by add_new_root(). 
++ ++*/ ++znode *reiser4_add_tree_root(znode * old_root /* existing tree root */ , ++ znode * fake /* "fake" znode */ ) ++{ ++ reiser4_tree *tree = znode_get_tree(old_root); ++ znode *new_root = NULL; /* to shut gcc up */ ++ int result; ++ ++ assert("nikita-1069", old_root != NULL); ++ assert("umka-262", fake != NULL); ++ assert("umka-263", tree != NULL); ++ ++ /* "fake" znode---one always hanging just above current root. This ++ node is locked when new root is created or existing root is ++ deleted. Downward tree traversal takes lock on it before taking ++ lock on a root node. This avoids race conditions with root ++ manipulations. ++ ++ */ ++ assert("nikita-1348", znode_above_root(fake)); ++ assert("nikita-1211", znode_is_root(old_root)); ++ ++ result = 0; ++ if (tree->height >= REAL_MAX_ZTREE_HEIGHT) { ++ warning("nikita-1344", "Tree is too tall: %i", tree->height); ++ /* ext2 returns -ENOSPC when it runs out of free inodes with a ++ following comment (fs/ext2/ialloc.c:441): Is it really ++ ENOSPC? ++ ++ -EXFULL? -EINVAL? ++ */ ++ result = RETERR(-ENOSPC); ++ } else { ++ /* Allocate block for new root. It's not that ++ important where it will be allocated, as root is ++ almost always in memory. Moreover, allocate on ++ flush can be going here. 
++ */ ++ assert("nikita-1448", znode_is_root(old_root)); ++ new_root = reiser4_new_node(fake, tree->height + 1); ++ if (!IS_ERR(new_root) && (result = zload(new_root)) == 0) { ++ lock_handle rlh; ++ ++ init_lh(&rlh); ++ result = ++ longterm_lock_znode(&rlh, new_root, ++ ZNODE_WRITE_LOCK, ++ ZNODE_LOCK_LOPRI); ++ if (result == 0) { ++ parent_coord_t *in_parent; ++ ++ znode_make_dirty(fake); ++ ++ /* new root is a child of "fake" node */ ++ write_lock_tree(tree); ++ ++ ++tree->height; ++ ++ /* recalculate max balance overhead */ ++ tree->estimate_one_insert = ++ estimate_one_insert_item(tree); ++ ++ tree->root_block = *znode_get_block(new_root); ++ in_parent = &new_root->in_parent; ++ init_parent_coord(in_parent, fake); ++ /* manually insert new root into sibling ++ * list. With this all nodes involved into ++ * balancing are connected after balancing is ++ * done---useful invariant to check. */ ++ sibling_list_insert_nolock(new_root, NULL); ++ write_unlock_tree(tree); ++ ++ /* insert into new root pointer to the ++ @old_root. */ ++ assert("nikita-1110", ++ WITH_DATA(new_root, ++ node_is_empty(new_root))); ++ write_lock_dk(tree); ++ znode_set_ld_key(new_root, reiser4_min_key()); ++ znode_set_rd_key(new_root, reiser4_max_key()); ++ write_unlock_dk(tree); ++ if (REISER4_DEBUG) { ++ ZF_CLR(old_root, JNODE_LEFT_CONNECTED); ++ ZF_CLR(old_root, JNODE_RIGHT_CONNECTED); ++ ZF_SET(old_root, JNODE_ORPHAN); ++ } ++ result = add_child_ptr(new_root, old_root); ++ done_lh(&rlh); ++ } ++ zrelse(new_root); ++ } ++ } ++ if (result != 0) ++ new_root = ERR_PTR(result); ++ return new_root; ++} ++ ++/* build &reiser4_item_data for inserting child pointer ++ ++ Build &reiser4_item_data that can be later used to insert pointer to @child ++ in its parent. 
++ ++*/ ++void build_child_ptr_data(znode * child /* node pointer to which will be ++ * inserted */ , ++ reiser4_item_data * data /* where to store result */ ) ++{ ++ assert("nikita-1116", child != NULL); ++ assert("nikita-1117", data != NULL); ++ ++ /* ++ * NOTE: use address of child's blocknr as address of data to be ++ * inserted. As result of this data gets into on-disk structure in cpu ++ * byte order. internal's create_hook converts it to little endian byte ++ * order. ++ */ ++ data->data = (char *)znode_get_block(child); ++ /* data -> data is kernel space */ ++ data->user = 0; ++ data->length = sizeof(reiser4_block_nr); ++ /* FIXME-VS: hardcoded internal item? */ ++ ++ /* AUDIT: Is it possible that "item_plugin_by_id" may find nothing? */ ++ data->iplug = item_plugin_by_id(NODE_POINTER_ID); ++} ++ ++/* add pointer to @child into empty @parent. ++ ++ This is used when pointer to old root is inserted into new root which is ++ empty. ++*/ ++static int add_child_ptr(znode * parent, znode * child) ++{ ++ coord_t coord; ++ reiser4_item_data data; ++ int result; ++ reiser4_key key; ++ ++ assert("nikita-1111", parent != NULL); ++ assert("nikita-1112", child != NULL); ++ assert("nikita-1115", ++ znode_get_level(parent) == znode_get_level(child) + 1); ++ ++ result = zload(parent); ++ if (result != 0) ++ return result; ++ assert("nikita-1113", node_is_empty(parent)); ++ coord_init_first_unit(&coord, parent); ++ ++ build_child_ptr_data(child, &data); ++ data.arg = NULL; ++ ++ read_lock_dk(znode_get_tree(parent)); ++ key = *znode_get_ld_key(child); ++ read_unlock_dk(znode_get_tree(parent)); ++ ++ result = node_plugin_by_node(parent)->create_item(&coord, &key, &data, ++ NULL); ++ znode_make_dirty(parent); ++ zrelse(parent); ++ return result; ++} ++ ++/* actually remove tree root */ ++static int reiser4_kill_root(reiser4_tree * tree /* tree from which root is ++ * being removed */, ++ znode * old_root /* root node that is being ++ * removed */ , ++ znode * new_root /* new 
root---sole child of ++ * @old_root */, ++ const reiser4_block_nr * new_root_blk /* disk address of ++ * @new_root */) ++{ ++ znode *uber; ++ int result; ++ lock_handle handle_for_uber; ++ ++ assert("umka-265", tree != NULL); ++ assert("nikita-1198", new_root != NULL); ++ assert("nikita-1199", ++ znode_get_level(new_root) + 1 == znode_get_level(old_root)); ++ ++ assert("nikita-1201", znode_is_write_locked(old_root)); ++ ++ assert("nikita-1203", ++ disk_addr_eq(new_root_blk, znode_get_block(new_root))); ++ ++ init_lh(&handle_for_uber); ++ /* obtain and lock "fake" znode protecting changes in tree height. */ ++ result = get_uber_znode(tree, ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI, ++ &handle_for_uber); ++ if (result == 0) { ++ uber = handle_for_uber.node; ++ ++ znode_make_dirty(uber); ++ ++ /* don't take long term lock a @new_root. Take spinlock. */ ++ ++ write_lock_tree(tree); ++ ++ tree->root_block = *new_root_blk; ++ --tree->height; ++ ++ /* recalculate max balance overhead */ ++ tree->estimate_one_insert = estimate_one_insert_item(tree); ++ ++ assert("nikita-1202", ++ tree->height == znode_get_level(new_root)); ++ ++ /* new root is child on "fake" node */ ++ init_parent_coord(&new_root->in_parent, uber); ++ ++uber->c_count; ++ ++ /* sibling_list_insert_nolock(new_root, NULL); */ ++ write_unlock_tree(tree); ++ ++ /* reinitialise old root. */ ++ result = node_plugin_by_node(old_root)->init(old_root); ++ znode_make_dirty(old_root); ++ if (result == 0) { ++ assert("nikita-1279", node_is_empty(old_root)); ++ ZF_SET(old_root, JNODE_HEARD_BANSHEE); ++ old_root->c_count = 0; ++ } ++ } ++ done_lh(&handle_for_uber); ++ ++ return result; ++} ++ ++/* remove tree root ++ ++ This function removes tree root, decreasing tree height by one. Tree root ++ and its only child (that is going to become new tree root) are write locked ++ at the entry. ++ ++ To remove tree root we need to take lock on special "fake" znode that ++ protects changes of tree height. 
See comments in reiser4_add_tree_root() for ++ more on this. ++ ++ Also parent pointers have to be updated in ++ old and new root. To simplify code, function is split into two parts: outer ++ reiser4_kill_tree_root() collects all necessary arguments and calls ++ reiser4_kill_root() to do the actual job. ++ ++*/ ++int reiser4_kill_tree_root(znode * old_root /* tree root that we are ++ removing*/) ++{ ++ int result; ++ coord_t down_link; ++ znode *new_root; ++ reiser4_tree *tree; ++ ++ assert("umka-266", current_tree != NULL); ++ assert("nikita-1194", old_root != NULL); ++ assert("nikita-1196", znode_is_root(old_root)); ++ assert("nikita-1200", node_num_items(old_root) == 1); ++ assert("nikita-1401", znode_is_write_locked(old_root)); ++ ++ coord_init_first_unit(&down_link, old_root); ++ ++ tree = znode_get_tree(old_root); ++ new_root = child_znode(&down_link, old_root, 0, 1); ++ if (!IS_ERR(new_root)) { ++ result = ++ reiser4_kill_root(tree, old_root, new_root, ++ znode_get_block(new_root)); ++ zput(new_root); ++ } else ++ result = PTR_ERR(new_root); ++ ++ return result; ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/tree_mod.h b/fs/reiser4/tree_mod.h +new file mode 100644 +index 0000000..1519641 +--- /dev/null ++++ b/fs/reiser4/tree_mod.h +@@ -0,0 +1,29 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Functions to add/delete new nodes to/from the tree. See tree_mod.c for ++ * comments. 
*/ ++ ++#if !defined( __REISER4_TREE_MOD_H__ ) ++#define __REISER4_TREE_MOD_H__ ++ ++#include "forward.h" ++ ++znode *reiser4_new_node(znode * brother, tree_level level); ++znode *reiser4_add_tree_root(znode * old_root, znode * fake); ++int reiser4_kill_tree_root(znode * old_root); ++void build_child_ptr_data(znode * child, reiser4_item_data * data); ++ ++/* __REISER4_TREE_MOD_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/tree_walk.c b/fs/reiser4/tree_walk.c +new file mode 100644 +index 0000000..cde4875 +--- /dev/null ++++ b/fs/reiser4/tree_walk.c +@@ -0,0 +1,927 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Routines and macros to: ++ ++ get_left_neighbor() ++ ++ get_right_neighbor() ++ ++ get_parent() ++ ++ get_first_child() ++ ++ get_last_child() ++ ++ various routines to walk the whole tree and do things to it like ++ repack it, or move it to tertiary storage. Please make them as ++ generic as is reasonable. ++ ++*/ ++ ++#include "forward.h" ++#include "debug.h" ++#include "dformat.h" ++#include "coord.h" ++#include "plugin/item/item.h" ++#include "jnode.h" ++#include "znode.h" ++#include "tree_walk.h" ++#include "tree.h" ++#include "super.h" ++ ++/* These macros are used internally in tree_walk.c in attempt to make ++ lock_neighbor() code usable to build lock_parent(), lock_right_neighbor, ++ lock_left_neighbor */ ++#define GET_NODE_BY_PTR_OFFSET(node, off) (*(znode**)(((unsigned long)(node)) + (off))) ++#define FIELD_OFFSET(name) offsetof(znode, name) ++#define PARENT_PTR_OFFSET FIELD_OFFSET(in_parent.node) ++#define LEFT_PTR_OFFSET FIELD_OFFSET(left) ++#define RIGHT_PTR_OFFSET FIELD_OFFSET(right) ++ ++/* This is the generic procedure to get and lock `generic' neighbor (left or ++ right neighbor or parent). 
It implements common algorithm for all cases of ++ getting lock on neighbor node, only znode structure field is different in ++ each case. This is parameterized by ptr_offset argument, which is byte ++ offset for the pointer to the desired neighbor within the current node's ++ znode structure. This function should be called with the tree lock held */ ++static int lock_neighbor( ++ /* resulting lock handle */ ++ lock_handle * result, ++ /* znode to lock */ ++ znode * node, ++ /* pointer to neighbor (or parent) znode field offset, in bytes from ++ the base address of znode structure */ ++ int ptr_offset, ++ /* lock mode for longterm_lock_znode call */ ++ znode_lock_mode mode, ++ /* lock request for longterm_lock_znode call */ ++ znode_lock_request req, ++ /* GN_* flags */ ++ int flags, int rlocked) ++{ ++ reiser4_tree *tree = znode_get_tree(node); ++ znode *neighbor; ++ int ret; ++ ++ assert("umka-236", node != NULL); ++ assert("umka-237", tree != NULL); ++ assert_rw_locked(&(tree->tree_lock)); ++ ++ if (flags & GN_TRY_LOCK) ++ req |= ZNODE_LOCK_NONBLOCK; ++ if (flags & GN_SAME_ATOM) ++ req |= ZNODE_LOCK_DONT_FUSE; ++ ++ /* get neighbor's address by using of sibling link, quit while loop ++ (and return) if link is not available. */ ++ while (1) { ++ neighbor = GET_NODE_BY_PTR_OFFSET(node, ptr_offset); ++ ++ /* return -E_NO_NEIGHBOR if parent or side pointer is NULL or if ++ * node pointed by it is not connected. ++ * ++ * However, GN_ALLOW_NOT_CONNECTED option masks "connected" ++ * check and allows passing reference to not connected znode to ++ * subsequent longterm_lock_znode() call. This kills possible ++ * busy loop if we are trying to get longterm lock on locked but ++ * not yet connected parent node. */ ++ if (neighbor == NULL || !((flags & GN_ALLOW_NOT_CONNECTED) ++ || znode_is_connected(neighbor))) { ++ return RETERR(-E_NO_NEIGHBOR); ++ } ++ ++ /* protect it from deletion. */ ++ zref(neighbor); ++ ++ rlocked ? 
read_unlock_tree(tree) : write_unlock_tree(tree); ++ ++ ret = longterm_lock_znode(result, neighbor, mode, req); ++ ++ /* The lock handle obtains its own reference, release the one from above. */ ++ zput(neighbor); ++ ++ rlocked ? read_lock_tree(tree) : write_lock_tree(tree); ++ ++ /* restart if node we got reference to is being ++ invalidated. we should not get reference to this node ++ again. */ ++ if (ret == -EINVAL) ++ continue; ++ if (ret) ++ return ret; ++ ++ /* check if neighbor link still points to just locked znode; ++ the link could have been changed while the process slept. */ ++ if (neighbor == GET_NODE_BY_PTR_OFFSET(node, ptr_offset)) ++ return 0; ++ ++ /* znode was locked by mistake; unlock it and restart locking ++ process from beginning. */ ++ rlocked ? read_unlock_tree(tree) : write_unlock_tree(tree); ++ longterm_unlock_znode(result); ++ rlocked ? read_lock_tree(tree) : write_lock_tree(tree); ++ } ++} ++ ++/* get parent node with longterm lock, accepts GN* flags. */ ++int reiser4_get_parent_flags(lock_handle * lh /* resulting lock handle */ , ++ znode * node /* child node */ , ++ znode_lock_mode mode ++ /* type of lock: read or write */ , ++ int flags /* GN_* flags */ ) ++{ ++ int result; ++ ++ read_lock_tree(znode_get_tree(node)); ++ result = lock_neighbor(lh, node, PARENT_PTR_OFFSET, mode, ++ ZNODE_LOCK_HIPRI, flags, 1); ++ read_unlock_tree(znode_get_tree(node)); ++ return result; ++} ++ ++/* wrapper function to lock right or left neighbor depending on GN_GO_LEFT ++ bit in @flags parameter */ ++/* Audited by: umka (2002.06.14) */ ++static inline int ++lock_side_neighbor(lock_handle * result, ++ znode * node, znode_lock_mode mode, int flags, int rlocked) ++{ ++ int ret; ++ int ptr_offset; ++ znode_lock_request req; ++ ++ if (flags & GN_GO_LEFT) { ++ ptr_offset = LEFT_PTR_OFFSET; ++ req = ZNODE_LOCK_LOPRI; ++ } else { ++ ptr_offset = RIGHT_PTR_OFFSET; ++ req = ZNODE_LOCK_HIPRI; ++ } ++ ++ ret = ++ lock_neighbor(result, node, ptr_offset, mode, req, 
flags, rlocked); ++ ++ if (ret == -E_NO_NEIGHBOR) /* if we walk left or right -E_NO_NEIGHBOR does not ++ * guarantee that neighbor is absent in the ++ * tree; in this case we return -ENOENT -- ++ * means neighbor at least not found in ++ * cache */ ++ return RETERR(-ENOENT); ++ ++ return ret; ++} ++ ++#if REISER4_DEBUG ++ ++int check_sibling_list(znode * node) ++{ ++ znode *scan; ++ znode *next; ++ ++ assert("nikita-3283", LOCK_CNT_GTZ(write_locked_tree)); ++ ++ if (node == NULL) ++ return 1; ++ ++ if (ZF_ISSET(node, JNODE_RIP)) ++ return 1; ++ ++ assert("nikita-3270", node != NULL); ++ assert_rw_write_locked(&(znode_get_tree(node)->tree_lock)); ++ ++ for (scan = node; znode_is_left_connected(scan); scan = next) { ++ next = scan->left; ++ if (next != NULL && !ZF_ISSET(next, JNODE_RIP)) { ++ assert("nikita-3271", znode_is_right_connected(next)); ++ assert("nikita-3272", next->right == scan); ++ } else ++ break; ++ } ++ for (scan = node; znode_is_right_connected(scan); scan = next) { ++ next = scan->right; ++ if (next != NULL && !ZF_ISSET(next, JNODE_RIP)) { ++ assert("nikita-3273", znode_is_left_connected(next)); ++ assert("nikita-3274", next->left == scan); ++ } else ++ break; ++ } ++ return 1; ++} ++ ++#endif ++ ++/* Znode sibling pointers maintenence. */ ++ ++/* Znode sibling pointers are established between any neighbored nodes which are ++ in cache. There are two znode state bits (JNODE_LEFT_CONNECTED, ++ JNODE_RIGHT_CONNECTED), if left or right sibling pointer contains actual ++ value (even NULL), corresponded JNODE_*_CONNECTED bit is set. ++ ++ Reiser4 tree operations which may allocate new znodes (CBK, tree balancing) ++ take care about searching (hash table lookup may be required) of znode ++ neighbors, establishing sibling pointers between them and setting ++ JNODE_*_CONNECTED state bits. */ ++ ++/* adjusting of sibling pointers and `connected' states for two ++ neighbors; works if one neighbor is NULL (was not found). 
*/ ++ ++/* FIXME-VS: this is unstatic-ed to use in tree.c in prepare_twig_cut */ ++void link_left_and_right(znode * left, znode * right) ++{ ++ assert("nikita-3275", check_sibling_list(left)); ++ assert("nikita-3275", check_sibling_list(right)); ++ ++ if (left != NULL) { ++ if (left->right == NULL) { ++ left->right = right; ++ ZF_SET(left, JNODE_RIGHT_CONNECTED); ++ ++ ON_DEBUG(left->right_version = ++ atomic_inc_return(&delim_key_version); ++ ); ++ ++ } else if (ZF_ISSET(left->right, JNODE_HEARD_BANSHEE) ++ && left->right != right) { ++ ++ ON_DEBUG(left->right->left_version = ++ atomic_inc_return(&delim_key_version); ++ left->right_version = ++ atomic_inc_return(&delim_key_version);); ++ ++ left->right->left = NULL; ++ left->right = right; ++ ZF_SET(left, JNODE_RIGHT_CONNECTED); ++ } else ++ /* ++ * there is a race condition in renew_sibling_link() ++ * and assertions below check that it is only one ++ * there. Thread T1 calls renew_sibling_link() without ++ * GN_NO_ALLOC flag. zlook() doesn't find neighbor ++ * node, but before T1 gets to the ++ * link_left_and_right(), another thread T2 creates ++ * neighbor node and connects it. check for ++ * left->right == NULL above protects T1 from ++ * overwriting correct left->right pointer installed ++ * by T2. 
++ */ ++ assert("nikita-3302", ++ right == NULL || left->right == right); ++ } ++ if (right != NULL) { ++ if (right->left == NULL) { ++ right->left = left; ++ ZF_SET(right, JNODE_LEFT_CONNECTED); ++ ++ ON_DEBUG(right->left_version = ++ atomic_inc_return(&delim_key_version); ++ ); ++ ++ } else if (ZF_ISSET(right->left, JNODE_HEARD_BANSHEE) ++ && right->left != left) { ++ ++ ON_DEBUG(right->left->right_version = ++ atomic_inc_return(&delim_key_version); ++ right->left_version = ++ atomic_inc_return(&delim_key_version);); ++ ++ right->left->right = NULL; ++ right->left = left; ++ ZF_SET(right, JNODE_LEFT_CONNECTED); ++ ++ } else ++ assert("nikita-3303", ++ left == NULL || right->left == left); ++ } ++ assert("nikita-3275", check_sibling_list(left)); ++ assert("nikita-3275", check_sibling_list(right)); ++} ++ ++/* Audited by: umka (2002.06.14) */ ++static void link_znodes(znode * first, znode * second, int to_left) ++{ ++ if (to_left) ++ link_left_and_right(second, first); ++ else ++ link_left_and_right(first, second); ++} ++ ++/* getting of next (to left or to right, depend on gn_to_left bit in flags) ++ coord's unit position in horizontal direction, even across node ++ boundary. Should be called under tree lock, it protects nonexistence of ++ sibling link on parent level, if lock_side_neighbor() fails with ++ -ENOENT. */ ++static int far_next_coord(coord_t * coord, lock_handle * handle, int flags) ++{ ++ int ret; ++ znode *node; ++ reiser4_tree *tree; ++ ++ assert("umka-243", coord != NULL); ++ assert("umka-244", handle != NULL); ++ assert("zam-1069", handle->node == NULL); ++ ++ ret = ++ (flags & GN_GO_LEFT) ? 
coord_prev_unit(coord) : ++ coord_next_unit(coord); ++ if (!ret) ++ return 0; ++ ++ ret = ++ lock_side_neighbor(handle, coord->node, ZNODE_READ_LOCK, flags, 0); ++ if (ret) ++ return ret; ++ ++ node = handle->node; ++ tree = znode_get_tree(node); ++ write_unlock_tree(tree); ++ ++ coord_init_zero(coord); ++ ++ /* We avoid synchronous read here if it is specified by flag. */ ++ if ((flags & GN_ASYNC) && znode_page(handle->node) == NULL) { ++ ret = jstartio(ZJNODE(handle->node)); ++ if (!ret) ++ ret = -E_REPEAT; ++ goto error_locked; ++ } ++ ++ /* corresponded zrelse() should be called by the clients of ++ far_next_coord(), in place when this node gets unlocked. */ ++ ret = zload(handle->node); ++ if (ret) ++ goto error_locked; ++ ++ if (flags & GN_GO_LEFT) ++ coord_init_last_unit(coord, node); ++ else ++ coord_init_first_unit(coord, node); ++ ++ if (0) { ++ error_locked: ++ longterm_unlock_znode(handle); ++ } ++ write_lock_tree(tree); ++ return ret; ++} ++ ++/* Very significant function which performs a step in horizontal direction ++ when sibling pointer is not available. Actually, it is only function which ++ does it. 
++ Note: this function does not restore locking status at exit, ++ caller should does care about proper unlocking and zrelsing */ ++static int ++renew_sibling_link(coord_t * coord, lock_handle * handle, znode * child, ++ tree_level level, int flags, int *nr_locked) ++{ ++ int ret; ++ int to_left = flags & GN_GO_LEFT; ++ reiser4_block_nr da; ++ /* parent of the neighbor node; we set it to parent until not sharing ++ of one parent between child and neighbor node is detected */ ++ znode *side_parent = coord->node; ++ reiser4_tree *tree = znode_get_tree(child); ++ znode *neighbor = NULL; ++ ++ assert("umka-245", coord != NULL); ++ assert("umka-246", handle != NULL); ++ assert("umka-247", child != NULL); ++ assert("umka-303", tree != NULL); ++ ++ init_lh(handle); ++ write_lock_tree(tree); ++ ret = far_next_coord(coord, handle, flags); ++ ++ if (ret) { ++ if (ret != -ENOENT) { ++ write_unlock_tree(tree); ++ return ret; ++ } ++ } else { ++ item_plugin *iplug; ++ ++ if (handle->node != NULL) { ++ (*nr_locked)++; ++ side_parent = handle->node; ++ } ++ ++ /* does coord object points to internal item? We do not ++ support sibling pointers between znode for formatted and ++ unformatted nodes and return -E_NO_NEIGHBOR in that case. 
*/ ++ iplug = item_plugin_by_coord(coord); ++ if (!item_is_internal(coord)) { ++ link_znodes(child, NULL, to_left); ++ write_unlock_tree(tree); ++ /* we know there can't be formatted neighbor */ ++ return RETERR(-E_NO_NEIGHBOR); ++ } ++ write_unlock_tree(tree); ++ ++ iplug->s.internal.down_link(coord, NULL, &da); ++ ++ if (flags & GN_NO_ALLOC) { ++ neighbor = zlook(tree, &da); ++ } else { ++ neighbor = ++ zget(tree, &da, side_parent, level, ++ reiser4_ctx_gfp_mask_get()); ++ } ++ ++ if (IS_ERR(neighbor)) { ++ ret = PTR_ERR(neighbor); ++ return ret; ++ } ++ ++ if (neighbor) ++ /* update delimiting keys */ ++ set_child_delimiting_keys(coord->node, coord, neighbor); ++ ++ write_lock_tree(tree); ++ } ++ ++ if (likely(neighbor == NULL || ++ (znode_get_level(child) == znode_get_level(neighbor) ++ && child != neighbor))) ++ link_znodes(child, neighbor, to_left); ++ else { ++ warning("nikita-3532", ++ "Sibling nodes on the different levels: %i != %i\n", ++ znode_get_level(child), znode_get_level(neighbor)); ++ ret = RETERR(-EIO); ++ } ++ ++ write_unlock_tree(tree); ++ ++ /* if GN_NO_ALLOC isn't set we keep reference to neighbor znode */ ++ if (neighbor != NULL && (flags & GN_NO_ALLOC)) ++ /* atomic_dec(&ZJNODE(neighbor)->x_count); */ ++ zput(neighbor); ++ ++ return ret; ++} ++ ++/* This function is for establishing of one side relation. 
*/ ++/* Audited by: umka (2002.06.14) */ ++static int connect_one_side(coord_t * coord, znode * node, int flags) ++{ ++ coord_t local; ++ lock_handle handle; ++ int nr_locked; ++ int ret; ++ ++ assert("umka-248", coord != NULL); ++ assert("umka-249", node != NULL); ++ ++ coord_dup_nocheck(&local, coord); ++ ++ init_lh(&handle); ++ ++ ret = ++ renew_sibling_link(&local, &handle, node, znode_get_level(node), ++ flags | GN_NO_ALLOC, &nr_locked); ++ ++ if (handle.node != NULL) { ++ /* complementary operations for zload() and lock() in far_next_coord() */ ++ zrelse(handle.node); ++ longterm_unlock_znode(&handle); ++ } ++ ++ /* we catch error codes which are not interesting for us because we ++ run renew_sibling_link() only for znode connection. */ ++ if (ret == -ENOENT || ret == -E_NO_NEIGHBOR) ++ return 0; ++ ++ return ret; ++} ++ ++/* if @child is not in `connected' state, performs hash searches for left and ++ right neighbor nodes and establishes horizontal sibling links */ ++/* Audited by: umka (2002.06.14), umka (2002.06.15) */ ++int connect_znode(coord_t * parent_coord, znode * child) ++{ ++ reiser4_tree *tree = znode_get_tree(child); ++ int ret = 0; ++ ++ assert("zam-330", parent_coord != NULL); ++ assert("zam-331", child != NULL); ++ assert("zam-332", parent_coord->node != NULL); ++ assert("umka-305", tree != NULL); ++ ++ /* it is trivial to `connect' root znode because it can't have ++ neighbors */ ++ if (znode_above_root(parent_coord->node)) { ++ child->left = NULL; ++ child->right = NULL; ++ ZF_SET(child, JNODE_LEFT_CONNECTED); ++ ZF_SET(child, JNODE_RIGHT_CONNECTED); ++ ++ ON_DEBUG(child->left_version = ++ atomic_inc_return(&delim_key_version); ++ child->right_version = ++ atomic_inc_return(&delim_key_version);); ++ ++ return 0; ++ } ++ ++ /* load parent node */ ++ coord_clear_iplug(parent_coord); ++ ret = zload(parent_coord->node); ++ ++ if (ret != 0) ++ return ret; ++ ++ /* protect `connected' state check by tree_lock */ ++ read_lock_tree(tree); ++ ++ if 
(!znode_is_right_connected(child)) { ++ read_unlock_tree(tree); ++ /* connect right (default is right) */ ++ ret = connect_one_side(parent_coord, child, GN_NO_ALLOC); ++ if (ret) ++ goto zrelse_and_ret; ++ ++ read_lock_tree(tree); ++ } ++ ++ ret = znode_is_left_connected(child); ++ ++ read_unlock_tree(tree); ++ ++ if (!ret) { ++ ret = ++ connect_one_side(parent_coord, child, ++ GN_NO_ALLOC | GN_GO_LEFT); ++ } else ++ ret = 0; ++ ++ zrelse_and_ret: ++ zrelse(parent_coord->node); ++ ++ return ret; ++} ++ ++/* this function is like renew_sibling_link() but allocates neighbor node if ++ it doesn't exist and `connects' it. It may require making two steps in ++ horizontal direction, first one for neighbor node finding/allocation, ++ second one is for finding neighbor of neighbor to connect freshly allocated ++ znode. */ ++/* Audited by: umka (2002.06.14), umka (2002.06.15) */ ++static int ++renew_neighbor(coord_t * coord, znode * node, tree_level level, int flags) ++{ ++ coord_t local; ++ lock_handle empty[2]; ++ reiser4_tree *tree = znode_get_tree(node); ++ znode *neighbor = NULL; ++ int nr_locked = 0; ++ int ret; ++ ++ assert("umka-250", coord != NULL); ++ assert("umka-251", node != NULL); ++ assert("umka-307", tree != NULL); ++ assert("umka-308", level <= tree->height); ++ ++ /* umka (2002.06.14) ++ Here probably should be a check for given "level" validness. ++ Something like assert("xxx-yyy", level < REAL_MAX_ZTREE_HEIGHT); ++ */ ++ ++ coord_dup(&local, coord); ++ ++ ret = ++ renew_sibling_link(&local, &empty[0], node, level, ++ flags & ~GN_NO_ALLOC, &nr_locked); ++ if (ret) ++ goto out; ++ ++ /* tree lock is not needed here because we keep parent node(s) locked ++ and reference to neighbor znode incremented */ ++ neighbor = (flags & GN_GO_LEFT) ? 
node->left : node->right; ++ ++ read_lock_tree(tree); ++ ret = znode_is_connected(neighbor); ++ read_unlock_tree(tree); ++ if (ret) { ++ ret = 0; ++ goto out; ++ } ++ ++ ret = ++ renew_sibling_link(&local, &empty[nr_locked], neighbor, level, ++ flags | GN_NO_ALLOC, &nr_locked); ++ /* second renew_sibling_link() call is used for znode connection only, ++ so we can live with these errors */ ++ if (-ENOENT == ret || -E_NO_NEIGHBOR == ret) ++ ret = 0; ++ ++ out: ++ ++ for (--nr_locked; nr_locked >= 0; --nr_locked) { ++ zrelse(empty[nr_locked].node); ++ longterm_unlock_znode(&empty[nr_locked]); ++ } ++ ++ if (neighbor != NULL) ++ /* decrement znode reference counter without actually ++ releasing it. */ ++ atomic_dec(&ZJNODE(neighbor)->x_count); ++ ++ return ret; ++} ++ ++/* ++ reiser4_get_neighbor() -- lock node's neighbor. ++ ++ reiser4_get_neighbor() locks node's neighbor (left or right one, depends on ++ given parameter) using sibling link to it. If sibling link is not available ++ (i.e. neighbor znode is not in cache) and flags allow read blocks, we go one ++ level up for information about neighbor's disk address. We lock node's ++ parent, if it is common parent for both 'node' and its neighbor, neighbor's ++ disk address is in next (to left or to right) down link from link that points ++ to original node. If not, we need to lock parent's neighbor, read its content ++ and take first(last) downlink with neighbor's disk address. That locking ++ could be done by using sibling link and lock_neighbor() function, if sibling ++ link exists. In another case we have to go level up again until we find ++ common parent or valid sibling link. Then go down ++ allocating/connecting/locking/reading nodes until neighbor of first one is ++ locked. ++ ++ @neighbor: result lock handle, ++ @node: a node which we lock neighbor of, ++ @lock_mode: lock mode {LM_READ, LM_WRITE}, ++ @flags: logical OR of {GN_*} (see description above) subset. 
++ ++ @return: 0 if success, negative value if lock was impossible due to an error ++ or lack of neighbor node. ++*/ ++ ++/* Audited by: umka (2002.06.14), umka (2002.06.15) */ ++int ++reiser4_get_neighbor(lock_handle * neighbor, znode * node, ++ znode_lock_mode lock_mode, int flags) ++{ ++ reiser4_tree *tree = znode_get_tree(node); ++ lock_handle path[REAL_MAX_ZTREE_HEIGHT]; ++ ++ coord_t coord; ++ ++ tree_level base_level; ++ tree_level h = 0; ++ int ret; ++ ++ assert("umka-252", tree != NULL); ++ assert("umka-253", neighbor != NULL); ++ assert("umka-254", node != NULL); ++ ++ base_level = znode_get_level(node); ++ ++ assert("umka-310", base_level <= tree->height); ++ ++ coord_init_zero(&coord); ++ ++ again: ++ /* first, we try to use simple lock_neighbor() which requires sibling ++ link existence */ ++ read_lock_tree(tree); ++ ret = lock_side_neighbor(neighbor, node, lock_mode, flags, 1); ++ read_unlock_tree(tree); ++ if (!ret) { ++ /* load znode content if it was specified */ ++ if (flags & GN_LOAD_NEIGHBOR) { ++ ret = zload(node); ++ if (ret) ++ longterm_unlock_znode(neighbor); ++ } ++ return ret; ++ } ++ ++ /* only -ENOENT means we may look upward and try to connect ++ @node with its neighbor (if @flags allow us to do it) */ ++ if (ret != -ENOENT || !(flags & GN_CAN_USE_UPPER_LEVELS)) ++ return ret; ++ ++ /* before establishing of sibling link we lock parent node; it is ++ required by renew_neighbor() to work. */ ++ init_lh(&path[0]); ++ ret = reiser4_get_parent(&path[0], node, ZNODE_READ_LOCK); ++ if (ret) ++ return ret; ++ if (znode_above_root(path[0].node)) { ++ longterm_unlock_znode(&path[0]); ++ return RETERR(-E_NO_NEIGHBOR); ++ } ++ ++ while (1) { ++ znode *child = (h == 0) ? 
node : path[h - 1].node; ++ znode *parent = path[h].node; ++ ++ ret = zload(parent); ++ if (ret) ++ break; ++ ++ ret = find_child_ptr(parent, child, &coord); ++ ++ if (ret) { ++ zrelse(parent); ++ break; ++ } ++ ++ /* try to establish missing sibling link */ ++ ret = renew_neighbor(&coord, child, h + base_level, flags); ++ ++ zrelse(parent); ++ ++ switch (ret) { ++ case 0: ++ /* unlocking of parent znode prevents simple ++ deadlock situation */ ++ done_lh(&path[h]); ++ ++ /* depend on tree level we stay on we repeat first ++ locking attempt ... */ ++ if (h == 0) ++ goto again; ++ ++ /* ... or repeat establishing of sibling link at ++ one level below. */ ++ --h; ++ break; ++ ++ case -ENOENT: ++ /* sibling link is not available -- we go ++ upward. */ ++ init_lh(&path[h + 1]); ++ ret = ++ reiser4_get_parent(&path[h + 1], parent, ++ ZNODE_READ_LOCK); ++ if (ret) ++ goto fail; ++ ++h; ++ if (znode_above_root(path[h].node)) { ++ ret = RETERR(-E_NO_NEIGHBOR); ++ goto fail; ++ } ++ break; ++ ++ case -E_DEADLOCK: ++ /* there was lock request from hi-pri locker. if ++ it is possible we unlock last parent node and ++ re-lock it again. */ ++ for (; reiser4_check_deadlock(); h--) { ++ done_lh(&path[h]); ++ if (h == 0) ++ goto fail; ++ } ++ ++ break; ++ ++ default: /* other errors. 
*/ ++ goto fail; ++ } ++ } ++ fail: ++ ON_DEBUG(check_lock_node_data(node)); ++ ON_DEBUG(check_lock_data()); ++ ++ /* unlock path */ ++ do { ++ /* FIXME-Zam: when we get here from case -E_DEADLOCK's goto ++ fail; path[0] is already done_lh-ed, therefore ++ longterm_unlock_znode(&path[h]); is not applicable */ ++ done_lh(&path[h]); ++ --h; ++ } while (h + 1 != 0); ++ ++ return ret; ++} ++ ++/* remove node from sibling list */ ++/* Audited by: umka (2002.06.14) */ ++void sibling_list_remove(znode * node) ++{ ++ reiser4_tree *tree; ++ ++ tree = znode_get_tree(node); ++ assert("umka-255", node != NULL); ++ assert_rw_write_locked(&(tree->tree_lock)); ++ assert("nikita-3275", check_sibling_list(node)); ++ ++ write_lock_dk(tree); ++ if (znode_is_right_connected(node) && node->right != NULL && ++ znode_is_left_connected(node) && node->left != NULL) { ++ assert("zam-32245", ++ keyeq(znode_get_rd_key(node), ++ znode_get_ld_key(node->right))); ++ znode_set_rd_key(node->left, znode_get_ld_key(node->right)); ++ } ++ write_unlock_dk(tree); ++ ++ if (znode_is_right_connected(node) && node->right != NULL) { ++ assert("zam-322", znode_is_left_connected(node->right)); ++ node->right->left = node->left; ++ ON_DEBUG(node->right->left_version = ++ atomic_inc_return(&delim_key_version); ++ ); ++ } ++ if (znode_is_left_connected(node) && node->left != NULL) { ++ assert("zam-323", znode_is_right_connected(node->left)); ++ node->left->right = node->right; ++ ON_DEBUG(node->left->right_version = ++ atomic_inc_return(&delim_key_version); ++ ); ++ } ++ ++ ZF_CLR(node, JNODE_LEFT_CONNECTED); ++ ZF_CLR(node, JNODE_RIGHT_CONNECTED); ++ ON_DEBUG(node->left = node->right = NULL; ++ node->left_version = atomic_inc_return(&delim_key_version); ++ node->right_version = atomic_inc_return(&delim_key_version);); ++ assert("nikita-3276", check_sibling_list(node)); ++} ++ ++/* disconnect node from sibling list */ ++void sibling_list_drop(znode * node) ++{ ++ znode *right; ++ znode *left; ++ ++ 
assert("nikita-2464", node != NULL); ++ assert("nikita-3277", check_sibling_list(node)); ++ ++ right = node->right; ++ if (right != NULL) { ++ assert("nikita-2465", znode_is_left_connected(right)); ++ right->left = NULL; ++ ON_DEBUG(right->left_version = ++ atomic_inc_return(&delim_key_version); ++ ); ++ } ++ left = node->left; ++ if (left != NULL) { ++ assert("zam-323", znode_is_right_connected(left)); ++ left->right = NULL; ++ ON_DEBUG(left->right_version = ++ atomic_inc_return(&delim_key_version); ++ ); ++ } ++ ZF_CLR(node, JNODE_LEFT_CONNECTED); ++ ZF_CLR(node, JNODE_RIGHT_CONNECTED); ++ ON_DEBUG(node->left = node->right = NULL; ++ node->left_version = atomic_inc_return(&delim_key_version); ++ node->right_version = atomic_inc_return(&delim_key_version);); ++} ++ ++/* Insert new node into sibling list. Regular balancing inserts new node ++ after (at right side) existing and locked node (@before), except one case ++ of adding new tree root node. @before should be NULL in that case. */ ++void sibling_list_insert_nolock(znode * new, znode * before) ++{ ++ assert("zam-334", new != NULL); ++ assert("nikita-3298", !znode_is_left_connected(new)); ++ assert("nikita-3299", !znode_is_right_connected(new)); ++ assert("nikita-3300", new->left == NULL); ++ assert("nikita-3301", new->right == NULL); ++ assert("nikita-3278", check_sibling_list(new)); ++ assert("nikita-3279", check_sibling_list(before)); ++ ++ if (before != NULL) { ++ assert("zam-333", znode_is_connected(before)); ++ new->right = before->right; ++ new->left = before; ++ ON_DEBUG(new->right_version = ++ atomic_inc_return(&delim_key_version); ++ new->left_version = ++ atomic_inc_return(&delim_key_version);); ++ if (before->right != NULL) { ++ before->right->left = new; ++ ON_DEBUG(before->right->left_version = ++ atomic_inc_return(&delim_key_version); ++ ); ++ } ++ before->right = new; ++ ON_DEBUG(before->right_version = ++ atomic_inc_return(&delim_key_version); ++ ); ++ } else { ++ new->right = NULL; ++ 
new->left = NULL; ++ ON_DEBUG(new->right_version = ++ atomic_inc_return(&delim_key_version); ++ new->left_version = ++ atomic_inc_return(&delim_key_version);); ++ } ++ ZF_SET(new, JNODE_LEFT_CONNECTED); ++ ZF_SET(new, JNODE_RIGHT_CONNECTED); ++ assert("nikita-3280", check_sibling_list(new)); ++ assert("nikita-3281", check_sibling_list(before)); ++} ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 80 ++ End: ++*/ +diff --git a/fs/reiser4/tree_walk.h b/fs/reiser4/tree_walk.h +new file mode 100644 +index 0000000..3d5f09f +--- /dev/null ++++ b/fs/reiser4/tree_walk.h +@@ -0,0 +1,125 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++/* definitions of reiser4 tree walk functions */ ++ ++#ifndef __FS_REISER4_TREE_WALK_H__ ++#define __FS_REISER4_TREE_WALK_H__ ++ ++#include "debug.h" ++#include "forward.h" ++ ++/* establishes horizontal links between cached znodes */ ++int connect_znode(coord_t * coord, znode * node); ++ ++/* tree traversal functions (reiser4_get_parent(), reiser4_get_neighbor()) ++ have the following common arguments: ++ ++ return codes: ++ ++ @return : 0 - OK, ++ ++ZAM-FIXME-HANS: wrong return code name. Change them all. ++ -ENOENT - neighbor is not in cache, what is detected by sibling ++ link absence. ++ ++ -E_NO_NEIGHBOR - we are sure that neighbor (or parent) node cannot be ++ found (because we are left-/right- most node of the ++ tree, for example). Also, this return code is for ++ reiser4_get_parent() when we see no parent link -- it ++ means that our node is root node. ++ ++ -E_DEADLOCK - deadlock detected (request from high-priority process ++ received), other error codes are conformed to ++ /usr/include/asm/errno.h . ++*/ ++ ++int ++reiser4_get_parent_flags(lock_handle * result, znode * node, ++ znode_lock_mode mode, int flags); ++ ++/* bits definition for reiser4_get_neighbor function `flags' arg. 
*/ ++typedef enum { ++ /* If sibling pointer is NULL, this flag allows get_neighbor() to try to ++ * find not allocated not connected neigbor by going though upper ++ * levels */ ++ GN_CAN_USE_UPPER_LEVELS = 0x1, ++ /* locking left neighbor instead of right one */ ++ GN_GO_LEFT = 0x2, ++ /* automatically load neighbor node content */ ++ GN_LOAD_NEIGHBOR = 0x4, ++ /* return -E_REPEAT if can't lock */ ++ GN_TRY_LOCK = 0x8, ++ /* used internally in tree_walk.c, causes renew_sibling to not ++ allocate neighbor znode, but only search for it in znode cache */ ++ GN_NO_ALLOC = 0x10, ++ /* do not go across atom boundaries */ ++ GN_SAME_ATOM = 0x20, ++ /* allow to lock not connected nodes */ ++ GN_ALLOW_NOT_CONNECTED = 0x40, ++ /* Avoid synchronous jload, instead, call jstartio() and return -E_REPEAT. */ ++ GN_ASYNC = 0x80 ++} znode_get_neigbor_flags; ++ ++/* A commonly used wrapper for reiser4_get_parent_flags(). */ ++static inline int reiser4_get_parent(lock_handle * result, znode * node, ++ znode_lock_mode mode) ++{ ++ return reiser4_get_parent_flags(result, node, mode, ++ GN_ALLOW_NOT_CONNECTED); ++} ++ ++int reiser4_get_neighbor(lock_handle * neighbor, znode * node, ++ znode_lock_mode lock_mode, int flags); ++ ++/* there are wrappers for most common usages of reiser4_get_neighbor() */ ++static inline int ++reiser4_get_left_neighbor(lock_handle * result, znode * node, int lock_mode, ++ int flags) ++{ ++ return reiser4_get_neighbor(result, node, lock_mode, ++ flags | GN_GO_LEFT); ++} ++ ++static inline int ++reiser4_get_right_neighbor(lock_handle * result, znode * node, int lock_mode, ++ int flags) ++{ ++ ON_DEBUG(check_lock_node_data(node)); ++ ON_DEBUG(check_lock_data()); ++ return reiser4_get_neighbor(result, node, lock_mode, ++ flags & (~GN_GO_LEFT)); ++} ++ ++extern void sibling_list_remove(znode * node); ++extern void sibling_list_drop(znode * node); ++extern void sibling_list_insert_nolock(znode * new, znode * before); ++extern void link_left_and_right(znode * 
left, znode * right); ++ ++/* Functions called by tree_walk() when tree_walk() ... */ ++struct tree_walk_actor { ++ /* ... meets a formatted node, */ ++ int (*process_znode) (tap_t *, void *); ++ /* ... meets an extent, */ ++ int (*process_extent) (tap_t *, void *); ++ /* ... begins tree traversal or repeats it after -E_REPEAT was returned by ++ * node or extent processing functions. */ ++ int (*before) (void *); ++}; ++ ++#if REISER4_DEBUG ++int check_sibling_list(znode * node); ++#else ++#define check_sibling_list(n) (1) ++#endif ++ ++#endif /* __FS_REISER4_TREE_WALK_H__ */ ++ ++/* ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/txnmgr.c b/fs/reiser4/txnmgr.c +new file mode 100644 +index 0000000..72d525b +--- /dev/null ++++ b/fs/reiser4/txnmgr.c +@@ -0,0 +1,3164 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Joshua MacDonald wrote the first draft of this code. */ ++ ++/* ZAM-LONGTERM-FIXME-HANS: The locking in this file is badly designed, and a ++filesystem scales only as well as its worst locking design. You need to ++substantially restructure this code. Josh was not as experienced a programmer ++as you. Particularly review how the locking style differs from what you did ++for znodes usingt hi-lo priority locking, and present to me an opinion on ++whether the differences are well founded. */ ++ ++/* I cannot help but to disagree with the sentiment above. Locking of ++ * transaction manager is _not_ badly designed, and, at the very least, is not ++ * the scaling bottleneck. Scaling bottleneck is _exactly_ hi-lo priority ++ * locking on znodes, especially on the root node of the tree. --nikita, ++ * 2003.10.13 */ ++ ++/* The txnmgr is a set of interfaces that keep track of atoms and transcrash handles. 
The ++ txnmgr processes capture_block requests and manages the relationship between jnodes and ++ atoms through the various stages of a transcrash, and it also oversees the fusion and ++ capture-on-copy processes. The main difficulty with this task is maintaining a ++ deadlock-free lock ordering between atoms and jnodes/handles. The reason for the ++ difficulty is that jnodes, handles, and atoms contain pointer circles, and the cycle ++ must be broken. The main requirement is that atom-fusion be deadlock free, so once you ++ hold the atom_lock you may then wait to acquire any jnode or handle lock. This implies ++ that any time you check the atom-pointer of a jnode or handle and then try to lock that ++ atom, you must use trylock() and possibly reverse the order. ++ ++ This code implements the design documented at: ++ ++ http://namesys.com/txn-doc.html ++ ++ZAM-FIXME-HANS: update v4.html to contain all of the information present in the above (but updated), and then remove the ++above document and reference the new. Be sure to provide some credit to Josh. I already have some writings on this ++topic in v4.html, but they are lacking in details present in the above. Cure that. Remember to write for the bright 12 ++year old --- define all technical terms used. ++ ++*/ ++ ++/* Thoughts on the external transaction interface: ++ ++ In the current code, a TRANSCRASH handle is created implicitly by reiser4_init_context() (which ++ creates state that lasts for the duration of a system call and is called at the start ++ of ReiserFS methods implementing VFS operations), and closed by reiser4_exit_context(), ++ occupying the scope of a single system call. We wish to give certain applications an ++ interface to begin and close (commit) transactions. Since our implementation of ++ transactions does not yet support isolation, allowing an application to open a ++ transaction implies trusting it to later close the transaction. 
Part of the ++ transaction interface will be aimed at enabling that trust, but the interface for ++ actually using transactions is fairly narrow. ++ ++ BEGIN_TRANSCRASH: Returns a transcrash identifier. It should be possible to translate ++ this identifier into a string that a shell-script could use, allowing you to start a ++ transaction by issuing a command. Once open, the transcrash should be set in the task ++ structure, and there should be options (I suppose) to allow it to be carried across ++ fork/exec. A transcrash has several options: ++ ++ - READ_FUSING or WRITE_FUSING: The default policy is for txn-capture to capture only ++ on writes (WRITE_FUSING) and allow "dirty reads". If the application wishes to ++ capture on reads as well, it should set READ_FUSING. ++ ++ - TIMEOUT: Since a non-isolated transcrash cannot be undone, every transcrash must ++ eventually close (or else the machine must crash). If the application dies an ++ unexpected death with an open transcrash, for example, or if it hangs for a long ++ duration, one solution (to avoid crashing the machine) is to simply close it anyway. ++ This is a dangerous option, but it is one way to solve the problem until isolated ++ transcrashes are available for untrusted applications. ++ ++ It seems to be what databases do, though it is unclear how one avoids a DoS attack ++ creating a vulnerability based on resource starvation. Guaranteeing that some ++ minimum amount of computational resources are made available would seem more correct ++ than guaranteeing some amount of time. When we again have someone to code the work, ++ this issue should be considered carefully. -Hans ++ ++ RESERVE_BLOCKS: A running transcrash should indicate to the transaction manager how ++ many dirty blocks it expects. The reserve_blocks interface should be called at a point ++ where it is safe for the application to fail, because the system may not be able to ++ grant the allocation and the application must be able to back-out. 
For this reason, ++ the number of reserve-blocks can also be passed as an argument to BEGIN_TRANSCRASH, but ++ the application may also wish to extend the allocation after beginning its transcrash. ++ ++ CLOSE_TRANSCRASH: The application closes the transcrash when it is finished making ++ modifications that require transaction protection. When isolated transactions are ++ supported the CLOSE operation is replaced by either COMMIT or ABORT. For example, if a ++ RESERVE_BLOCKS call fails for the application, it should "abort" by calling ++ CLOSE_TRANSCRASH, even though it really commits any changes that were made (which is ++ why, for safety, the application should call RESERVE_BLOCKS before making any changes). ++ ++ For actually implementing these out-of-system-call-scopped transcrashes, the ++ reiser4_context has a "txn_handle *trans" pointer that may be set to an open ++ transcrash. Currently there are no dynamically-allocated transcrashes, but there is a ++ "struct kmem_cache *_txnh_slab" created for that purpose in this file. ++*/ ++ ++/* Extending the other system call interfaces for future transaction features: ++ ++ Specialized applications may benefit from passing flags to the ordinary system call ++ interface such as read(), write(), or stat(). For example, the application specifies ++ WRITE_FUSING by default but wishes to add that a certain read() command should be ++ treated as READ_FUSING. But which read? Is it the directory-entry read, the stat-data ++ read, or the file-data read? These issues are straight-forward, but there are a lot of ++ them and adding the necessary flags-passing code will be tedious. ++ ++ When supporting isolated transactions, there is a corresponding READ_MODIFY_WRITE (RMW) ++ flag, which specifies that although it is a read operation being requested, a ++ write-lock should be taken. 
The reason is that read-locks are shared while write-locks ++ are exclusive, so taking a read-lock when a later-write is known in advance will often ++ leads to deadlock. If a reader knows it will write later, it should issue read ++ requests with the RMW flag set. ++*/ ++ ++/* ++ The znode/atom deadlock avoidance. ++ ++ FIXME(Zam): writing of this comment is in progress. ++ ++ The atom's special stage ASTAGE_CAPTURE_WAIT introduces a kind of atom's ++ long-term locking, which makes reiser4 locking scheme more complex. It had ++ deadlocks until we implement deadlock avoidance algorithms. That deadlocks ++ looked as the following: one stopped thread waits for a long-term lock on ++ znode, the thread who owns that lock waits when fusion with another atom will ++ be allowed. ++ ++ The source of the deadlocks is an optimization of not capturing index nodes ++ for read. Let's prove it. Suppose we have dumb node capturing scheme which ++ unconditionally captures each block before locking it. ++ ++ That scheme has no deadlocks. Let's begin with the thread which stage is ++ ASTAGE_CAPTURE_WAIT and it waits for a znode lock. The thread can't wait for ++ a capture because it's stage allows fusion with any atom except which are ++ being committed currently. A process of atom commit can't deadlock because ++ atom commit procedure does not acquire locks and does not fuse with other ++ atoms. Reiser4 does capturing right before going to sleep inside the ++ longtertm_lock_znode() function, it means the znode which we want to lock is ++ already captured and its atom is in ASTAGE_CAPTURE_WAIT stage. If we ++ continue the analysis we understand that no one process in the sequence may ++ waits atom fusion. Thereby there are no deadlocks of described kind. ++ ++ The capturing optimization makes the deadlocks possible. A thread can wait a ++ lock which owner did not captured that node. 
The lock owner's current atom ++ is not fused with the first atom and it does not get a ASTAGE_CAPTURE_WAIT ++ state. A deadlock is possible when that atom meets another one which is in ++ ASTAGE_CAPTURE_WAIT already. ++ ++ The deadlock avoidance scheme includes two algorithms: ++ ++ First algorithm is used when a thread captures a node which is locked but not ++ captured by another thread. Those nodes are marked MISSED_IN_CAPTURE at the ++ moment we skip their capturing. If such a node (marked MISSED_IN_CAPTURE) is ++ being captured by a thread with current atom is in ASTAGE_CAPTURE_WAIT, the ++ routine which forces all lock owners to join with current atom is executed. ++ ++ Second algorithm does not allow to skip capturing of already captured nodes. ++ ++ Both algorithms together prevent waiting a longterm lock without atom fusion ++ with atoms of all lock owners, which is a key thing for getting atom/znode ++ locking deadlocks. ++*/ ++ ++/* ++ * Transactions and mmap(2). ++ * ++ * 1. Transactions are not supported for accesses through mmap(2), because ++ * this would effectively amount to user-level transactions whose duration ++ * is beyond control of the kernel. ++ * ++ * 2. That said, we still want to preserve some decency with regard to ++ * mmap(2). During normal write(2) call, following sequence of events ++ * happens: ++ * ++ * 1. page is created; ++ * ++ * 2. jnode is created, dirtied and captured into current atom. ++ * ++ * 3. extent is inserted and modified. ++ * ++ * Steps (2) and (3) take place under long term lock on the twig node. ++ * ++ * When file is accessed through mmap(2) page is always created during ++ * page fault. ++ * After this (in reiser4_readpage()->reiser4_readpage_extent()): ++ * ++ * 1. if access is made to non-hole page new jnode is created, (if ++ * necessary) ++ * ++ * 2. if access is made to the hole page, jnode is not created (XXX ++ * not clear why). 
++ * ++ * Also, even if page is created by write page fault it is not marked ++ * dirty immediately by handle_mm_fault(). Probably this is to avoid races ++ * with page write-out. ++ * ++ * Dirty bit installed by hardware is only transferred to the struct page ++ * later, when page is unmapped (in zap_pte_range(), or ++ * try_to_unmap_one()). ++ * ++ * So, with mmap(2) we have to handle following irksome situations: ++ * ++ * 1. there exists modified page (clean or dirty) without jnode ++ * ++ * 2. there exists modified page (clean or dirty) with clean jnode ++ * ++ * 3. clean page which is a part of atom can be transparently modified ++ * at any moment through mapping without becoming dirty. ++ * ++ * (1) and (2) can lead to the out-of-memory situation: ->writepage() ++ * doesn't know what to do with such pages and ->sync_sb()/->writepages() ++ * don't see them, because these methods operate on atoms. ++ * ++ * (3) can lead to the loss of data: suppose we have dirty page with dirty ++ * captured jnode captured by some atom. As part of early flush (for ++ * example) page was written out. Dirty bit was cleared on both page and ++ * jnode. After this page is modified through mapping, but kernel doesn't ++ * notice and just discards page and jnode as part of commit. (XXX ++ * actually it doesn't, because to reclaim page ->releasepage() has to be ++ * called and before this dirty bit will be transferred to the struct ++ * page). 
++ * ++ */ ++ ++#include "debug.h" ++#include "txnmgr.h" ++#include "jnode.h" ++#include "znode.h" ++#include "block_alloc.h" ++#include "tree.h" ++#include "wander.h" ++#include "ktxnmgrd.h" ++#include "super.h" ++#include "page_cache.h" ++#include "reiser4.h" ++#include "vfs_ops.h" ++#include "inode.h" ++#include "flush.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* for totalram_pages */ ++ ++static void atom_free(txn_atom * atom); ++ ++static int commit_txnh(txn_handle * txnh); ++ ++static void wakeup_atom_waitfor_list(txn_atom * atom); ++static void wakeup_atom_waiting_list(txn_atom * atom); ++ ++static void capture_assign_txnh_nolock(txn_atom * atom, txn_handle * txnh); ++ ++static void capture_assign_block_nolock(txn_atom * atom, jnode * node); ++ ++static void fuse_not_fused_lock_owners(txn_handle * txnh, znode * node); ++ ++static int capture_init_fusion(jnode * node, txn_handle * txnh, ++ txn_capture mode); ++ ++static int capture_fuse_wait(txn_handle *, txn_atom *, txn_atom *, txn_capture); ++ ++static void capture_fuse_into(txn_atom * small, txn_atom * large); ++ ++void reiser4_invalidate_list(struct list_head *); ++ ++/* GENERIC STRUCTURES */ ++ ++typedef struct _txn_wait_links txn_wait_links; ++ ++struct _txn_wait_links { ++ lock_stack *_lock_stack; ++ struct list_head _fwaitfor_link; ++ struct list_head _fwaiting_link; ++ int (*waitfor_cb) (txn_atom * atom, struct _txn_wait_links * wlinks); ++ int (*waiting_cb) (txn_atom * atom, struct _txn_wait_links * wlinks); ++}; ++ ++/* FIXME: In theory, we should be using the slab cache init & destructor ++ methods instead of, e.g., jnode_init, etc. */ ++static struct kmem_cache *_atom_slab = NULL; ++/* this is for user-visible, cross system-call transactions. */ ++static struct kmem_cache *_txnh_slab = NULL; ++ ++/** ++ * init_txnmgr_static - create transaction manager slab caches ++ * ++ * Initializes caches of txn-atoms and txn_handle. 
It is part of reiser4 module ++ * initialization. ++ */ ++int init_txnmgr_static(void) ++{ ++ assert("jmacd-600", _atom_slab == NULL); ++ assert("jmacd-601", _txnh_slab == NULL); ++ ++ ON_DEBUG(atomic_set(&flush_cnt, 0)); ++ ++ _atom_slab = kmem_cache_create("txn_atom", sizeof(txn_atom), 0, ++ SLAB_HWCACHE_ALIGN | ++ SLAB_RECLAIM_ACCOUNT, NULL, NULL); ++ if (_atom_slab == NULL) ++ return RETERR(-ENOMEM); ++ ++ _txnh_slab = kmem_cache_create("txn_handle", sizeof(txn_handle), 0, ++ SLAB_HWCACHE_ALIGN, NULL, NULL); ++ if (_txnh_slab == NULL) { ++ kmem_cache_destroy(_atom_slab); ++ _atom_slab = NULL; ++ return RETERR(-ENOMEM); ++ } ++ ++ return 0; ++} ++ ++/** ++ * done_txnmgr_static - delete txn_atom and txn_handle caches ++ * ++ * This is called on reiser4 module unloading or system shutdown. ++ */ ++void done_txnmgr_static(void) ++{ ++ destroy_reiser4_cache(&_atom_slab); ++ destroy_reiser4_cache(&_txnh_slab); ++} ++ ++/** ++ * init_txnmgr - initialize a new transaction manager ++ * @mgr: pointer to transaction manager embedded in reiser4 super block ++ * ++ * This is called on mount. Makes necessary initializations. ++ */ ++void reiser4_init_txnmgr(txn_mgr *mgr) ++{ ++ assert("umka-169", mgr != NULL); ++ ++ mgr->atom_count = 0; ++ mgr->id_count = 1; ++ INIT_LIST_HEAD(&mgr->atoms_list); ++ spin_lock_init(&mgr->tmgr_lock); ++ mutex_init(&mgr->commit_mutex); ++} ++ ++/** ++ * reiser4_done_txnmgr - stop transaction manager ++ * @mgr: pointer to transaction manager embedded in reiser4 super block ++ * ++ * This is called on umount. Does sanity checks. ++ */ ++void reiser4_done_txnmgr(txn_mgr *mgr) ++{ ++ assert("umka-170", mgr != NULL); ++ assert("umka-1701", list_empty_careful(&mgr->atoms_list)); ++ assert("umka-1702", mgr->atom_count == 0); ++} ++ ++/* Initialize a transaction handle. 
*/ ++/* Audited by: umka (2002.06.13) */ ++static void txnh_init(txn_handle * txnh, txn_mode mode) ++{ ++ assert("umka-171", txnh != NULL); ++ ++ txnh->mode = mode; ++ txnh->atom = NULL; ++ reiser4_ctx_gfp_mask_set(); ++ txnh->flags = 0; ++ spin_lock_init(&txnh->hlock); ++ INIT_LIST_HEAD(&txnh->txnh_link); ++} ++ ++#if REISER4_DEBUG ++/* Check if a transaction handle is clean. */ ++static int txnh_isclean(txn_handle * txnh) ++{ ++ assert("umka-172", txnh != NULL); ++ return txnh->atom == NULL && ++ LOCK_CNT_NIL(spin_locked_txnh); ++} ++#endif ++ ++/* Initialize an atom. */ ++static void atom_init(txn_atom * atom) ++{ ++ int level; ++ ++ assert("umka-173", atom != NULL); ++ ++ memset(atom, 0, sizeof(txn_atom)); ++ ++ atom->stage = ASTAGE_FREE; ++ atom->start_time = jiffies; ++ ++ for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) ++ INIT_LIST_HEAD(ATOM_DIRTY_LIST(atom, level)); ++ ++ INIT_LIST_HEAD(ATOM_CLEAN_LIST(atom)); ++ INIT_LIST_HEAD(ATOM_OVRWR_LIST(atom)); ++ INIT_LIST_HEAD(ATOM_WB_LIST(atom)); ++ INIT_LIST_HEAD(&atom->inodes); ++ spin_lock_init(&(atom->alock)); ++ /* list of transaction handles */ ++ INIT_LIST_HEAD(&atom->txnh_list); ++ /* link to transaction manager's list of atoms */ ++ INIT_LIST_HEAD(&atom->atom_link); ++ INIT_LIST_HEAD(&atom->fwaitfor_list); ++ INIT_LIST_HEAD(&atom->fwaiting_list); ++ blocknr_set_init(&atom->delete_set); ++ blocknr_set_init(&atom->wandered_map); ++ ++ init_atom_fq_parts(atom); ++} ++ ++#if REISER4_DEBUG ++/* Check if an atom is clean. 
*/ ++static int atom_isclean(txn_atom * atom) ++{ ++ int level; ++ ++ assert("umka-174", atom != NULL); ++ ++ for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) { ++ if (!list_empty_careful(ATOM_DIRTY_LIST(atom, level))) { ++ return 0; ++ } ++ } ++ ++ return atom->stage == ASTAGE_FREE && ++ atom->txnh_count == 0 && ++ atom->capture_count == 0 && ++ atomic_read(&atom->refcount) == 0 && ++ (&atom->atom_link == atom->atom_link.next && ++ &atom->atom_link == atom->atom_link.prev) && ++ list_empty_careful(&atom->txnh_list) && ++ list_empty_careful(ATOM_CLEAN_LIST(atom)) && ++ list_empty_careful(ATOM_OVRWR_LIST(atom)) && ++ list_empty_careful(ATOM_WB_LIST(atom)) && ++ list_empty_careful(&atom->fwaitfor_list) && ++ list_empty_careful(&atom->fwaiting_list) && ++ atom_fq_parts_are_clean(atom); ++} ++#endif ++ ++/* Begin a transaction in this context. Currently this uses the reiser4_context's ++ trans_in_ctx, which means that transaction handles are stack-allocated. Eventually ++ this will be extended to allow transaction handles to span several contexts. */ ++/* Audited by: umka (2002.06.13) */ ++void reiser4_txn_begin(reiser4_context * context) ++{ ++ assert("jmacd-544", context->trans == NULL); ++ ++ context->trans = &context->trans_in_ctx; ++ ++ /* FIXME_LATER_JMACD Currently there's no way to begin a TXN_READ_FUSING ++ transcrash. Default should be TXN_WRITE_FUSING. Also, the _trans variable is ++ stack allocated right now, but we would like to allow for dynamically allocated ++ transcrashes that span multiple system calls. ++ */ ++ txnh_init(context->trans, TXN_WRITE_FUSING); ++} ++ ++/* Finish a transaction handle context. 
*/ ++int reiser4_txn_end(reiser4_context * context) ++{ ++ long ret = 0; ++ txn_handle *txnh; ++ ++ assert("umka-283", context != NULL); ++ assert("nikita-3012", reiser4_schedulable()); ++ assert("vs-24", context == get_current_context()); ++ assert("nikita-2967", lock_stack_isclean(get_current_lock_stack())); ++ ++ txnh = context->trans; ++ if (txnh != NULL) { ++ if (txnh->atom != NULL) ++ ret = commit_txnh(txnh); ++ assert("jmacd-633", txnh_isclean(txnh)); ++ context->trans = NULL; ++ } ++ return ret; ++} ++ ++void reiser4_txn_restart(reiser4_context * context) ++{ ++ reiser4_txn_end(context); ++ reiser4_preempt_point(); ++ reiser4_txn_begin(context); ++} ++ ++void reiser4_txn_restart_current(void) ++{ ++ reiser4_txn_restart(get_current_context()); ++} ++ ++/* TXN_ATOM */ ++ ++/* Get the atom belonging to a txnh, which is not locked. Return txnh locked. Locks atom, if atom ++ is not NULL. This performs the necessary spin_trylock to break the lock-ordering cycle. May ++ return NULL. */ ++static txn_atom *txnh_get_atom(txn_handle * txnh) ++{ ++ txn_atom *atom; ++ ++ assert("umka-180", txnh != NULL); ++ assert_spin_not_locked(&(txnh->hlock)); ++ ++ while (1) { ++ spin_lock_txnh(txnh); ++ atom = txnh->atom; ++ ++ if (atom == NULL) ++ break; ++ ++ if (spin_trylock_atom(atom)) ++ break; ++ ++ atomic_inc(&atom->refcount); ++ ++ spin_unlock_txnh(txnh); ++ spin_lock_atom(atom); ++ spin_lock_txnh(txnh); ++ ++ if (txnh->atom == atom) { ++ atomic_dec(&atom->refcount); ++ break; ++ } ++ ++ spin_unlock_txnh(txnh); ++ atom_dec_and_unlock(atom); ++ } ++ ++ return atom; ++} ++ ++/* Get the current atom and spinlock it if current atom present. 
May return NULL */ ++txn_atom *get_current_atom_locked_nocheck(void) ++{ ++ reiser4_context *cx; ++ txn_atom *atom; ++ txn_handle *txnh; ++ ++ cx = get_current_context(); ++ assert("zam-437", cx != NULL); ++ ++ txnh = cx->trans; ++ assert("zam-435", txnh != NULL); ++ ++ atom = txnh_get_atom(txnh); ++ ++ spin_unlock_txnh(txnh); ++ return atom; ++} ++ ++/* Get the atom belonging to a jnode, which is initially locked. Return with ++ both jnode and atom locked. This performs the necessary spin_trylock to ++ break the lock-ordering cycle. Assumes the jnode is already locked, and ++ returns NULL if atom is not set. */ ++txn_atom *jnode_get_atom(jnode * node) ++{ ++ txn_atom *atom; ++ ++ assert("umka-181", node != NULL); ++ ++ while (1) { ++ assert_spin_locked(&(node->guard)); ++ ++ atom = node->atom; ++ /* node is not in any atom */ ++ if (atom == NULL) ++ break; ++ ++ /* If atom is not locked, grab the lock and return */ ++ if (spin_trylock_atom(atom)) ++ break; ++ ++ /* At least one jnode belongs to this atom it guarantees that ++ * atom->refcount > 0, we can safely increment refcount. */ ++ atomic_inc(&atom->refcount); ++ spin_unlock_jnode(node); ++ ++ /* re-acquire spin locks in the right order */ ++ spin_lock_atom(atom); ++ spin_lock_jnode(node); ++ ++ /* check if node still points to the same atom. */ ++ if (node->atom == atom) { ++ atomic_dec(&atom->refcount); ++ break; ++ } ++ ++ /* releasing of atom lock and reference requires not holding ++ * locks on jnodes. */ ++ spin_unlock_jnode(node); ++ ++ /* We do not sure that this atom has extra references except our ++ * one, so we should call proper function which may free atom if ++ * last reference is released. */ ++ atom_dec_and_unlock(atom); ++ ++ /* lock jnode again for getting valid node->atom pointer ++ * value. */ ++ spin_lock_jnode(node); ++ } ++ ++ return atom; ++} ++ ++/* Returns true if @node is dirty and part of the same atom as one of its neighbors. 
Used ++ by flush code to indicate whether the next node (in some direction) is suitable for ++ flushing. */ ++int ++same_slum_check(jnode * node, jnode * check, int alloc_check, int alloc_value) ++{ ++ int compat; ++ txn_atom *atom; ++ ++ assert("umka-182", node != NULL); ++ assert("umka-183", check != NULL); ++ ++ /* Not sure what this function is supposed to do if supplied with @check that is ++ neither formatted nor unformatted (bitmap or so). */ ++ assert("nikita-2373", jnode_is_znode(check) ++ || jnode_is_unformatted(check)); ++ ++ /* Need a lock on CHECK to get its atom and to check various state bits. ++ Don't need a lock on NODE once we get the atom lock. */ ++ /* It is not enough to lock two nodes and check (node->atom == ++ check->atom) because atom could be locked and being fused at that ++ moment, jnodes of the atom of that state (being fused) can point to ++ different objects, but the atom is the same. */ ++ spin_lock_jnode(check); ++ ++ atom = jnode_get_atom(check); ++ ++ if (atom == NULL) { ++ compat = 0; ++ } else { ++ compat = (node->atom == atom && JF_ISSET(check, JNODE_DIRTY)); ++ ++ if (compat && jnode_is_znode(check)) { ++ compat &= znode_is_connected(JZNODE(check)); ++ } ++ ++ if (compat && alloc_check) { ++ compat &= (alloc_value == jnode_is_flushprepped(check)); ++ } ++ ++ spin_unlock_atom(atom); ++ } ++ ++ spin_unlock_jnode(check); ++ ++ return compat; ++} ++ ++/* Decrement the atom's reference count and if it falls to zero, free it. */ ++void atom_dec_and_unlock(txn_atom * atom) ++{ ++ txn_mgr *mgr = &get_super_private(reiser4_get_current_sb())->tmgr; ++ ++ assert("umka-186", atom != NULL); ++ assert_spin_locked(&(atom->alock)); ++ assert("zam-1039", atomic_read(&atom->refcount) > 0); ++ ++ if (atomic_dec_and_test(&atom->refcount)) { ++ /* take txnmgr lock and atom lock in proper order. */ ++ if (!spin_trylock_txnmgr(mgr)) { ++ /* This atom should exist after we re-acquire its ++ * spinlock, so we increment its reference counter. 
*/ ++ atomic_inc(&atom->refcount); ++ spin_unlock_atom(atom); ++ spin_lock_txnmgr(mgr); ++ spin_lock_atom(atom); ++ ++ if (!atomic_dec_and_test(&atom->refcount)) { ++ spin_unlock_atom(atom); ++ spin_unlock_txnmgr(mgr); ++ return; ++ } ++ } ++ assert_spin_locked(&(mgr->tmgr_lock)); ++ atom_free(atom); ++ spin_unlock_txnmgr(mgr); ++ } else ++ spin_unlock_atom(atom); ++} ++ ++/* Create new atom and connect it to given transaction handle. This adds the ++ atom to the transaction manager's list and sets its reference count to 1, an ++ artificial reference which is kept until it commits. We play strange games ++ to avoid allocation under jnode & txnh spinlocks.*/ ++ ++static int atom_begin_and_assign_to_txnh(txn_atom ** atom_alloc, txn_handle * txnh) ++{ ++ txn_atom *atom; ++ txn_mgr *mgr; ++ ++ if (REISER4_DEBUG && rofs_tree(current_tree)) { ++ warning("nikita-3366", "Creating atom on rofs"); ++ dump_stack(); ++ } ++ ++ if (*atom_alloc == NULL) { ++ (*atom_alloc) = kmem_cache_alloc(_atom_slab, ++ reiser4_ctx_gfp_mask_get()); ++ ++ if (*atom_alloc == NULL) ++ return RETERR(-ENOMEM); ++ } ++ ++ /* and, also, txnmgr spin lock should be taken before jnode and txnh ++ locks. */ ++ mgr = &get_super_private(reiser4_get_current_sb())->tmgr; ++ spin_lock_txnmgr(mgr); ++ spin_lock_txnh(txnh); ++ ++ /* Check whether new atom still needed */ ++ if (txnh->atom != NULL) { ++ /* NOTE-NIKITA probably it is rather better to free ++ * atom_alloc here than thread it up to reiser4_try_capture() */ ++ ++ spin_unlock_txnh(txnh); ++ spin_unlock_txnmgr(mgr); ++ ++ return -E_REPEAT; ++ } ++ ++ atom = *atom_alloc; ++ *atom_alloc = NULL; ++ ++ atom_init(atom); ++ ++ assert("jmacd-17", atom_isclean(atom)); ++ ++ /* ++ * lock ordering is broken here. It is ok, as long as @atom is new ++ * and inaccessible for others. We can't use spin_lock_atom or ++ * spin_lock(&atom->alock) because they care about locking ++ * dependencies. spin_trylock_lock doesn't. 
++ */ ++ check_me("", spin_trylock_atom(atom)); ++ ++ /* add atom to the end of transaction manager's list of atoms */ ++ list_add_tail(&atom->atom_link, &mgr->atoms_list); ++ atom->atom_id = mgr->id_count++; ++ mgr->atom_count += 1; ++ ++ /* Release txnmgr lock */ ++ spin_unlock_txnmgr(mgr); ++ ++ /* One reference until it commits. */ ++ atomic_inc(&atom->refcount); ++ atom->stage = ASTAGE_CAPTURE_FUSE; ++ atom->super = reiser4_get_current_sb(); ++ capture_assign_txnh_nolock(atom, txnh); ++ ++ spin_unlock_atom(atom); ++ spin_unlock_txnh(txnh); ++ ++ return -E_REPEAT; ++} ++ ++/* Return true if an atom is currently "open". */ ++static int atom_isopen(const txn_atom * atom) ++{ ++ assert("umka-185", atom != NULL); ++ ++ return atom->stage > 0 && atom->stage < ASTAGE_PRE_COMMIT; ++} ++ ++/* Return the number of pointers to this atom that must be updated during fusion. This ++ approximates the amount of work to be done. Fusion chooses the atom with fewer ++ pointers to fuse into the atom with more pointers. */ ++static int atom_pointer_count(const txn_atom * atom) ++{ ++ assert("umka-187", atom != NULL); ++ ++ /* This is a measure of the amount of work needed to fuse this atom ++ * into another. */ ++ return atom->txnh_count + atom->capture_count; ++} ++ ++/* Called holding the atom lock, this removes the atom from the transaction manager list ++ and frees it. 
*/ ++static void atom_free(txn_atom * atom) ++{ ++ txn_mgr *mgr = &get_super_private(reiser4_get_current_sb())->tmgr; ++ ++ assert("umka-188", atom != NULL); ++ assert_spin_locked(&(atom->alock)); ++ ++ /* Remove from the txn_mgr's atom list */ ++ assert_spin_locked(&(mgr->tmgr_lock)); ++ mgr->atom_count -= 1; ++ list_del_init(&atom->atom_link); ++ ++ /* Clean the atom */ ++ assert("jmacd-16", ++ (atom->stage == ASTAGE_INVALID || atom->stage == ASTAGE_DONE)); ++ atom->stage = ASTAGE_FREE; ++ ++ blocknr_set_destroy(&atom->delete_set); ++ blocknr_set_destroy(&atom->wandered_map); ++ ++ assert("jmacd-16", atom_isclean(atom)); ++ ++ spin_unlock_atom(atom); ++ ++ kmem_cache_free(_atom_slab, atom); ++} ++ ++static int atom_is_dotard(const txn_atom * atom) ++{ ++ return time_after(jiffies, atom->start_time + ++ get_current_super_private()->tmgr.atom_max_age); ++} ++ ++static int atom_can_be_committed(txn_atom * atom) ++{ ++ assert_spin_locked(&(atom->alock)); ++ assert("zam-885", atom->txnh_count > atom->nr_waiters); ++ return atom->txnh_count == atom->nr_waiters + 1; ++} ++ ++/* Return true if an atom should commit now. This is determined by aging, atom ++ size or atom flags. */ ++static int atom_should_commit(const txn_atom * atom) ++{ ++ assert("umka-189", atom != NULL); ++ return ++ (atom->flags & ATOM_FORCE_COMMIT) || ++ ((unsigned)atom_pointer_count(atom) > ++ get_current_super_private()->tmgr.atom_max_size) ++ || atom_is_dotard(atom); ++} ++ ++/* return 1 if current atom exists and requires commit. 
*/ ++int current_atom_should_commit(void) ++{ ++ txn_atom *atom; ++ int result = 0; ++ ++ atom = get_current_atom_locked_nocheck(); ++ if (atom) { ++ result = atom_should_commit(atom); ++ spin_unlock_atom(atom); ++ } ++ return result; ++} ++ ++static int atom_should_commit_asap(const txn_atom * atom) ++{ ++ unsigned int captured; ++ unsigned int pinnedpages; ++ ++ assert("nikita-3309", atom != NULL); ++ ++ captured = (unsigned)atom->capture_count; ++ pinnedpages = (captured >> PAGE_CACHE_SHIFT) * sizeof(znode); ++ ++ return (pinnedpages > (totalram_pages >> 3)) || (atom->flushed > 100); ++} ++ ++static jnode *find_first_dirty_in_list(struct list_head *head, int flags) ++{ ++ jnode *first_dirty; ++ ++ list_for_each_entry(first_dirty, head, capture_link) { ++ if (!(flags & JNODE_FLUSH_COMMIT)) { ++ /* ++ * skip jnodes which "heard banshee" or having active ++ * I/O ++ */ ++ if (JF_ISSET(first_dirty, JNODE_HEARD_BANSHEE) || ++ JF_ISSET(first_dirty, JNODE_WRITEBACK)) ++ continue; ++ } ++ return first_dirty; ++ } ++ return NULL; ++} ++ ++/* Get first dirty node from the atom's dirty_nodes[n] lists; return NULL if atom has no dirty ++ nodes on atom's lists */ ++jnode *find_first_dirty_jnode(txn_atom * atom, int flags) ++{ ++ jnode *first_dirty; ++ tree_level level; ++ ++ assert_spin_locked(&(atom->alock)); ++ ++ /* The flush starts from LEAF_LEVEL (=1). */ ++ for (level = 1; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) { ++ if (list_empty_careful(ATOM_DIRTY_LIST(atom, level))) ++ continue; ++ ++ first_dirty = ++ find_first_dirty_in_list(ATOM_DIRTY_LIST(atom, level), ++ flags); ++ if (first_dirty) ++ return first_dirty; ++ } ++ ++ /* znode-above-root is on the list #0. 
*/ ++ return find_first_dirty_in_list(ATOM_DIRTY_LIST(atom, 0), flags); ++} ++ ++static void dispatch_wb_list(txn_atom * atom, flush_queue_t * fq) ++{ ++ jnode *cur; ++ ++ assert("zam-905", atom_is_protected(atom)); ++ ++ cur = list_entry(ATOM_WB_LIST(atom)->next, jnode, capture_link); ++ while (ATOM_WB_LIST(atom) != &cur->capture_link) { ++ jnode *next = list_entry(cur->capture_link.next, jnode, capture_link); ++ ++ spin_lock_jnode(cur); ++ if (!JF_ISSET(cur, JNODE_WRITEBACK)) { ++ if (JF_ISSET(cur, JNODE_DIRTY)) { ++ queue_jnode(fq, cur); ++ } else { ++ /* move jnode to atom's clean list */ ++ list_move_tail(&cur->capture_link, ++ ATOM_CLEAN_LIST(atom)); ++ } ++ } ++ spin_unlock_jnode(cur); ++ ++ cur = next; ++ } ++} ++ ++/* Scan current atom->writeback_nodes list, re-submit dirty and !writeback ++ * jnodes to disk. */ ++static int submit_wb_list(void) ++{ ++ int ret; ++ flush_queue_t *fq; ++ ++ fq = get_fq_for_current_atom(); ++ if (IS_ERR(fq)) ++ return PTR_ERR(fq); ++ ++ dispatch_wb_list(fq->atom, fq); ++ spin_unlock_atom(fq->atom); ++ ++ ret = reiser4_write_fq(fq, NULL, 1); ++ reiser4_fq_put(fq); ++ ++ return ret; ++} ++ ++/* Wait completion of all writes, re-submit atom writeback list if needed. */ ++static int current_atom_complete_writes(void) ++{ ++ int ret; ++ ++ /* Each jnode from that list was modified and dirtied when it had i/o ++ * request running already. 
After i/o completion we have to resubmit ++ * them to disk again.*/ ++ ret = submit_wb_list(); ++ if (ret < 0) ++ return ret; ++ ++ /* Wait all i/o completion */ ++ ret = current_atom_finish_all_fq(); ++ if (ret) ++ return ret; ++ ++ /* Scan wb list again; all i/o should be completed, we re-submit dirty ++ * nodes to disk */ ++ ret = submit_wb_list(); ++ if (ret < 0) ++ return ret; ++ ++ /* Wait all nodes we just submitted */ ++ return current_atom_finish_all_fq(); ++} ++ ++#if REISER4_DEBUG ++ ++static void reiser4_info_atom(const char *prefix, const txn_atom * atom) ++{ ++ if (atom == NULL) { ++ printk("%s: no atom\n", prefix); ++ return; ++ } ++ ++ printk("%s: refcount: %i id: %i flags: %x txnh_count: %i" ++ " capture_count: %i stage: %x start: %lu, flushed: %i\n", prefix, ++ atomic_read(&atom->refcount), atom->atom_id, atom->flags, ++ atom->txnh_count, atom->capture_count, atom->stage, ++ atom->start_time, atom->flushed); ++} ++ ++#else /* REISER4_DEBUG */ ++ ++static inline void reiser4_info_atom(const char *prefix, const txn_atom * atom) {} ++ ++#endif /* REISER4_DEBUG */ ++ ++#define TOOMANYFLUSHES (1 << 13) ++ ++/* Called with the atom locked and no open "active" transaction handlers except ++ ours, this function calls flush_current_atom() until all dirty nodes are ++ processed. Then it initiates commit processing. ++ ++ Called by the single remaining open "active" txnh, which is closing. Other ++ open txnhs belong to processes which wait atom commit in commit_txnh() ++ routine. They are counted as "waiters" in atom->nr_waiters. Therefore as ++ long as we hold the atom lock none of the jnodes can be captured and/or ++ locked. ++ ++ Return value is an error code if commit fails. ++*/ ++static int commit_current_atom(long *nr_submitted, txn_atom ** atom) ++{ ++ reiser4_super_info_data *sbinfo = get_current_super_private(); ++ long ret = 0; ++ /* how many times jnode_flush() was called as a part of attempt to ++ * commit this atom. 
*/ ++ int flushiters; ++ ++ assert("zam-888", atom != NULL && *atom != NULL); ++ assert_spin_locked(&((*atom)->alock)); ++ assert("zam-887", get_current_context()->trans->atom == *atom); ++ assert("jmacd-151", atom_isopen(*atom)); ++ ++ assert("nikita-3184", ++ get_current_super_private()->delete_mutex_owner != current); ++ ++ for (flushiters = 0;; ++flushiters) { ++ ret = ++ flush_current_atom(JNODE_FLUSH_WRITE_BLOCKS | ++ JNODE_FLUSH_COMMIT, ++ LONG_MAX /* nr_to_write */ , ++ nr_submitted, atom, NULL); ++ if (ret != -E_REPEAT) ++ break; ++ ++ /* if atom's dirty list contains one znode which is ++ HEARD_BANSHEE and is locked we have to allow lock owner to ++ continue and uncapture that znode */ ++ reiser4_preempt_point(); ++ ++ *atom = get_current_atom_locked(); ++ if (flushiters > TOOMANYFLUSHES && IS_POW(flushiters)) { ++ warning("nikita-3176", ++ "Flushing like mad: %i", flushiters); ++ reiser4_info_atom("atom", *atom); ++ DEBUGON(flushiters > (1 << 20)); ++ } ++ } ++ ++ if (ret) ++ return ret; ++ ++ assert_spin_locked(&((*atom)->alock)); ++ ++ if (!atom_can_be_committed(*atom)) { ++ spin_unlock_atom(*atom); ++ return RETERR(-E_REPEAT); ++ } ++ ++ if ((*atom)->capture_count == 0) ++ goto done; ++ ++ /* Up to this point we have been flushing and after flush is called we ++ return -E_REPEAT. Now we can commit. We cannot return -E_REPEAT ++ at this point, commit should be successful. 
*/ ++ reiser4_atom_set_stage(*atom, ASTAGE_PRE_COMMIT); ++ ON_DEBUG(((*atom)->committer = current)); ++ spin_unlock_atom(*atom); ++ ++ ret = current_atom_complete_writes(); ++ if (ret) ++ return ret; ++ ++ assert("zam-906", list_empty(ATOM_WB_LIST(*atom))); ++ ++ /* isolate critical code path which should be executed by only one ++ * thread using tmgr mutex */ ++ mutex_lock(&sbinfo->tmgr.commit_mutex); ++ ++ ret = reiser4_write_logs(nr_submitted); ++ if (ret < 0) ++ reiser4_panic("zam-597", "write log failed (%ld)\n", ret); ++ ++ /* The atom->ovrwr_nodes list is processed under commit mutex held ++ because of bitmap nodes which are captured by special way in ++ reiser4_pre_commit_hook_bitmap(), that way does not include ++ capture_fuse_wait() as a capturing of other nodes does -- the commit ++ mutex is used for transaction isolation instead. */ ++ reiser4_invalidate_list(ATOM_OVRWR_LIST(*atom)); ++ mutex_unlock(&sbinfo->tmgr.commit_mutex); ++ ++ reiser4_invalidate_list(ATOM_CLEAN_LIST(*atom)); ++ reiser4_invalidate_list(ATOM_WB_LIST(*atom)); ++ assert("zam-927", list_empty(&(*atom)->inodes)); ++ ++ spin_lock_atom(*atom); ++ done: ++ reiser4_atom_set_stage(*atom, ASTAGE_DONE); ++ ON_DEBUG((*atom)->committer = NULL); ++ ++ /* Atom's state changes, so wake up everybody waiting for this ++ event. */ ++ wakeup_atom_waiting_list(*atom); ++ ++ /* Decrement the "until commit" reference, at least one txnh (the caller) is ++ still open. */ ++ atomic_dec(&(*atom)->refcount); ++ ++ assert("jmacd-1070", atomic_read(&(*atom)->refcount) > 0); ++ assert("jmacd-1062", (*atom)->capture_count == 0); ++ BUG_ON((*atom)->capture_count != 0); ++ assert_spin_locked(&((*atom)->alock)); ++ ++ return ret; ++} ++ ++/* TXN_TXNH */ ++ ++/** ++ * force_commit_atom - commit current atom and wait commit completion ++ * @txnh: ++ * ++ * Commits current atom and wait commit completion; current atom and @txnh have ++ * to be spinlocked before call, this function unlocks them on exit. 
++ */ ++int force_commit_atom(txn_handle *txnh) ++{ ++ txn_atom *atom; ++ ++ assert("zam-837", txnh != NULL); ++ assert_spin_locked(&(txnh->hlock)); ++ assert("nikita-2966", lock_stack_isclean(get_current_lock_stack())); ++ ++ atom = txnh->atom; ++ ++ assert("zam-834", atom != NULL); ++ assert_spin_locked(&(atom->alock)); ++ ++ /* ++ * Set flags for atom and txnh: forcing atom commit and waiting for ++ * commit completion ++ */ ++ txnh->flags |= TXNH_WAIT_COMMIT; ++ atom->flags |= ATOM_FORCE_COMMIT; ++ ++ spin_unlock_txnh(txnh); ++ spin_unlock_atom(atom); ++ ++ /* commit is here */ ++ reiser4_txn_restart_current(); ++ return 0; ++} ++ ++/* Called to force commit of any outstanding atoms. @commit_all_atoms controls ++ * should we commit all atoms including new ones which are created after this ++ * functions is called. */ ++int txnmgr_force_commit_all(struct super_block *super, int commit_all_atoms) ++{ ++ int ret; ++ txn_atom *atom; ++ txn_mgr *mgr; ++ txn_handle *txnh; ++ unsigned long start_time = jiffies; ++ reiser4_context *ctx = get_current_context(); ++ ++ assert("nikita-2965", lock_stack_isclean(get_current_lock_stack())); ++ assert("nikita-3058", reiser4_commit_check_locks()); ++ ++ reiser4_txn_restart_current(); ++ ++ mgr = &get_super_private(super)->tmgr; ++ ++ txnh = ctx->trans; ++ ++ again: ++ ++ spin_lock_txnmgr(mgr); ++ ++ list_for_each_entry(atom, &mgr->atoms_list, atom_link) { ++ spin_lock_atom(atom); ++ ++ /* Commit any atom which can be committed. If @commit_new_atoms ++ * is not set we commit only atoms which were created before ++ * this call is started. 
*/ ++ if (commit_all_atoms ++ || time_before_eq(atom->start_time, start_time)) { ++ if (atom->stage <= ASTAGE_POST_COMMIT) { ++ spin_unlock_txnmgr(mgr); ++ ++ if (atom->stage < ASTAGE_PRE_COMMIT) { ++ spin_lock_txnh(txnh); ++ /* Add force-context txnh */ ++ capture_assign_txnh_nolock(atom, txnh); ++ ret = force_commit_atom(txnh); ++ if (ret) ++ return ret; ++ } else ++ /* wait atom commit */ ++ reiser4_atom_wait_event(atom); ++ ++ goto again; ++ } ++ } ++ ++ spin_unlock_atom(atom); ++ } ++ ++#if REISER4_DEBUG ++ if (commit_all_atoms) { ++ reiser4_super_info_data *sbinfo = get_super_private(super); ++ spin_lock_reiser4_super(sbinfo); ++ assert("zam-813", ++ sbinfo->blocks_fake_allocated_unformatted == 0); ++ assert("zam-812", sbinfo->blocks_fake_allocated == 0); ++ spin_unlock_reiser4_super(sbinfo); ++ } ++#endif ++ ++ spin_unlock_txnmgr(mgr); ++ ++ return 0; ++} ++ ++/* check whether commit_some_atoms() can commit @atom. Locking is up to the ++ * caller */ ++static int atom_is_committable(txn_atom * atom) ++{ ++ return ++ atom->stage < ASTAGE_PRE_COMMIT && ++ atom->txnh_count == atom->nr_waiters && atom_should_commit(atom); ++} ++ ++/* called periodically from ktxnmgrd to commit old atoms. 
Releases ktxnmgrd spin ++ * lock at exit */ ++int commit_some_atoms(txn_mgr * mgr) ++{ ++ int ret = 0; ++ txn_atom *atom; ++ txn_handle *txnh; ++ reiser4_context *ctx; ++ struct list_head *pos, *tmp; ++ ++ ctx = get_current_context(); ++ assert("nikita-2444", ctx != NULL); ++ ++ txnh = ctx->trans; ++ spin_lock_txnmgr(mgr); ++ ++ /* ++ * this is to avoid gcc complain that atom might be used ++ * uninitialized ++ */ ++ atom = NULL; ++ ++ /* look for atom to commit */ ++ list_for_each_safe(pos, tmp, &mgr->atoms_list) { ++ atom = list_entry(pos, txn_atom, atom_link); ++ /* ++ * first test without taking atom spin lock, whether it is ++ * eligible for committing at all ++ */ ++ if (atom_is_committable(atom)) { ++ /* now, take spin lock and re-check */ ++ spin_lock_atom(atom); ++ if (atom_is_committable(atom)) ++ break; ++ spin_unlock_atom(atom); ++ } ++ } ++ ++ ret = (&mgr->atoms_list == pos); ++ spin_unlock_txnmgr(mgr); ++ ++ if (ret) { ++ /* nothing found */ ++ spin_unlock(&mgr->daemon->guard); ++ return 0; ++ } ++ ++ spin_lock_txnh(txnh); ++ ++ BUG_ON(atom == NULL); ++ /* Set the atom to force committing */ ++ atom->flags |= ATOM_FORCE_COMMIT; ++ ++ /* Add force-context txnh */ ++ capture_assign_txnh_nolock(atom, txnh); ++ ++ spin_unlock_txnh(txnh); ++ spin_unlock_atom(atom); ++ ++ /* we are about to release daemon spin lock, notify daemon it ++ has to rescan atoms */ ++ mgr->daemon->rescan = 1; ++ spin_unlock(&mgr->daemon->guard); ++ reiser4_txn_restart_current(); ++ return 0; ++} ++ ++static int txn_try_to_fuse_small_atom(txn_mgr * tmgr, txn_atom * atom) ++{ ++ int atom_stage; ++ txn_atom *atom_2; ++ int repeat; ++ ++ assert("zam-1051", atom->stage < ASTAGE_PRE_COMMIT); ++ ++ atom_stage = atom->stage; ++ repeat = 0; ++ ++ if (!spin_trylock_txnmgr(tmgr)) { ++ atomic_inc(&atom->refcount); ++ spin_unlock_atom(atom); ++ spin_lock_txnmgr(tmgr); ++ spin_lock_atom(atom); ++ repeat = 1; ++ if (atom->stage != atom_stage) { ++ spin_unlock_txnmgr(tmgr); ++ 
atom_dec_and_unlock(atom); ++ return -E_REPEAT; ++ } ++ atomic_dec(&atom->refcount); ++ } ++ ++ list_for_each_entry(atom_2, &tmgr->atoms_list, atom_link) { ++ if (atom == atom_2) ++ continue; ++ /* ++ * if trylock does not succeed we just do not fuse with that ++ * atom. ++ */ ++ if (spin_trylock_atom(atom_2)) { ++ if (atom_2->stage < ASTAGE_PRE_COMMIT) { ++ spin_unlock_txnmgr(tmgr); ++ capture_fuse_into(atom_2, atom); ++ /* all locks are lost we can only repeat here */ ++ return -E_REPEAT; ++ } ++ spin_unlock_atom(atom_2); ++ } ++ } ++ atom->flags |= ATOM_CANCEL_FUSION; ++ spin_unlock_txnmgr(tmgr); ++ if (repeat) { ++ spin_unlock_atom(atom); ++ return -E_REPEAT; ++ } ++ return 0; ++} ++ ++/* Calls jnode_flush for current atom if it exists; if not, just take another ++ atom and call jnode_flush() for him. If current transaction handle has ++ already assigned atom (current atom) we have to close current transaction ++ prior to switch to another atom or do something with current atom. This ++ code tries to flush current atom. ++ ++ flush_some_atom() is called as part of memory clearing process. It is ++ invoked from balance_dirty_pages(), pdflushd, and entd. ++ ++ If we can flush no nodes, atom is committed, because this frees memory. ++ ++ If atom is too large or too old it is committed also. 
++*/ ++int ++flush_some_atom(jnode * start, long *nr_submitted, const struct writeback_control *wbc, ++ int flags) ++{ ++ reiser4_context *ctx = get_current_context(); ++ txn_mgr *tmgr = &get_super_private(ctx->super)->tmgr; ++ txn_handle *txnh = ctx->trans; ++ txn_atom *atom; ++ int ret; ++ ++ BUG_ON(wbc->nr_to_write == 0); ++ BUG_ON(*nr_submitted != 0); ++ assert("zam-1042", txnh != NULL); ++ repeat: ++ if (txnh->atom == NULL) { ++ /* current atom is not available, take first from txnmgr */ ++ spin_lock_txnmgr(tmgr); ++ ++ /* traverse the list of all atoms */ ++ list_for_each_entry(atom, &tmgr->atoms_list, atom_link) { ++ /* lock atom before checking its state */ ++ spin_lock_atom(atom); ++ ++ /* ++ * we need an atom which is not being committed and ++ * which has no flushers (jnode_flush() add one flusher ++ * at the beginning and subtract one at the end). ++ */ ++ if (atom->stage < ASTAGE_PRE_COMMIT && ++ atom->nr_flushers == 0) { ++ spin_lock_txnh(txnh); ++ capture_assign_txnh_nolock(atom, txnh); ++ spin_unlock_txnh(txnh); ++ ++ goto found; ++ } ++ ++ spin_unlock_atom(atom); ++ } ++ ++ /* ++ * Write throttling is case of no one atom can be ++ * flushed/committed. ++ */ ++ if (!current_is_pdflush() && !wbc->nonblocking) { ++ list_for_each_entry(atom, &tmgr->atoms_list, atom_link) { ++ spin_lock_atom(atom); ++ /* Repeat the check from the above. 
*/ ++ if (atom->stage < ASTAGE_PRE_COMMIT ++ && atom->nr_flushers == 0) { ++ spin_lock_txnh(txnh); ++ capture_assign_txnh_nolock(atom, txnh); ++ spin_unlock_txnh(txnh); ++ ++ goto found; ++ } ++ if (atom->stage <= ASTAGE_POST_COMMIT) { ++ spin_unlock_txnmgr(tmgr); ++ /* ++ * we just wait until atom's flusher ++ * makes a progress in flushing or ++ * committing the atom ++ */ ++ reiser4_atom_wait_event(atom); ++ goto repeat; ++ } ++ spin_unlock_atom(atom); ++ } ++ } ++ spin_unlock_txnmgr(tmgr); ++ return 0; ++ found: ++ spin_unlock_txnmgr(tmgr); ++ } else ++ atom = get_current_atom_locked(); ++ ++ BUG_ON(atom->super != ctx->super); ++ assert("vs-35", atom->super == ctx->super); ++ if (start) { ++ spin_lock_jnode(start); ++ ret = (atom == start->atom) ? 1 : 0; ++ spin_unlock_jnode(start); ++ if (ret == 0) ++ start = NULL; ++ } ++ ret = flush_current_atom(flags, wbc->nr_to_write, nr_submitted, &atom, start); ++ if (ret == 0) { ++ /* flush_current_atom returns 0 only if it submitted for write ++ nothing */ ++ BUG_ON(*nr_submitted != 0); ++ if (*nr_submitted == 0 || atom_should_commit_asap(atom)) { ++ if (atom->capture_count < tmgr->atom_min_size && ++ !(atom->flags & ATOM_CANCEL_FUSION)) { ++ ret = txn_try_to_fuse_small_atom(tmgr, atom); ++ if (ret == -E_REPEAT) { ++ reiser4_preempt_point(); ++ goto repeat; ++ } ++ } ++ /* if early flushing could not make more nodes clean, ++ * or atom is too old/large, ++ * we force current atom to commit */ ++ /* wait for commit completion but only if this ++ * wouldn't stall pdflushd and ent thread. 
*/ ++ if (!wbc->nonblocking && !ctx->entd) ++ txnh->flags |= TXNH_WAIT_COMMIT; ++ atom->flags |= ATOM_FORCE_COMMIT; ++ } ++ spin_unlock_atom(atom); ++ } else if (ret == -E_REPEAT) { ++ if (*nr_submitted == 0) { ++ /* let others who hampers flushing (hold longterm locks, ++ for instance) to free the way for flush */ ++ reiser4_preempt_point(); ++ goto repeat; ++ } ++ ret = 0; ++ } ++/* ++ if (*nr_submitted > wbc->nr_to_write) ++ warning("", "asked for %ld, written %ld\n", wbc->nr_to_write, *nr_submitted); ++*/ ++ reiser4_txn_restart(ctx); ++ ++ return ret; ++} ++ ++/* Remove processed nodes from atom's clean list (thereby remove them from transaction). */ ++void reiser4_invalidate_list(struct list_head *head) ++{ ++ while (!list_empty(head)) { ++ jnode *node; ++ ++ node = list_entry(head->next, jnode, capture_link); ++ spin_lock_jnode(node); ++ reiser4_uncapture_block(node); ++ jput(node); ++ } ++} ++ ++static void init_wlinks(txn_wait_links * wlinks) ++{ ++ wlinks->_lock_stack = get_current_lock_stack(); ++ INIT_LIST_HEAD(&wlinks->_fwaitfor_link); ++ INIT_LIST_HEAD(&wlinks->_fwaiting_link); ++ wlinks->waitfor_cb = NULL; ++ wlinks->waiting_cb = NULL; ++} ++ ++/* Add atom to the atom's waitfor list and wait for somebody to wake us up; */ ++void reiser4_atom_wait_event(txn_atom * atom) ++{ ++ txn_wait_links _wlinks; ++ ++ assert_spin_locked(&(atom->alock)); ++ assert("nikita-3156", ++ lock_stack_isclean(get_current_lock_stack()) || ++ atom->nr_running_queues > 0); ++ ++ init_wlinks(&_wlinks); ++ list_add_tail(&_wlinks._fwaitfor_link, &atom->fwaitfor_list); ++ atomic_inc(&atom->refcount); ++ spin_unlock_atom(atom); ++ ++ reiser4_prepare_to_sleep(_wlinks._lock_stack); ++ reiser4_go_to_sleep(_wlinks._lock_stack); ++ ++ spin_lock_atom(atom); ++ list_del(&_wlinks._fwaitfor_link); ++ atom_dec_and_unlock(atom); ++} ++ ++void reiser4_atom_set_stage(txn_atom * atom, txn_stage stage) ++{ ++ assert("nikita-3535", atom != NULL); ++ assert_spin_locked(&(atom->alock)); ++ 
assert("nikita-3536", stage <= ASTAGE_INVALID); ++ /* Excelsior! */ ++ assert("nikita-3537", stage >= atom->stage); ++ if (atom->stage != stage) { ++ atom->stage = stage; ++ reiser4_atom_send_event(atom); ++ } ++} ++ ++/* wake all threads which wait for an event */ ++void reiser4_atom_send_event(txn_atom * atom) ++{ ++ assert_spin_locked(&(atom->alock)); ++ wakeup_atom_waitfor_list(atom); ++} ++ ++/* Informs txn manager code that owner of this txn_handle should wait atom commit completion (for ++ example, because it does fsync(2)) */ ++static int should_wait_commit(txn_handle * h) ++{ ++ return h->flags & TXNH_WAIT_COMMIT; ++} ++ ++typedef struct commit_data { ++ txn_atom *atom; ++ txn_handle *txnh; ++ long nr_written; ++ /* as an optimization we start committing atom by first trying to ++ * flush it few times without switching into ASTAGE_CAPTURE_WAIT. This ++ * allows to reduce stalls due to other threads waiting for atom in ++ * ASTAGE_CAPTURE_WAIT stage. ->preflush is counter of these ++ * preliminary flushes. */ ++ int preflush; ++ /* have we waited on atom. */ ++ int wait; ++ int failed; ++ int wake_ktxnmgrd_up; ++} commit_data; ++ ++/* ++ * Called from commit_txnh() repeatedly, until either error happens, or atom ++ * commits successfully. ++ */ ++static int try_commit_txnh(commit_data * cd) ++{ ++ int result; ++ ++ assert("nikita-2968", lock_stack_isclean(get_current_lock_stack())); ++ ++ /* Get the atom and txnh locked. */ ++ cd->atom = txnh_get_atom(cd->txnh); ++ assert("jmacd-309", cd->atom != NULL); ++ spin_unlock_txnh(cd->txnh); ++ ++ if (cd->wait) { ++ cd->atom->nr_waiters--; ++ cd->wait = 0; ++ } ++ ++ if (cd->atom->stage == ASTAGE_DONE) ++ return 0; ++ ++ if (cd->failed) ++ return 0; ++ ++ if (atom_should_commit(cd->atom)) { ++ /* if atom is _very_ large schedule it for commit as soon as ++ * possible. 
*/ ++ if (atom_should_commit_asap(cd->atom)) { ++ /* ++ * When atom is in PRE_COMMIT or later stage following ++ * invariant (encoded in atom_can_be_committed()) ++ * holds: there is exactly one non-waiter transaction ++ * handle opened on this atom. When thread wants to ++ * wait until atom commits (for example sync()) it ++ * waits on atom event after increasing ++ * atom->nr_waiters (see blow in this function). It ++ * cannot be guaranteed that atom is already committed ++ * after receiving event, so loop has to be ++ * re-started. But if atom switched into PRE_COMMIT ++ * stage and became too large, we cannot change its ++ * state back to CAPTURE_WAIT (atom stage can only ++ * increase monotonically), hence this check. ++ */ ++ if (cd->atom->stage < ASTAGE_CAPTURE_WAIT) ++ reiser4_atom_set_stage(cd->atom, ++ ASTAGE_CAPTURE_WAIT); ++ cd->atom->flags |= ATOM_FORCE_COMMIT; ++ } ++ if (cd->txnh->flags & TXNH_DONT_COMMIT) { ++ /* ++ * this thread (transaction handle that is) doesn't ++ * want to commit atom. Notify waiters that handle is ++ * closed. This can happen, for example, when we are ++ * under VFS directory lock and don't want to commit ++ * atom right now to avoid stalling other threads ++ * working in the same directory. ++ */ ++ ++ /* Wake the ktxnmgrd up if the ktxnmgrd is needed to ++ * commit this atom: no atom waiters and only one ++ * (our) open transaction handle. */ ++ cd->wake_ktxnmgrd_up = ++ cd->atom->txnh_count == 1 && ++ cd->atom->nr_waiters == 0; ++ reiser4_atom_send_event(cd->atom); ++ result = 0; ++ } else if (!atom_can_be_committed(cd->atom)) { ++ if (should_wait_commit(cd->txnh)) { ++ /* sync(): wait for commit */ ++ cd->atom->nr_waiters++; ++ cd->wait = 1; ++ reiser4_atom_wait_event(cd->atom); ++ result = RETERR(-E_REPEAT); ++ } else { ++ result = 0; ++ } ++ } else if (cd->preflush > 0 && !is_current_ktxnmgrd()) { ++ /* ++ * optimization: flush atom without switching it into ++ * ASTAGE_CAPTURE_WAIT. 
++ * ++ * But don't do this for ktxnmgrd, because ktxnmgrd ++ * should never block on atom fusion. ++ */ ++ result = flush_current_atom(JNODE_FLUSH_WRITE_BLOCKS, ++ LONG_MAX, &cd->nr_written, ++ &cd->atom, NULL); ++ if (result == 0) { ++ spin_unlock_atom(cd->atom); ++ cd->preflush = 0; ++ result = RETERR(-E_REPEAT); ++ } else /* Atoms wasn't flushed ++ * completely. Rinse. Repeat. */ ++ --cd->preflush; ++ } else { ++ /* We change atom state to ASTAGE_CAPTURE_WAIT to ++ prevent atom fusion and count ourself as an active ++ flusher */ ++ reiser4_atom_set_stage(cd->atom, ASTAGE_CAPTURE_WAIT); ++ cd->atom->flags |= ATOM_FORCE_COMMIT; ++ ++ result = ++ commit_current_atom(&cd->nr_written, &cd->atom); ++ if (result != 0 && result != -E_REPEAT) ++ cd->failed = 1; ++ } ++ } else ++ result = 0; ++ ++#if REISER4_DEBUG ++ if (result == 0) ++ assert_spin_locked(&(cd->atom->alock)); ++#endif ++ ++ /* perfectly valid assertion, except that when atom/txnh is not locked ++ * fusion can take place, and cd->atom points nowhere. */ ++ /* ++ assert("jmacd-1028", ergo(result != 0, spin_atom_is_not_locked(cd->atom))); ++ */ ++ return result; ++} ++ ++/* Called to commit a transaction handle. This decrements the atom's number of open ++ handles and if it is the last handle to commit and the atom should commit, initiates ++ atom commit. 
if commit does not fail, return number of written blocks */ ++static int commit_txnh(txn_handle * txnh) ++{ ++ commit_data cd; ++ assert("umka-192", txnh != NULL); ++ ++ memset(&cd, 0, sizeof cd); ++ cd.txnh = txnh; ++ cd.preflush = 10; ++ ++ /* calls try_commit_txnh() until either atom commits, or error ++ * happens */ ++ while (try_commit_txnh(&cd) != 0) ++ reiser4_preempt_point(); ++ ++ spin_lock_txnh(txnh); ++ ++ cd.atom->txnh_count -= 1; ++ txnh->atom = NULL; ++ /* remove transaction handle from atom's list of transaction handles */ ++ list_del_init(&txnh->txnh_link); ++ ++ spin_unlock_txnh(txnh); ++ atom_dec_and_unlock(cd.atom); ++ /* if we don't want to do a commit (TXNH_DONT_COMMIT is set, probably ++ * because it takes time) by current thread, we do that work ++ * asynchronously by ktxnmgrd daemon. */ ++ if (cd.wake_ktxnmgrd_up) ++ ktxnmgrd_kick(&get_current_super_private()->tmgr); ++ ++ return 0; ++} ++ ++/* TRY_CAPTURE */ ++ ++/* This routine attempts a single block-capture request. It may return -E_REPEAT if some ++ condition indicates that the request should be retried, and it may block if the ++ txn_capture mode does not include the TXN_CAPTURE_NONBLOCKING request flag. ++ ++ This routine encodes the basic logic of block capturing described by: ++ ++ http://namesys.com/v4/v4.html ++ ++ Our goal here is to ensure that any two blocks that contain dependent modifications ++ should commit at the same time. This function enforces this discipline by initiating ++ fusion whenever a transaction handle belonging to one atom requests to read or write a ++ block belonging to another atom (TXN_CAPTURE_WRITE or TXN_CAPTURE_READ_ATOMIC). ++ ++ In addition, this routine handles the initial assignment of atoms to blocks and ++ transaction handles. These are possible outcomes of this function: ++ ++ 1. The block and handle are already part of the same atom: return immediate success ++ ++ 2. 
The block is assigned but the handle is not: call capture_assign_txnh to assign ++ the handle to the block's atom. ++ ++ 3. The handle is assigned but the block is not: call capture_assign_block to assign ++ the block to the handle's atom. ++ ++ 4. Both handle and block are assigned, but to different atoms: call capture_init_fusion ++ to fuse atoms. ++ ++ 5. Neither block nor handle are assigned: create a new atom and assign them both. ++ ++ 6. A read request for a non-captured block: return immediate success. ++ ++ This function acquires and releases the handle's spinlock. This function is called ++ under the jnode lock and if the return value is 0, it returns with the jnode lock still ++ held. If the return is -E_REPEAT or some other error condition, the jnode lock is ++ released. The external interface (reiser4_try_capture) manages re-aquiring the jnode ++ lock in the failure case. ++*/ ++static int try_capture_block( ++ txn_handle * txnh, jnode * node, txn_capture mode, ++ txn_atom ** atom_alloc) ++{ ++ txn_atom *block_atom; ++ txn_atom *txnh_atom; ++ ++ /* Should not call capture for READ_NONCOM requests, handled in reiser4_try_capture. */ ++ assert("jmacd-567", CAPTURE_TYPE(mode) != TXN_CAPTURE_READ_NONCOM); ++ ++ /* FIXME-ZAM-HANS: FIXME_LATER_JMACD Should assert that atom->tree == ++ * node->tree somewhere. */ ++ assert("umka-194", txnh != NULL); ++ assert("umka-195", node != NULL); ++ ++ /* The jnode is already locked! Being called from reiser4_try_capture(). */ ++ assert_spin_locked(&(node->guard)); ++ block_atom = node->atom; ++ ++ /* Get txnh spinlock, this allows us to compare txn_atom pointers but it doesn't ++ let us touch the atoms themselves. */ ++ spin_lock_txnh(txnh); ++ txnh_atom = txnh->atom; ++ /* Process of capturing continues into one of four branches depends on ++ which atoms from (block atom (node->atom), current atom (txnh->atom)) ++ exist. 
*/ ++ if (txnh_atom == NULL) { ++ if (block_atom == NULL) { ++ spin_unlock_txnh(txnh); ++ spin_unlock_jnode(node); ++ /* assign empty atom to the txnh and repeat */ ++ return atom_begin_and_assign_to_txnh(atom_alloc, txnh); ++ } else { ++ atomic_inc(&block_atom->refcount); ++ /* node spin-lock isn't needed anymore */ ++ spin_unlock_jnode(node); ++ if (!spin_trylock_atom(block_atom)) { ++ spin_unlock_txnh(txnh); ++ spin_lock_atom(block_atom); ++ spin_lock_txnh(txnh); ++ } ++ /* re-check state after getting txnh and the node ++ * atom spin-locked */ ++ if (node->atom != block_atom || txnh->atom != NULL) { ++ spin_unlock_txnh(txnh); ++ atom_dec_and_unlock(block_atom); ++ return RETERR(-E_REPEAT); ++ } ++ atomic_dec(&block_atom->refcount); ++ if (block_atom->stage > ASTAGE_CAPTURE_WAIT || ++ (block_atom->stage == ASTAGE_CAPTURE_WAIT && ++ block_atom->txnh_count != 0)) ++ return capture_fuse_wait(txnh, block_atom, NULL, mode); ++ capture_assign_txnh_nolock(block_atom, txnh); ++ spin_unlock_txnh(txnh); ++ spin_unlock_atom(block_atom); ++ return RETERR(-E_REPEAT); ++ } ++ } else { ++ /* It is time to perform deadlock prevention check over the ++ node we want to capture. It is possible this node was locked ++ for read without capturing it. The optimization which allows ++ to do it helps us in keeping atoms independent as long as ++ possible but it may cause lock/fuse deadlock problems. ++ ++ A number of similar deadlock situations with locked but not ++ captured nodes were found. In each situation there are two ++ or more threads: one of them does flushing while another one ++ does routine balancing or tree lookup. The flushing thread ++ (F) sleeps in long term locking request for node (N), another ++ thread (A) sleeps in trying to capture some node already ++ belonging the atom F, F has a state which prevents ++ immediately fusion . ++ ++ Deadlocks of this kind cannot happen if node N was properly ++ captured by thread A. 
The F thread fuse atoms before locking ++ therefore current atom of thread F and current atom of thread ++ A became the same atom and thread A may proceed. This does ++ not work if node N was not captured because the fusion of ++ atom does not happens. ++ ++ The following scheme solves the deadlock: If ++ longterm_lock_znode locks and does not capture a znode, that ++ znode is marked as MISSED_IN_CAPTURE. A node marked this way ++ is processed by the code below which restores the missed ++ capture and fuses current atoms of all the node lock owners ++ by calling the fuse_not_fused_lock_owners() function. */ ++ if (JF_ISSET(node, JNODE_MISSED_IN_CAPTURE)) { ++ JF_CLR(node, JNODE_MISSED_IN_CAPTURE); ++ if (jnode_is_znode(node) && znode_is_locked(JZNODE(node))) { ++ spin_unlock_txnh(txnh); ++ spin_unlock_jnode(node); ++ fuse_not_fused_lock_owners(txnh, JZNODE(node)); ++ return RETERR(-E_REPEAT); ++ } ++ } ++ if (block_atom == NULL) { ++ atomic_inc(&txnh_atom->refcount); ++ spin_unlock_txnh(txnh); ++ if (!spin_trylock_atom(txnh_atom)) { ++ spin_unlock_jnode(node); ++ spin_lock_atom(txnh_atom); ++ spin_lock_jnode(node); ++ } ++ if (txnh->atom != txnh_atom || node->atom != NULL ++ || JF_ISSET(node, JNODE_IS_DYING)) { ++ spin_unlock_jnode(node); ++ atom_dec_and_unlock(txnh_atom); ++ return RETERR(-E_REPEAT); ++ } ++ atomic_dec(&txnh_atom->refcount); ++ capture_assign_block_nolock(txnh_atom, node); ++ spin_unlock_atom(txnh_atom); ++ } else { ++ if (txnh_atom != block_atom) { ++ if (mode & TXN_CAPTURE_DONT_FUSE) { ++ spin_unlock_txnh(txnh); ++ spin_unlock_jnode(node); ++ /* we are in a "no-fusion" mode and @node is ++ * already part of transaction. 
*/ ++ return RETERR(-E_NO_NEIGHBOR); ++ } ++ return capture_init_fusion(node, txnh, mode); ++ } ++ spin_unlock_txnh(txnh); ++ } ++ } ++ return 0; ++} ++ ++static txn_capture ++build_capture_mode(jnode * node, znode_lock_mode lock_mode, txn_capture flags) ++{ ++ txn_capture cap_mode; ++ ++ assert_spin_locked(&(node->guard)); ++ ++ /* FIXME_JMACD No way to set TXN_CAPTURE_READ_MODIFY yet. */ ++ ++ if (lock_mode == ZNODE_WRITE_LOCK) { ++ cap_mode = TXN_CAPTURE_WRITE; ++ } else if (node->atom != NULL) { ++ cap_mode = TXN_CAPTURE_WRITE; ++ } else if (0 && /* txnh->mode == TXN_READ_FUSING && */ ++ jnode_get_level(node) == LEAF_LEVEL) { ++ /* NOTE-NIKITA TXN_READ_FUSING is not currently used */ ++ /* We only need a READ_FUSING capture at the leaf level. This ++ is because the internal levels of the tree (twigs included) ++ are redundant from the point of the user that asked for a ++ read-fusing transcrash. The user only wants to read-fuse ++ atoms due to reading uncommitted data that another user has ++ written. It is the file system that reads/writes the ++ internal tree levels, the user only reads/writes leaves. */ ++ cap_mode = TXN_CAPTURE_READ_ATOMIC; ++ } else { ++ /* In this case (read lock at a non-leaf) there's no reason to ++ * capture. */ ++ /* cap_mode = TXN_CAPTURE_READ_NONCOM; */ ++ return 0; ++ } ++ ++ cap_mode |= (flags & (TXN_CAPTURE_NONBLOCKING | TXN_CAPTURE_DONT_FUSE)); ++ assert("nikita-3186", cap_mode != 0); ++ return cap_mode; ++} ++ ++/* This is an external interface to try_capture_block(), it calls ++ try_capture_block() repeatedly as long as -E_REPEAT is returned. ++ ++ @node: node to capture, ++ @lock_mode: read or write lock is used in capture mode calculation, ++ @flags: see txn_capture flags enumeration, ++ @can_coc : can copy-on-capture ++ ++ @return: 0 - node was successfully captured, -E_REPEAT - capture request ++ cannot be processed immediately as it was requested in flags, ++ < 0 - other errors. 
++*/ ++int reiser4_try_capture(jnode *node, znode_lock_mode lock_mode, ++ txn_capture flags) ++{ ++ txn_atom *atom_alloc = NULL; ++ txn_capture cap_mode; ++ txn_handle *txnh = get_current_context()->trans; ++ int ret; ++ ++ assert_spin_locked(&(node->guard)); ++ ++ repeat: ++ if (JF_ISSET(node, JNODE_IS_DYING)) ++ return RETERR(-EINVAL); ++ if (node->atom != NULL && txnh->atom == node->atom) ++ return 0; ++ cap_mode = build_capture_mode(node, lock_mode, flags); ++ if (cap_mode == 0 || ++ (!(cap_mode & TXN_CAPTURE_WTYPES) && node->atom == NULL)) { ++ /* Mark this node as "MISSED". It helps in further deadlock ++ * analysis */ ++ if (jnode_is_znode(node)) ++ JF_SET(node, JNODE_MISSED_IN_CAPTURE); ++ return 0; ++ } ++ /* Repeat try_capture as long as -E_REPEAT is returned. */ ++ ret = try_capture_block(txnh, node, cap_mode, &atom_alloc); ++ /* Regardless of non_blocking: ++ ++ If ret == 0 then jnode is still locked. ++ If ret != 0 then jnode is unlocked. ++ */ ++#if REISER4_DEBUG ++ if (ret == 0) ++ assert_spin_locked(&(node->guard)); ++ else ++ assert_spin_not_locked(&(node->guard)); ++#endif ++ assert_spin_not_locked(&(txnh->guard)); ++ ++ if (ret == -E_REPEAT) { ++ /* E_REPEAT implies all locks were released, therefore we need ++ to take the jnode's lock again. */ ++ spin_lock_jnode(node); ++ ++ /* Although this may appear to be a busy loop, it is not. ++ There are several conditions that cause E_REPEAT to be ++ returned by the call to try_capture_block, all cases ++ indicating some kind of state change that means you should ++ retry the request and will get a different result. In some ++ cases this could be avoided with some extra code, but ++ generally it is done because the necessary locks were ++ released as a result of the operation and repeating is the ++ simplest thing to do (less bug potential). 
The cases are: ++ atom fusion returns E_REPEAT after it completes (jnode and ++ txnh were unlocked); race conditions in assign_block, ++ assign_txnh, and init_fusion return E_REPEAT (trylock ++ failure); after going to sleep in capture_fuse_wait ++ (request was blocked but may now succeed). I'm not quite ++ sure how capture_copy works yet, but it may also return ++ E_REPEAT. When the request is legitimately blocked, the ++ requestor goes to sleep in fuse_wait, so this is not a busy ++ loop. */ ++ /* NOTE-NIKITA: still don't understand: ++ ++ try_capture_block->capture_assign_txnh->spin_trylock_atom->E_REPEAT ++ ++ looks like busy loop? ++ */ ++ goto repeat; ++ } ++ ++ /* free extra atom object that was possibly allocated by ++ try_capture_block(). ++ ++ Do this before acquiring jnode spin lock to ++ minimize time spent under lock. --nikita */ ++ if (atom_alloc != NULL) { ++ kmem_cache_free(_atom_slab, atom_alloc); ++ } ++ ++ if (ret != 0) { ++ if (ret == -E_BLOCK) { ++ assert("nikita-3360", ++ cap_mode & TXN_CAPTURE_NONBLOCKING); ++ ret = -E_REPEAT; ++ } ++ ++ /* Failure means jnode is not locked. FIXME_LATER_JMACD May ++ want to fix the above code to avoid releasing the lock and ++ re-acquiring it, but there are cases were failure occurs ++ when the lock is not held, and those cases would need to be ++ modified to re-take the lock. */ ++ spin_lock_jnode(node); ++ } ++ ++ /* Jnode is still locked. */ ++ assert_spin_locked(&(node->guard)); ++ return ret; ++} ++ ++static void release_two_atoms(txn_atom *one, txn_atom *two) ++{ ++ spin_unlock_atom(one); ++ atom_dec_and_unlock(two); ++ spin_lock_atom(one); ++ atom_dec_and_unlock(one); ++} ++ ++/* This function sets up a call to try_capture_block and repeats as long as -E_REPEAT is ++ returned by that routine. The txn_capture request mode is computed here depending on ++ the transaction handle's type and the lock request. 
This is called from the depths of ++ the lock manager with the jnode lock held and it always returns with the jnode lock ++ held. ++*/ ++ ++/* fuse all 'active' atoms of lock owners of given node. */ ++static void fuse_not_fused_lock_owners(txn_handle * txnh, znode * node) ++{ ++ lock_handle *lh; ++ int repeat; ++ txn_atom *atomh, *atomf; ++ reiser4_context *me = get_current_context(); ++ reiser4_context *ctx = NULL; ++ ++ assert_spin_not_locked(&(ZJNODE(node)->guard)); ++ assert_spin_not_locked(&(txnh->hlock)); ++ ++ repeat: ++ repeat = 0; ++ atomh = txnh_get_atom(txnh); ++ spin_unlock_txnh(txnh); ++ assert("zam-692", atomh != NULL); ++ ++ spin_lock_zlock(&node->lock); ++ /* inspect list of lock owners */ ++ list_for_each_entry(lh, &node->lock.owners, owners_link) { ++ ctx = get_context_by_lock_stack(lh->owner); ++ if (ctx == me) ++ continue; ++ /* below we use two assumptions to avoid addition spin-locks ++ for checking the condition : ++ ++ 1) if the lock stack has lock, the transaction should be ++ opened, i.e. ctx->trans != NULL; ++ ++ 2) reading of well-aligned ctx->trans->atom is atomic, if it ++ equals to the address of spin-locked atomh, we take that ++ the atoms are the same, nothing has to be captured. */ ++ if (atomh != ctx->trans->atom) { ++ reiser4_wake_up(lh->owner); ++ repeat = 1; ++ break; ++ } ++ } ++ if (repeat) { ++ if (!spin_trylock_txnh(ctx->trans)) { ++ spin_unlock_zlock(&node->lock); ++ spin_unlock_atom(atomh); ++ goto repeat; ++ } ++ atomf = ctx->trans->atom; ++ if (atomf == NULL) { ++ capture_assign_txnh_nolock(atomh, ctx->trans); ++ /* release zlock lock _after_ assigning the atom to the ++ * transaction handle, otherwise the lock owner thread ++ * may unlock all znodes, exit kernel context and here ++ * we would access an invalid transaction handle. 
*/ ++ spin_unlock_zlock(&node->lock); ++ spin_unlock_atom(atomh); ++ spin_unlock_txnh(ctx->trans); ++ goto repeat; ++ } ++ assert("zam-1059", atomf != atomh); ++ spin_unlock_zlock(&node->lock); ++ atomic_inc(&atomh->refcount); ++ atomic_inc(&atomf->refcount); ++ spin_unlock_txnh(ctx->trans); ++ if (atomf > atomh) { ++ spin_lock_atom_nested(atomf); ++ } else { ++ spin_unlock_atom(atomh); ++ spin_lock_atom(atomf); ++ spin_lock_atom_nested(atomh); ++ } ++ if (atomh == atomf || !atom_isopen(atomh) || !atom_isopen(atomf)) { ++ release_two_atoms(atomf, atomh); ++ goto repeat; ++ } ++ atomic_dec(&atomh->refcount); ++ atomic_dec(&atomf->refcount); ++ capture_fuse_into(atomf, atomh); ++ goto repeat; ++ } ++ spin_unlock_zlock(&node->lock); ++ spin_unlock_atom(atomh); ++} ++ ++/* This is the interface to capture unformatted nodes via their struct page ++ reference. Currently it is only used in reiser4_invalidatepage */ ++int try_capture_page_to_invalidate(struct page *pg) ++{ ++ int ret; ++ jnode *node; ++ ++ assert("umka-292", pg != NULL); ++ assert("nikita-2597", PageLocked(pg)); ++ ++ if (IS_ERR(node = jnode_of_page(pg))) { ++ return PTR_ERR(node); ++ } ++ ++ spin_lock_jnode(node); ++ unlock_page(pg); ++ ++ ret = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0); ++ spin_unlock_jnode(node); ++ jput(node); ++ lock_page(pg); ++ return ret; ++} ++ ++/* This informs the transaction manager when a node is deleted. Add the block to the ++ atom's delete set and uncapture the block. ++ ++VS-FIXME-HANS: this E_REPEAT paradigm clutters the code and creates a need for ++explanations. find all the functions that use it, and unless there is some very ++good reason to use it (I have not noticed one so far and I doubt it exists, but maybe somewhere somehow....), ++move the loop to inside the function. ++ ++VS-FIXME-HANS: can this code be at all streamlined? In particular, can you lock and unlock the jnode fewer times? 
++ */ ++void reiser4_uncapture_page(struct page *pg) ++{ ++ jnode *node; ++ txn_atom *atom; ++ ++ assert("umka-199", pg != NULL); ++ assert("nikita-3155", PageLocked(pg)); ++ ++ clear_page_dirty_for_io(pg); ++ ++ reiser4_wait_page_writeback(pg); ++ ++ node = jprivate(pg); ++ BUG_ON(node == NULL); ++ ++ spin_lock_jnode(node); ++ ++ atom = jnode_get_atom(node); ++ if (atom == NULL) { ++ assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY)); ++ spin_unlock_jnode(node); ++ return; ++ } ++ ++ /* We can remove jnode from transaction even if it is on flush queue ++ * prepped list, we only need to be sure that flush queue is not being ++ * written by reiser4_write_fq(). reiser4_write_fq() does not use atom ++ * spin lock for protection of the prepped nodes list, instead ++ * write_fq() increments atom's nr_running_queues counters for the time ++ * when prepped list is not protected by spin lock. Here we check this ++ * counter if we want to remove jnode from flush queue and, if the ++ * counter is not zero, wait all reiser4_write_fq() for this atom to ++ * complete. This is not significant overhead. */ ++ while (JF_ISSET(node, JNODE_FLUSH_QUEUED) && atom->nr_running_queues) { ++ spin_unlock_jnode(node); ++ /* ++ * at this moment we want to wait for "atom event", viz. wait ++ * until @node can be removed from flush queue. But ++ * reiser4_atom_wait_event() cannot be called with page locked, ++ * because it deadlocks with jnode_extent_write(). Unlock page, ++ * after making sure (through page_cache_get()) that it cannot ++ * be released from memory. ++ */ ++ page_cache_get(pg); ++ unlock_page(pg); ++ reiser4_atom_wait_event(atom); ++ lock_page(pg); ++ /* ++ * page may has been detached by ->writepage()->releasepage(). 
++ */ ++ reiser4_wait_page_writeback(pg); ++ spin_lock_jnode(node); ++ page_cache_release(pg); ++ atom = jnode_get_atom(node); ++/* VS-FIXME-HANS: improve the commenting in this function */ ++ if (atom == NULL) { ++ spin_unlock_jnode(node); ++ return; ++ } ++ } ++ reiser4_uncapture_block(node); ++ spin_unlock_atom(atom); ++ jput(node); ++} ++ ++/* this is used in extent's kill hook to uncapture and unhash jnodes attached to ++ * inode's tree of jnodes */ ++void reiser4_uncapture_jnode(jnode * node) ++{ ++ txn_atom *atom; ++ ++ assert_spin_locked(&(node->guard)); ++ assert("", node->pg == 0); ++ ++ atom = jnode_get_atom(node); ++ if (atom == NULL) { ++ assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY)); ++ spin_unlock_jnode(node); ++ return; ++ } ++ ++ reiser4_uncapture_block(node); ++ spin_unlock_atom(atom); ++ jput(node); ++} ++ ++/* No-locking version of assign_txnh. Sets the transaction handle's atom pointer, ++ increases atom refcount and txnh_count, adds to txnh_list. */ ++static void capture_assign_txnh_nolock(txn_atom *atom, txn_handle *txnh) ++{ ++ assert("umka-200", atom != NULL); ++ assert("umka-201", txnh != NULL); ++ ++ assert_spin_locked(&(txnh->hlock)); ++ assert_spin_locked(&(atom->alock)); ++ assert("jmacd-824", txnh->atom == NULL); ++ assert("nikita-3540", atom_isopen(atom)); ++ BUG_ON(txnh->atom != NULL); ++ ++ atomic_inc(&atom->refcount); ++ txnh->atom = atom; ++ reiser4_ctx_gfp_mask_set(); ++ list_add_tail(&txnh->txnh_link, &atom->txnh_list); ++ atom->txnh_count += 1; ++} ++ ++/* No-locking version of assign_block. Sets the block's atom pointer, references the ++ block, adds it to the clean or dirty capture_jnode list, increments capture_count. 
*/ ++static void capture_assign_block_nolock(txn_atom *atom, jnode *node) ++{ ++ assert("umka-202", atom != NULL); ++ assert("umka-203", node != NULL); ++ assert_spin_locked(&(node->guard)); ++ assert_spin_locked(&(atom->alock)); ++ assert("jmacd-323", node->atom == NULL); ++ BUG_ON(!list_empty_careful(&node->capture_link)); ++ assert("nikita-3470", !JF_ISSET(node, JNODE_DIRTY)); ++ ++ /* Pointer from jnode to atom is not counted in atom->refcount. */ ++ node->atom = atom; ++ ++ list_add_tail(&node->capture_link, ATOM_CLEAN_LIST(atom)); ++ atom->capture_count += 1; ++ /* reference to jnode is acquired by atom. */ ++ jref(node); ++ ++ ON_DEBUG(count_jnode(atom, node, NOT_CAPTURED, CLEAN_LIST, 1)); ++ ++ LOCK_CNT_INC(t_refs); ++} ++ ++/* common code for dirtying both unformatted jnodes and formatted znodes. */ ++static void do_jnode_make_dirty(jnode * node, txn_atom * atom) ++{ ++ assert_spin_locked(&(node->guard)); ++ assert_spin_locked(&(atom->alock)); ++ assert("jmacd-3981", !JF_ISSET(node, JNODE_DIRTY)); ++ ++ JF_SET(node, JNODE_DIRTY); ++ ++ get_current_context()->nr_marked_dirty++; ++ ++ /* We grab2flush_reserve one additional block only if node was ++ not CREATED and jnode_flush did not sort it into neither ++ relocate set nor overwrite one. If node is in overwrite or ++ relocate set we assume that atom's flush reserved counter was ++ already adjusted. */ ++ if (!JF_ISSET(node, JNODE_CREATED) && !JF_ISSET(node, JNODE_RELOC) ++ && !JF_ISSET(node, JNODE_OVRWR) && jnode_is_leaf(node) ++ && !jnode_is_cluster_page(node)) { ++ assert("vs-1093", !reiser4_blocknr_is_fake(&node->blocknr)); ++ assert("vs-1506", *jnode_get_block(node) != 0); ++ grabbed2flush_reserved_nolock(atom, (__u64) 1); ++ JF_SET(node, JNODE_FLUSH_RESERVED); ++ } ++ ++ if (!JF_ISSET(node, JNODE_FLUSH_QUEUED)) { ++ /* If the atom is not set yet, it will be added to the appropriate list in ++ capture_assign_block_nolock. 
*/ ++ /* Sometimes a node is set dirty before being captured -- the case for new ++ jnodes. In that case the jnode will be added to the appropriate list ++ in capture_assign_block_nolock. Another reason not to re-link jnode is ++ that jnode is on a flush queue (see flush.c for details) */ ++ ++ int level = jnode_get_level(node); ++ ++ assert("nikita-3152", !JF_ISSET(node, JNODE_OVRWR)); ++ assert("zam-654", atom->stage < ASTAGE_PRE_COMMIT); ++ assert("nikita-2607", 0 <= level); ++ assert("nikita-2606", level <= REAL_MAX_ZTREE_HEIGHT); ++ ++ /* move node to atom's dirty list */ ++ list_move_tail(&node->capture_link, ATOM_DIRTY_LIST(atom, level)); ++ ON_DEBUG(count_jnode ++ (atom, node, NODE_LIST(node), DIRTY_LIST, 1)); ++ } ++} ++ ++/* Set the dirty status for this (spin locked) jnode. */ ++void jnode_make_dirty_locked(jnode * node) ++{ ++ assert("umka-204", node != NULL); ++ assert_spin_locked(&(node->guard)); ++ ++ if (REISER4_DEBUG && rofs_jnode(node)) { ++ warning("nikita-3365", "Dirtying jnode on rofs"); ++ dump_stack(); ++ } ++ ++ /* Fast check for already dirty node */ ++ if (!JF_ISSET(node, JNODE_DIRTY)) { ++ txn_atom *atom; ++ ++ atom = jnode_get_atom(node); ++ assert("vs-1094", atom); ++ /* Check jnode dirty status again because node spin lock might ++ * be released inside jnode_get_atom(). */ ++ if (likely(!JF_ISSET(node, JNODE_DIRTY))) ++ do_jnode_make_dirty(node, atom); ++ spin_unlock_atom(atom); ++ } ++} ++ ++/* Set the dirty status for this znode. */ ++void znode_make_dirty(znode * z) ++{ ++ jnode *node; ++ struct page *page; ++ ++ assert("umka-204", z != NULL); ++ assert("nikita-3290", znode_above_root(z) || znode_is_loaded(z)); ++ assert("nikita-3560", znode_is_write_locked(z)); ++ ++ node = ZJNODE(z); ++ /* znode is longterm locked, we can check dirty bit without spinlock */ ++ if (JF_ISSET(node, JNODE_DIRTY)) { ++ /* znode is dirty already. 
All we have to do is to change znode version */ ++ z->version = znode_build_version(jnode_get_tree(node)); ++ return; ++ } ++ ++ spin_lock_jnode(node); ++ jnode_make_dirty_locked(node); ++ page = jnode_page(node); ++ if (page != NULL) { ++ /* this is useful assertion (allows one to check that no ++ * modifications are lost due to update of in-flight page), ++ * but it requires locking on page to check PG_writeback ++ * bit. */ ++ /* assert("nikita-3292", ++ !PageWriteback(page) || ZF_ISSET(z, JNODE_WRITEBACK)); */ ++ page_cache_get(page); ++ ++ /* jnode lock is not needed for the rest of ++ * znode_set_dirty(). */ ++ spin_unlock_jnode(node); ++ /* reiser4 file write code calls set_page_dirty for ++ * unformatted nodes, for formatted nodes we do it here. */ ++ reiser4_set_page_dirty_internal(page); ++ page_cache_release(page); ++ /* bump version counter in znode */ ++ z->version = znode_build_version(jnode_get_tree(node)); ++ } else { ++ assert("zam-596", znode_above_root(JZNODE(node))); ++ spin_unlock_jnode(node); ++ } ++ ++ assert("nikita-1900", znode_is_write_locked(z)); ++ assert("jmacd-9777", node->atom != NULL); ++} ++ ++int reiser4_sync_atom(txn_atom * atom) ++{ ++ int result; ++ txn_handle *txnh; ++ ++ txnh = get_current_context()->trans; ++ ++ result = 0; ++ if (atom != NULL) { ++ if (atom->stage < ASTAGE_PRE_COMMIT) { ++ spin_lock_txnh(txnh); ++ capture_assign_txnh_nolock(atom, txnh); ++ result = force_commit_atom(txnh); ++ } else if (atom->stage < ASTAGE_POST_COMMIT) { ++ /* wait atom commit */ ++ reiser4_atom_wait_event(atom); ++ /* try once more */ ++ result = RETERR(-E_REPEAT); ++ } else ++ spin_unlock_atom(atom); ++ } ++ return result; ++} ++ ++#if REISER4_DEBUG ++ ++/* move jnode form one list to another ++ call this after atom->capture_count is updated */ ++void ++count_jnode(txn_atom * atom, jnode * node, atom_list old_list, ++ atom_list new_list, int check_lists) ++{ ++ struct list_head *pos; ++ ++ assert("zam-1018", atom_is_protected(atom)); ++ 
assert_spin_locked(&(node->guard)); ++ assert("", NODE_LIST(node) == old_list); ++ ++ switch (NODE_LIST(node)) { ++ case NOT_CAPTURED: ++ break; ++ case DIRTY_LIST: ++ assert("", atom->dirty > 0); ++ atom->dirty--; ++ break; ++ case CLEAN_LIST: ++ assert("", atom->clean > 0); ++ atom->clean--; ++ break; ++ case FQ_LIST: ++ assert("", atom->fq > 0); ++ atom->fq--; ++ break; ++ case WB_LIST: ++ assert("", atom->wb > 0); ++ atom->wb--; ++ break; ++ case OVRWR_LIST: ++ assert("", atom->ovrwr > 0); ++ atom->ovrwr--; ++ break; ++ default: ++ impossible("", ""); ++ } ++ ++ switch (new_list) { ++ case NOT_CAPTURED: ++ break; ++ case DIRTY_LIST: ++ atom->dirty++; ++ break; ++ case CLEAN_LIST: ++ atom->clean++; ++ break; ++ case FQ_LIST: ++ atom->fq++; ++ break; ++ case WB_LIST: ++ atom->wb++; ++ break; ++ case OVRWR_LIST: ++ atom->ovrwr++; ++ break; ++ default: ++ impossible("", ""); ++ } ++ ASSIGN_NODE_LIST(node, new_list); ++ if (0 && check_lists) { ++ int count; ++ tree_level level; ++ ++ count = 0; ++ ++ /* flush queue list */ ++ /* reiser4_check_fq(atom); */ ++ ++ /* dirty list */ ++ count = 0; ++ for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) { ++ list_for_each(pos, ATOM_DIRTY_LIST(atom, level)) ++ count++; ++ } ++ if (count != atom->dirty) ++ warning("", "dirty counter %d, real %d\n", atom->dirty, ++ count); ++ ++ /* clean list */ ++ count = 0; ++ list_for_each(pos, ATOM_CLEAN_LIST(atom)) ++ count++; ++ if (count != atom->clean) ++ warning("", "clean counter %d, real %d\n", atom->clean, ++ count); ++ ++ /* wb list */ ++ count = 0; ++ list_for_each(pos, ATOM_WB_LIST(atom)) ++ count++; ++ if (count != atom->wb) ++ warning("", "wb counter %d, real %d\n", atom->wb, ++ count); ++ ++ /* overwrite list */ ++ count = 0; ++ list_for_each(pos, ATOM_OVRWR_LIST(atom)) ++ count++; ++ ++ if (count != atom->ovrwr) ++ warning("", "ovrwr counter %d, real %d\n", atom->ovrwr, ++ count); ++ } ++ assert("vs-1624", atom->num_queued == atom->fq); ++ if (atom->capture_count 
!= ++ atom->dirty + atom->clean + atom->ovrwr + atom->wb + atom->fq) { ++ printk ++ ("count %d, dirty %d clean %d ovrwr %d wb %d fq %d\n", ++ atom->capture_count, atom->dirty, atom->clean, atom->ovrwr, ++ atom->wb, atom->fq); ++ assert("vs-1622", ++ atom->capture_count == ++ atom->dirty + atom->clean + atom->ovrwr + atom->wb + ++ atom->fq); ++ } ++} ++ ++#endif ++ ++/* Make node OVRWR and put it on atom->overwrite_nodes list, atom lock and jnode ++ * lock should be taken before calling this function. */ ++void jnode_make_wander_nolock(jnode * node) ++{ ++ txn_atom *atom; ++ ++ assert("nikita-2431", node != NULL); ++ assert("nikita-2432", !JF_ISSET(node, JNODE_RELOC)); ++ assert("nikita-3153", JF_ISSET(node, JNODE_DIRTY)); ++ assert("zam-897", !JF_ISSET(node, JNODE_FLUSH_QUEUED)); ++ assert("nikita-3367", !reiser4_blocknr_is_fake(jnode_get_block(node))); ++ ++ atom = node->atom; ++ ++ assert("zam-895", atom != NULL); ++ assert("zam-894", atom_is_protected(atom)); ++ ++ JF_SET(node, JNODE_OVRWR); ++ /* move node to atom's overwrite list */ ++ list_move_tail(&node->capture_link, ATOM_OVRWR_LIST(atom)); ++ ON_DEBUG(count_jnode(atom, node, DIRTY_LIST, OVRWR_LIST, 1)); ++} ++ ++/* Same as jnode_make_wander_nolock, but all necessary locks are taken inside ++ * this function. 
*/ ++void jnode_make_wander(jnode * node) ++{ ++ txn_atom *atom; ++ ++ spin_lock_jnode(node); ++ atom = jnode_get_atom(node); ++ assert("zam-913", atom != NULL); ++ assert("zam-914", !JF_ISSET(node, JNODE_RELOC)); ++ ++ jnode_make_wander_nolock(node); ++ spin_unlock_atom(atom); ++ spin_unlock_jnode(node); ++} ++ ++/* this just sets RELOC bit */ ++static void jnode_make_reloc_nolock(flush_queue_t * fq, jnode * node) ++{ ++ assert_spin_locked(&(node->guard)); ++ assert("zam-916", JF_ISSET(node, JNODE_DIRTY)); ++ assert("zam-917", !JF_ISSET(node, JNODE_RELOC)); ++ assert("zam-918", !JF_ISSET(node, JNODE_OVRWR)); ++ assert("zam-920", !JF_ISSET(node, JNODE_FLUSH_QUEUED)); ++ assert("nikita-3367", !reiser4_blocknr_is_fake(jnode_get_block(node))); ++ jnode_set_reloc(node); ++} ++ ++/* Make znode RELOC and put it on flush queue */ ++void znode_make_reloc(znode * z, flush_queue_t * fq) ++{ ++ jnode *node; ++ txn_atom *atom; ++ ++ node = ZJNODE(z); ++ spin_lock_jnode(node); ++ ++ atom = jnode_get_atom(node); ++ assert("zam-919", atom != NULL); ++ ++ jnode_make_reloc_nolock(fq, node); ++ queue_jnode(fq, node); ++ ++ spin_unlock_atom(atom); ++ spin_unlock_jnode(node); ++ ++} ++ ++/* Make unformatted node RELOC and put it on flush queue */ ++void unformatted_make_reloc(jnode *node, flush_queue_t *fq) ++{ ++ assert("vs-1479", jnode_is_unformatted(node)); ++ ++ jnode_make_reloc_nolock(fq, node); ++ queue_jnode(fq, node); ++} ++ ++int reiser4_capture_super_block(struct super_block *s) ++{ ++ int result; ++ znode *uber; ++ lock_handle lh; ++ ++ init_lh(&lh); ++ result = get_uber_znode(reiser4_get_tree(s), ++ ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI, &lh); ++ if (result) ++ return result; ++ ++ uber = lh.node; ++ /* Grabbing one block for superblock */ ++ result = reiser4_grab_space_force((__u64) 1, BA_RESERVED); ++ if (result != 0) ++ return result; ++ ++ znode_make_dirty(uber); ++ ++ done_lh(&lh); ++ return 0; ++} ++ ++/* Wakeup every handle on the atom's WAITFOR list */ ++static void 
wakeup_atom_waitfor_list(txn_atom * atom) ++{ ++ txn_wait_links *wlinks; ++ ++ assert("umka-210", atom != NULL); ++ ++ /* atom is locked */ ++ list_for_each_entry(wlinks, &atom->fwaitfor_list, _fwaitfor_link) { ++ if (wlinks->waitfor_cb == NULL || ++ wlinks->waitfor_cb(atom, wlinks)) ++ /* Wake up. */ ++ reiser4_wake_up(wlinks->_lock_stack); ++ } ++} ++ ++/* Wakeup every handle on the atom's WAITING list */ ++static void wakeup_atom_waiting_list(txn_atom * atom) ++{ ++ txn_wait_links *wlinks; ++ ++ assert("umka-211", atom != NULL); ++ ++ /* atom is locked */ ++ list_for_each_entry(wlinks, &atom->fwaiting_list, _fwaiting_link) { ++ if (wlinks->waiting_cb == NULL || ++ wlinks->waiting_cb(atom, wlinks)) ++ /* Wake up. */ ++ reiser4_wake_up(wlinks->_lock_stack); ++ } ++} ++ ++/* helper function used by capture_fuse_wait() to avoid "spurious wake-ups" */ ++static int wait_for_fusion(txn_atom * atom, txn_wait_links * wlinks) ++{ ++ assert("nikita-3330", atom != NULL); ++ assert_spin_locked(&(atom->alock)); ++ ++ /* atom->txnh_count == 1 is for waking waiters up if we are releasing ++ * last transaction handle. */ ++ return atom->stage != ASTAGE_CAPTURE_WAIT || atom->txnh_count == 1; ++} ++ ++/* The general purpose of this function is to wait on the first of two possible events. ++ The situation is that a handle (and its atom atomh) is blocked trying to capture a ++ block (i.e., node) but the node's atom (atomf) is in the CAPTURE_WAIT state. The ++ handle's atom (atomh) is not in the CAPTURE_WAIT state. However, atomh could fuse with ++ another atom or, due to age, enter the CAPTURE_WAIT state itself, at which point it ++ needs to unblock the handle to avoid deadlock. When the txnh is unblocked it will ++ proceed and fuse the two atoms in the CAPTURE_WAIT state. ++ ++ In other words, if either atomh or atomf change state, the handle will be awakened, ++ thus there are two lists per atom: WAITING and WAITFOR. 
++ ++ This is also called by capture_assign_txnh with (atomh == NULL) to wait for atomf to ++ close but it is not assigned to an atom of its own. ++ ++ Lock ordering in this method: all four locks are held: JNODE_LOCK, TXNH_LOCK, ++ BOTH_ATOM_LOCKS. Result: all four locks are released. ++*/ ++static int capture_fuse_wait(txn_handle * txnh, txn_atom * atomf, ++ txn_atom * atomh, txn_capture mode) ++{ ++ int ret; ++ txn_wait_links wlinks; ++ ++ assert("umka-213", txnh != NULL); ++ assert("umka-214", atomf != NULL); ++ ++ if ((mode & TXN_CAPTURE_NONBLOCKING) != 0) { ++ spin_unlock_txnh(txnh); ++ spin_unlock_atom(atomf); ++ ++ if (atomh) { ++ spin_unlock_atom(atomh); ++ } ++ ++ return RETERR(-E_BLOCK); ++ } ++ ++ /* Initialize the waiting list links. */ ++ init_wlinks(&wlinks); ++ ++ /* Add txnh to atomf's waitfor list, unlock atomf. */ ++ list_add_tail(&wlinks._fwaitfor_link, &atomf->fwaitfor_list); ++ wlinks.waitfor_cb = wait_for_fusion; ++ atomic_inc(&atomf->refcount); ++ spin_unlock_atom(atomf); ++ ++ if (atomh) { ++ /* Add txnh to atomh's waiting list, unlock atomh. */ ++ list_add_tail(&wlinks._fwaiting_link, &atomh->fwaiting_list); ++ atomic_inc(&atomh->refcount); ++ spin_unlock_atom(atomh); ++ } ++ ++ /* Go to sleep. */ ++ spin_unlock_txnh(txnh); ++ ++ ret = reiser4_prepare_to_sleep(wlinks._lock_stack); ++ if (ret == 0) { ++ reiser4_go_to_sleep(wlinks._lock_stack); ++ ret = RETERR(-E_REPEAT); ++ } ++ ++ /* Remove from the waitfor list. */ ++ spin_lock_atom(atomf); ++ ++ list_del(&wlinks._fwaitfor_link); ++ atom_dec_and_unlock(atomf); ++ ++ if (atomh) { ++ /* Remove from the waiting list. 
*/ ++ spin_lock_atom(atomh); ++ list_del(&wlinks._fwaiting_link); ++ atom_dec_and_unlock(atomh); ++ } ++ return ret; ++} ++ ++static void lock_two_atoms(txn_atom * one, txn_atom * two) ++{ ++ assert("zam-1067", one != two); ++ ++ /* lock the atom with lesser address first */ ++ if (one < two) { ++ spin_lock_atom(one); ++ spin_lock_atom_nested(two); ++ } else { ++ spin_lock_atom(two); ++ spin_lock_atom_nested(one); ++ } ++} ++ ++/* Perform the necessary work to prepare for fusing two atoms, which involves ++ * acquiring two atom locks in the proper order. If one of the node's atom is ++ * blocking fusion (i.e., it is in the CAPTURE_WAIT stage) and the handle's ++ * atom is not then the handle's request is put to sleep. If the node's atom ++ * is committing, then the node can be copy-on-captured. Otherwise, pick the ++ * atom with fewer pointers to be fused into the atom with more pointer and ++ * call capture_fuse_into. ++ */ ++static int capture_init_fusion(jnode *node, txn_handle *txnh, txn_capture mode) ++{ ++ txn_atom * txnh_atom = txnh->atom; ++ txn_atom * block_atom = node->atom; ++ ++ atomic_inc(&txnh_atom->refcount); ++ atomic_inc(&block_atom->refcount); ++ ++ spin_unlock_txnh(txnh); ++ spin_unlock_jnode(node); ++ ++ lock_two_atoms(txnh_atom, block_atom); ++ ++ if (txnh->atom != txnh_atom || node->atom != block_atom ) { ++ release_two_atoms(txnh_atom, block_atom); ++ return RETERR(-E_REPEAT); ++ } ++ ++ atomic_dec(&txnh_atom->refcount); ++ atomic_dec(&block_atom->refcount); ++ ++ assert ("zam-1066", atom_isopen(txnh_atom)); ++ ++ if (txnh_atom->stage >= block_atom->stage || ++ (block_atom->stage == ASTAGE_CAPTURE_WAIT && block_atom->txnh_count == 0)) { ++ capture_fuse_into(txnh_atom, block_atom); ++ return RETERR(-E_REPEAT); ++ } ++ spin_lock_txnh(txnh); ++ return capture_fuse_wait(txnh, block_atom, txnh_atom, mode); ++} ++ ++/* This function splices together two jnode lists (small and large) and sets all jnodes in ++ the small list to point to the large 
atom. Returns the length of the list. */ ++static int ++capture_fuse_jnode_lists(txn_atom *large, struct list_head *large_head, ++ struct list_head *small_head) ++{ ++ int count = 0; ++ jnode *node; ++ ++ assert("umka-218", large != NULL); ++ assert("umka-219", large_head != NULL); ++ assert("umka-220", small_head != NULL); ++ /* small atom should be locked also. */ ++ assert_spin_locked(&(large->alock)); ++ ++ /* For every jnode on small's capture list... */ ++ list_for_each_entry(node, small_head, capture_link) { ++ count += 1; ++ ++ /* With the jnode lock held, update atom pointer. */ ++ spin_lock_jnode(node); ++ node->atom = large; ++ spin_unlock_jnode(node); ++ } ++ ++ /* Splice the lists. */ ++ list_splice_init(small_head, large_head->prev); ++ ++ return count; ++} ++ ++/* This function splices together two txnh lists (small and large) and sets all txn handles in ++ the small list to point to the large atom. Returns the length of the list. */ ++static int ++capture_fuse_txnh_lists(txn_atom *large, struct list_head *large_head, ++ struct list_head *small_head) ++{ ++ int count = 0; ++ txn_handle *txnh; ++ ++ assert("umka-221", large != NULL); ++ assert("umka-222", large_head != NULL); ++ assert("umka-223", small_head != NULL); ++ ++ /* Adjust every txnh to the new atom. */ ++ list_for_each_entry(txnh, small_head, txnh_link) { ++ count += 1; ++ ++ /* With the txnh lock held, update atom pointer. */ ++ spin_lock_txnh(txnh); ++ txnh->atom = large; ++ spin_unlock_txnh(txnh); ++ } ++ ++ /* Splice the txn_handle list. */ ++ list_splice_init(small_head, large_head->prev); ++ ++ return count; ++} ++ ++/* This function fuses two atoms. The captured nodes and handles belonging to SMALL are ++ added to LARGE and their ->atom pointers are all updated. The associated counts are ++ updated as well, and any waiting handles belonging to either are awakened. Finally the ++ smaller atom's refcount is decremented. 
++*/ ++static void capture_fuse_into(txn_atom * small, txn_atom * large) ++{ ++ int level; ++ unsigned zcount = 0; ++ unsigned tcount = 0; ++ ++ assert("umka-224", small != NULL); ++ assert("umka-225", small != NULL); ++ ++ assert_spin_locked(&(large->alock)); ++ assert_spin_locked(&(small->alock)); ++ ++ assert("jmacd-201", atom_isopen(small)); ++ assert("jmacd-202", atom_isopen(large)); ++ ++ /* Splice and update the per-level dirty jnode lists */ ++ for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) { ++ zcount += ++ capture_fuse_jnode_lists(large, ++ ATOM_DIRTY_LIST(large, level), ++ ATOM_DIRTY_LIST(small, level)); ++ } ++ ++ /* Splice and update the [clean,dirty] jnode and txnh lists */ ++ zcount += ++ capture_fuse_jnode_lists(large, ATOM_CLEAN_LIST(large), ++ ATOM_CLEAN_LIST(small)); ++ zcount += ++ capture_fuse_jnode_lists(large, ATOM_OVRWR_LIST(large), ++ ATOM_OVRWR_LIST(small)); ++ zcount += ++ capture_fuse_jnode_lists(large, ATOM_WB_LIST(large), ++ ATOM_WB_LIST(small)); ++ zcount += ++ capture_fuse_jnode_lists(large, &large->inodes, &small->inodes); ++ tcount += ++ capture_fuse_txnh_lists(large, &large->txnh_list, ++ &small->txnh_list); ++ ++ /* Check our accounting. 
*/ ++ assert("jmacd-1063", ++ zcount + small->num_queued == small->capture_count); ++ assert("jmacd-1065", tcount == small->txnh_count); ++ ++ /* sum numbers of waiters threads */ ++ large->nr_waiters += small->nr_waiters; ++ small->nr_waiters = 0; ++ ++ /* splice flush queues */ ++ reiser4_fuse_fq(large, small); ++ ++ /* update counter of jnode on every atom' list */ ++ ON_DEBUG(large->dirty += small->dirty; ++ small->dirty = 0; ++ large->clean += small->clean; ++ small->clean = 0; ++ large->ovrwr += small->ovrwr; ++ small->ovrwr = 0; ++ large->wb += small->wb; ++ small->wb = 0; ++ large->fq += small->fq; ++ small->fq = 0;); ++ ++ /* count flushers in result atom */ ++ large->nr_flushers += small->nr_flushers; ++ small->nr_flushers = 0; ++ ++ /* update counts of flushed nodes */ ++ large->flushed += small->flushed; ++ small->flushed = 0; ++ ++ /* Transfer list counts to large. */ ++ large->txnh_count += small->txnh_count; ++ large->capture_count += small->capture_count; ++ ++ /* Add all txnh references to large. */ ++ atomic_add(small->txnh_count, &large->refcount); ++ atomic_sub(small->txnh_count, &small->refcount); ++ ++ /* Reset small counts */ ++ small->txnh_count = 0; ++ small->capture_count = 0; ++ ++ /* Assign the oldest start_time, merge flags. */ ++ large->start_time = min(large->start_time, small->start_time); ++ large->flags |= small->flags; ++ ++ /* Merge blocknr sets. 
*/ ++ blocknr_set_merge(&small->delete_set, &large->delete_set); ++ blocknr_set_merge(&small->wandered_map, &large->wandered_map); ++ ++ /* Merge allocated/deleted file counts */ ++ large->nr_objects_deleted += small->nr_objects_deleted; ++ large->nr_objects_created += small->nr_objects_created; ++ ++ small->nr_objects_deleted = 0; ++ small->nr_objects_created = 0; ++ ++ /* Merge allocated blocks counts */ ++ large->nr_blocks_allocated += small->nr_blocks_allocated; ++ ++ large->nr_running_queues += small->nr_running_queues; ++ small->nr_running_queues = 0; ++ ++ /* Merge blocks reserved for overwrite set. */ ++ large->flush_reserved += small->flush_reserved; ++ small->flush_reserved = 0; ++ ++ if (large->stage < small->stage) { ++ /* Large only needs to notify if it has changed state. */ ++ reiser4_atom_set_stage(large, small->stage); ++ wakeup_atom_waiting_list(large); ++ } ++ ++ reiser4_atom_set_stage(small, ASTAGE_INVALID); ++ ++ /* Notify any waiters--small needs to unload its wait lists. Waiters ++ actually remove themselves from the list before returning from the ++ fuse_wait function. */ ++ wakeup_atom_waiting_list(small); ++ ++ /* Unlock atoms */ ++ spin_unlock_atom(large); ++ atom_dec_and_unlock(small); ++} ++ ++/* TXNMGR STUFF */ ++ ++/* Release a block from the atom, reversing the effects of being captured, ++ do not release atom's reference to jnode due to holding spin-locks. ++ Currently this is only called when the atom commits. ++ ++ NOTE: this function does not release a (journal) reference to jnode ++ due to locking optimizations, you should call jput() somewhere after ++ calling reiser4_uncapture_block(). 
*/ ++void reiser4_uncapture_block(jnode * node) ++{ ++ txn_atom *atom; ++ ++ assert("umka-226", node != NULL); ++ atom = node->atom; ++ assert("umka-228", atom != NULL); ++ ++ assert("jmacd-1021", node->atom == atom); ++ assert_spin_locked(&(node->guard)); ++ assert("jmacd-1023", atom_is_protected(atom)); ++ ++ JF_CLR(node, JNODE_DIRTY); ++ JF_CLR(node, JNODE_RELOC); ++ JF_CLR(node, JNODE_OVRWR); ++ JF_CLR(node, JNODE_CREATED); ++ JF_CLR(node, JNODE_WRITEBACK); ++ JF_CLR(node, JNODE_REPACK); ++ ++ list_del_init(&node->capture_link); ++ if (JF_ISSET(node, JNODE_FLUSH_QUEUED)) { ++ assert("zam-925", atom_isopen(atom)); ++ assert("vs-1623", NODE_LIST(node) == FQ_LIST); ++ ON_DEBUG(atom->num_queued--); ++ JF_CLR(node, JNODE_FLUSH_QUEUED); ++ } ++ atom->capture_count -= 1; ++ ON_DEBUG(count_jnode(atom, node, NODE_LIST(node), NOT_CAPTURED, 1)); ++ node->atom = NULL; ++ ++ spin_unlock_jnode(node); ++ LOCK_CNT_DEC(t_refs); ++} ++ ++/* Unconditional insert of jnode into atom's overwrite list. Currently used in ++ bitmap-based allocator code for adding modified bitmap blocks the ++ transaction. 
@atom and @node are spin locked */ ++void insert_into_atom_ovrwr_list(txn_atom * atom, jnode * node) ++{ ++ assert("zam-538", atom_is_protected(atom)); ++ assert_spin_locked(&(node->guard)); ++ assert("zam-899", JF_ISSET(node, JNODE_OVRWR)); ++ assert("zam-543", node->atom == NULL); ++ assert("vs-1433", !jnode_is_unformatted(node) && !jnode_is_znode(node)); ++ ++ list_add(&node->capture_link, ATOM_OVRWR_LIST(atom)); ++ jref(node); ++ node->atom = atom; ++ atom->capture_count++; ++ ON_DEBUG(count_jnode(atom, node, NODE_LIST(node), OVRWR_LIST, 1)); ++} ++ ++static int count_deleted_blocks_actor(txn_atom * atom, ++ const reiser4_block_nr * a, ++ const reiser4_block_nr * b, void *data) ++{ ++ reiser4_block_nr *counter = data; ++ ++ assert("zam-995", data != NULL); ++ assert("zam-996", a != NULL); ++ if (b == NULL) ++ *counter += 1; ++ else ++ *counter += *b; ++ return 0; ++} ++ ++reiser4_block_nr txnmgr_count_deleted_blocks(void) ++{ ++ reiser4_block_nr result; ++ txn_mgr *tmgr = &get_super_private(reiser4_get_current_sb())->tmgr; ++ txn_atom *atom; ++ ++ result = 0; ++ ++ spin_lock_txnmgr(tmgr); ++ list_for_each_entry(atom, &tmgr->atoms_list, atom_link) { ++ spin_lock_atom(atom); ++ if (atom_isopen(atom)) ++ blocknr_set_iterator( ++ atom, &atom->delete_set, ++ count_deleted_blocks_actor, &result, 0); ++ spin_unlock_atom(atom); ++ } ++ spin_unlock_txnmgr(tmgr); ++ ++ return result; ++} ++ ++/* ++ * Local variables: ++ * c-indentation-style: "K&R" ++ * mode-name: "LC" ++ * c-basic-offset: 8 ++ * tab-width: 8 ++ * fill-column: 79 ++ * End: ++ */ +diff --git a/fs/reiser4/txnmgr.h b/fs/reiser4/txnmgr.h +new file mode 100644 +index 0000000..6ad4b5a +--- /dev/null ++++ b/fs/reiser4/txnmgr.h +@@ -0,0 +1,708 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* data-types and function declarations for transaction manager. See txnmgr.c ++ * for details. 
*/ ++ ++#ifndef __REISER4_TXNMGR_H__ ++#define __REISER4_TXNMGR_H__ ++ ++#include "forward.h" ++#include "dformat.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* TYPE DECLARATIONS */ ++ ++/* This enumeration describes the possible types of a capture request (reiser4_try_capture). ++ A capture request dynamically assigns a block to the calling thread's transaction ++ handle. */ ++typedef enum { ++ /* A READ_ATOMIC request indicates that a block will be read and that the caller's ++ atom should fuse in order to ensure that the block commits atomically with the ++ caller. */ ++ TXN_CAPTURE_READ_ATOMIC = (1 << 0), ++ ++ /* A READ_NONCOM request indicates that a block will be read and that the caller is ++ willing to read a non-committed block without causing atoms to fuse. */ ++ TXN_CAPTURE_READ_NONCOM = (1 << 1), ++ ++ /* A READ_MODIFY request indicates that a block will be read but that the caller ++ wishes for the block to be captured as it will be written. This capture request ++ mode is not currently used, but eventually it will be useful for preventing ++ deadlock in read-modify-write cycles. */ ++ TXN_CAPTURE_READ_MODIFY = (1 << 2), ++ ++ /* A WRITE capture request indicates that a block will be modified and that atoms ++ should fuse to make the commit atomic. */ ++ TXN_CAPTURE_WRITE = (1 << 3), ++ ++ /* CAPTURE_TYPES is a mask of the four above capture types, used to separate the ++ exclusive type designation from extra bits that may be supplied -- see ++ below. */ ++ TXN_CAPTURE_TYPES = (TXN_CAPTURE_READ_ATOMIC | ++ TXN_CAPTURE_READ_NONCOM | TXN_CAPTURE_READ_MODIFY | ++ TXN_CAPTURE_WRITE), ++ ++ /* A subset of CAPTURE_TYPES, CAPTURE_WTYPES is a mask of request types that ++ indicate modification will occur. */ ++ TXN_CAPTURE_WTYPES = (TXN_CAPTURE_READ_MODIFY | TXN_CAPTURE_WRITE), ++ ++ /* An option to reiser4_try_capture, NONBLOCKING indicates that the caller would ++ prefer not to sleep waiting for an aging atom to commit. 
*/ ++ TXN_CAPTURE_NONBLOCKING = (1 << 4), ++ ++ /* An option to reiser4_try_capture to prevent atom fusion, just simple ++ capturing is allowed */ ++ TXN_CAPTURE_DONT_FUSE = (1 << 5) ++ ++ /* This macro selects only the exclusive capture request types, stripping out any ++ options that were supplied (i.e., NONBLOCKING). */ ++#define CAPTURE_TYPE(x) ((x) & TXN_CAPTURE_TYPES) ++} txn_capture; ++ ++/* There are two kinds of transaction handle: WRITE_FUSING and READ_FUSING, the only ++ difference is in the handling of read requests. A WRITE_FUSING transaction handle ++ defaults read capture requests to TXN_CAPTURE_READ_NONCOM whereas a READ_FUSIONG ++ transaction handle defaults to TXN_CAPTURE_READ_ATOMIC. */ ++typedef enum { ++ TXN_WRITE_FUSING = (1 << 0), ++ TXN_READ_FUSING = (1 << 1) | TXN_WRITE_FUSING, /* READ implies WRITE */ ++} txn_mode; ++ ++/* Every atom has a stage, which is one of these exclusive values: */ ++typedef enum { ++ /* Initially an atom is free. */ ++ ASTAGE_FREE = 0, ++ ++ /* An atom begins by entering the CAPTURE_FUSE stage, where it proceeds to capture ++ blocks and fuse with other atoms. */ ++ ASTAGE_CAPTURE_FUSE = 1, ++ ++ /* We need to have a ASTAGE_CAPTURE_SLOW in which an atom fuses with one node for every X nodes it flushes to disk where X > 1. */ ++ ++ /* When an atom reaches a certain age it must do all it can to commit. An atom in ++ the CAPTURE_WAIT stage refuses new transaction handles and prevents fusion from ++ atoms in the CAPTURE_FUSE stage. */ ++ ASTAGE_CAPTURE_WAIT = 2, ++ ++ /* Waiting for I/O before commit. Copy-on-capture (see ++ http://namesys.com/v4/v4.html). */ ++ ASTAGE_PRE_COMMIT = 3, ++ ++ /* Post-commit overwrite I/O. Steal-on-capture. */ ++ ASTAGE_POST_COMMIT = 4, ++ ++ /* Atom which waits for the removal of the last reference to (it? ) to ++ * be deleted from memory */ ++ ASTAGE_DONE = 5, ++ ++ /* invalid atom. */ ++ ASTAGE_INVALID = 6, ++ ++} txn_stage; ++ ++/* Certain flags may be set in the txn_atom->flags field. 
*/ ++typedef enum { ++ /* Indicates that the atom should commit as soon as possible. */ ++ ATOM_FORCE_COMMIT = (1 << 0), ++ /* to avoid endless loop, mark the atom (which was considered as too ++ * small) after failed attempt to fuse it. */ ++ ATOM_CANCEL_FUSION = (1 << 1) ++} txn_flags; ++ ++/* Flags for controlling commit_txnh */ ++typedef enum { ++ /* Wait commit atom completion in commit_txnh */ ++ TXNH_WAIT_COMMIT = 0x2, ++ /* Don't commit atom when this handle is closed */ ++ TXNH_DONT_COMMIT = 0x4 ++} txn_handle_flags_t; ++ ++/* TYPE DEFINITIONS */ ++ ++/* A note on lock ordering: the handle & jnode spinlock protects reading of their ->atom ++ fields, so typically an operation on the atom through either of these objects must (1) ++ lock the object, (2) read the atom pointer, (3) lock the atom. ++ ++ During atom fusion, the process holds locks on both atoms at once. Then, it iterates ++ through the list of handles and pages held by the smaller of the two atoms. For each ++ handle and page referencing the smaller atom, the fusing process must: (1) lock the ++ object, and (2) update the atom pointer. ++ ++ You can see that there is a conflict of lock ordering here, so the more-complex ++ procedure should have priority, i.e., the fusing process has priority so that it is ++ guaranteed to make progress and to avoid restarts. ++ ++ This decision, however, means additional complexity for aquiring the atom lock in the ++ first place. ++ ++ The general original procedure followed in the code was: ++ ++ TXN_OBJECT *obj = ...; ++ TXN_ATOM *atom; ++ ++ spin_lock (& obj->_lock); ++ ++ atom = obj->_atom; ++ ++ if (! spin_trylock_atom (atom)) ++ { ++ spin_unlock (& obj->_lock); ++ RESTART OPERATION, THERE WAS A RACE; ++ } ++ ++ ELSE YOU HAVE BOTH ATOM AND OBJ LOCKED ++ ++ It has however been found that this wastes CPU a lot in a manner that is ++ hard to profile. 
So, proper refcounting was added to atoms, and new ++ standard locking sequence is like following: ++ ++ TXN_OBJECT *obj = ...; ++ TXN_ATOM *atom; ++ ++ spin_lock (& obj->_lock); ++ ++ atom = obj->_atom; ++ ++ if (! spin_trylock_atom (atom)) ++ { ++ atomic_inc (& atom->refcount); ++ spin_unlock (& obj->_lock); ++ spin_lock (&atom->_lock); ++ atomic_dec (& atom->refcount); ++ // HERE atom is locked ++ spin_unlock (&atom->_lock); ++ RESTART OPERATION, THERE WAS A RACE; ++ } ++ ++ ELSE YOU HAVE BOTH ATOM AND OBJ LOCKED ++ ++ (core of this is implemented in trylock_throttle() function) ++ ++ See the jnode_get_atom() function for a common case. ++ ++ As an additional (and important) optimization allowing to avoid restarts, ++ it is possible to re-check required pre-conditions at the HERE point in ++ code above and proceed without restarting if they are still satisfied. ++*/ ++ ++/* An atomic transaction: this is the underlying system representation ++ of a transaction, not the one seen by clients. ++ ++ Invariants involving this data-type: ++ ++ [sb-fake-allocated] ++*/ ++struct txn_atom { ++ /* The spinlock protecting the atom, held during fusion and various other state ++ changes. */ ++ spinlock_t alock; ++ ++ /* The atom's reference counter, increasing (in case of a duplication ++ of an existing reference or when we are sure that some other ++ reference exists) may be done without taking spinlock, decrementing ++ of the ref. counter requires a spinlock to be held. ++ ++ Each transaction handle counts in ->refcount. All jnodes count as ++ one reference acquired in atom_begin_andlock(), released in ++ commit_current_atom(). ++ */ ++ atomic_t refcount; ++ ++ /* The atom_id identifies the atom in persistent records such as the log. */ ++ __u32 atom_id; ++ ++ /* Flags holding any of the txn_flags enumerated values (e.g., ++ ATOM_FORCE_COMMIT). */ ++ __u32 flags; ++ ++ /* Number of open handles. */ ++ __u32 txnh_count; ++ ++ /* The number of znodes captured by this atom. 
Equal to the sum of lengths of the ++ dirty_nodes[level] and clean_nodes lists. */ ++ __u32 capture_count; ++ ++#if REISER4_DEBUG ++ int clean; ++ int dirty; ++ int ovrwr; ++ int wb; ++ int fq; ++#endif ++ ++ __u32 flushed; ++ ++ /* Current transaction stage. */ ++ txn_stage stage; ++ ++ /* Start time. */ ++ unsigned long start_time; ++ ++ /* The atom's delete set. It collects block numbers of the nodes ++ which were deleted during the transaction. */ ++ struct list_head delete_set; ++ ++ /* The atom's wandered_block mapping. */ ++ struct list_head wandered_map; ++ ++ /* The transaction's list of dirty captured nodes--per level. Index ++ by (level). dirty_nodes[0] is for znode-above-root */ ++ struct list_head dirty_nodes[REAL_MAX_ZTREE_HEIGHT + 1]; ++ ++ /* The transaction's list of clean captured nodes. */ ++ struct list_head clean_nodes; ++ ++ /* The atom's overwrite set */ ++ struct list_head ovrwr_nodes; ++ ++ /* nodes which are being written to disk */ ++ struct list_head writeback_nodes; ++ ++ /* list of inodes */ ++ struct list_head inodes; ++ ++ /* List of handles associated with this atom. */ ++ struct list_head txnh_list; ++ ++ /* Transaction list link: list of atoms in the transaction manager. */ ++ struct list_head atom_link; ++ ++ /* List of handles waiting FOR this atom: see 'capture_fuse_wait' comment. */ ++ struct list_head fwaitfor_list; ++ ++ /* List of this atom's handles that are waiting: see 'capture_fuse_wait' comment. */ ++ struct list_head fwaiting_list; ++ ++ /* Numbers of objects which were deleted/created in this transaction ++ thereby numbers of objects IDs which were released/deallocated. */ ++ int nr_objects_deleted; ++ int nr_objects_created; ++ /* number of blocks allocated during the transaction */ ++ __u64 nr_blocks_allocated; ++ /* All atom's flush queue objects are on this list */ ++ struct list_head flush_queues; ++#if REISER4_DEBUG ++ /* number of flush queues for this atom. 
*/ ++ int nr_flush_queues; ++ /* Number of jnodes which were removed from atom's lists and put ++ on flush_queue */ ++ int num_queued; ++#endif ++ /* number of threads who wait for this atom to complete commit */ ++ int nr_waiters; ++ /* number of threads which do jnode_flush() over this atom */ ++ int nr_flushers; ++ /* number of flush queues which are IN_USE and jnodes from fq->prepped ++ are submitted to disk by the reiser4_write_fq() routine. */ ++ int nr_running_queues; ++ /* A counter of grabbed unformatted nodes, see a description of the ++ * reiser4 space reservation scheme at block_alloc.c */ ++ reiser4_block_nr flush_reserved; ++#if REISER4_DEBUG ++ void *committer; ++#endif ++ struct super_block *super; ++}; ++ ++#define ATOM_DIRTY_LIST(atom, level) (&(atom)->dirty_nodes[level]) ++#define ATOM_CLEAN_LIST(atom) (&(atom)->clean_nodes) ++#define ATOM_OVRWR_LIST(atom) (&(atom)->ovrwr_nodes) ++#define ATOM_WB_LIST(atom) (&(atom)->writeback_nodes) ++#define ATOM_FQ_LIST(fq) (&(fq)->prepped) ++ ++#define NODE_LIST(node) (node)->list ++#define ASSIGN_NODE_LIST(node, list) ON_DEBUG(NODE_LIST(node) = list) ++ON_DEBUG(void ++ count_jnode(txn_atom *, jnode *, atom_list old_list, ++ atom_list new_list, int check_lists)); ++ ++typedef struct protected_jnodes { ++ struct list_head inatom; /* link to atom's list these structures */ ++ struct list_head nodes; /* head of list of protected nodes */ ++} protected_jnodes; ++ ++/* A transaction handle: the client obtains and commits this handle which is assigned by ++ the system to a txn_atom. */ ++struct txn_handle { ++ /* Spinlock protecting ->atom pointer */ ++ spinlock_t hlock; ++ ++ /* Flags for controlling commit_txnh() behavior */ ++ /* from txn_handle_flags_t */ ++ txn_handle_flags_t flags; ++ ++ /* Whether it is READ_FUSING or WRITE_FUSING. */ ++ txn_mode mode; ++ ++ /* If assigned, the atom it is part of. */ ++ txn_atom *atom; ++ ++ /* Transaction list link. Head is in txn_atom. 
*/ ++ struct list_head txnh_link; ++}; ++ ++/* The transaction manager: one is contained in the reiser4_super_info_data */ ++struct txn_mgr { ++ /* A spinlock protecting the atom list, id_count, flush_control */ ++ spinlock_t tmgr_lock; ++ ++ /* List of atoms. */ ++ struct list_head atoms_list; ++ ++ /* Number of atoms. */ ++ int atom_count; ++ ++ /* A counter used to assign atom->atom_id values. */ ++ __u32 id_count; ++ ++ /* a mutex object for commit serialization */ ++ struct mutex commit_mutex; ++ ++ /* a list of all txnmrgs served by particular daemon. */ ++ struct list_head linkage; ++ ++ /* description of daemon for this txnmgr */ ++ ktxnmgrd_context *daemon; ++ ++ /* parameters. Adjustable through mount options. */ ++ unsigned int atom_max_size; ++ unsigned int atom_max_age; ++ unsigned int atom_min_size; ++ /* max number of concurrent flushers for one atom, 0 - unlimited. */ ++ unsigned int atom_max_flushers; ++ struct dentry *debugfs_atom_count; ++ struct dentry *debugfs_id_count; ++}; ++ ++/* FUNCTION DECLARATIONS */ ++ ++/* These are the externally (within Reiser4) visible transaction functions, therefore they ++ are prefixed with "txn_". For comments, see txnmgr.c. 
*/ ++ ++extern int init_txnmgr_static(void); ++extern void done_txnmgr_static(void); ++ ++extern void reiser4_init_txnmgr(txn_mgr *); ++extern void reiser4_done_txnmgr(txn_mgr *); ++ ++extern int reiser4_txn_reserve(int reserved); ++ ++extern void reiser4_txn_begin(reiser4_context * context); ++extern int reiser4_txn_end(reiser4_context * context); ++ ++extern void reiser4_txn_restart(reiser4_context * context); ++extern void reiser4_txn_restart_current(void); ++ ++extern int txnmgr_force_commit_all(struct super_block *, int); ++extern int current_atom_should_commit(void); ++ ++extern jnode *find_first_dirty_jnode(txn_atom *, int); ++ ++extern int commit_some_atoms(txn_mgr *); ++extern int force_commit_atom(txn_handle *); ++extern int flush_current_atom(int, long, long *, txn_atom **, jnode *); ++ ++extern int flush_some_atom(jnode *, long *, const struct writeback_control *, int); ++ ++extern void reiser4_atom_set_stage(txn_atom * atom, txn_stage stage); ++ ++extern int same_slum_check(jnode * base, jnode * check, int alloc_check, ++ int alloc_value); ++extern void atom_dec_and_unlock(txn_atom * atom); ++ ++extern int reiser4_try_capture(jnode * node, znode_lock_mode mode, txn_capture flags); ++extern int try_capture_page_to_invalidate(struct page *pg); ++ ++extern void reiser4_uncapture_page(struct page *pg); ++extern void reiser4_uncapture_block(jnode *); ++extern void reiser4_uncapture_jnode(jnode *); ++ ++extern int reiser4_capture_inode(struct inode *); ++extern int reiser4_uncapture_inode(struct inode *); ++ ++extern txn_atom *get_current_atom_locked_nocheck(void); ++ ++#if REISER4_DEBUG ++ ++/** ++ * atom_is_protected - make sure that nobody but us can do anything with atom ++ * @atom: atom to be checked ++ * ++ * This is used to assert that atom either entered commit stages or is spin ++ * locked. 
++ */ ++static inline int atom_is_protected(txn_atom *atom) ++{ ++ if (atom->stage >= ASTAGE_PRE_COMMIT) ++ return 1; ++ assert_spin_locked(&(atom->alock)); ++ return 1; ++} ++ ++#endif ++ ++/* Get the current atom and spinlock it if current atom present. May not return NULL */ ++static inline txn_atom *get_current_atom_locked(void) ++{ ++ txn_atom *atom; ++ ++ atom = get_current_atom_locked_nocheck(); ++ assert("zam-761", atom != NULL); ++ ++ return atom; ++} ++ ++extern txn_atom *jnode_get_atom(jnode *); ++ ++extern void reiser4_atom_wait_event(txn_atom *); ++extern void reiser4_atom_send_event(txn_atom *); ++ ++extern void insert_into_atom_ovrwr_list(txn_atom * atom, jnode * node); ++extern int reiser4_capture_super_block(struct super_block *s); ++int capture_bulk(jnode **, int count); ++ ++/* See the comment on the function blocknrset.c:blocknr_set_add for the ++ calling convention of these three routines. */ ++extern void blocknr_set_init(struct list_head * bset); ++extern void blocknr_set_destroy(struct list_head * bset); ++extern void blocknr_set_merge(struct list_head * from, struct list_head * into); ++extern int blocknr_set_add_extent(txn_atom * atom, ++ struct list_head * bset, ++ blocknr_set_entry ** new_bsep, ++ const reiser4_block_nr * start, ++ const reiser4_block_nr * len); ++extern int blocknr_set_add_pair(txn_atom * atom, struct list_head * bset, ++ blocknr_set_entry ** new_bsep, ++ const reiser4_block_nr * a, ++ const reiser4_block_nr * b); ++ ++typedef int (*blocknr_set_actor_f) (txn_atom *, const reiser4_block_nr *, ++ const reiser4_block_nr *, void *); ++ ++extern int blocknr_set_iterator(txn_atom * atom, struct list_head * bset, ++ blocknr_set_actor_f actor, void *data, ++ int delete); ++ ++/* flush code takes care about how to fuse flush queues */ ++extern void flush_init_atom(txn_atom * atom); ++extern void flush_fuse_queues(txn_atom * large, txn_atom * small); ++ ++static inline void spin_lock_atom(txn_atom *atom) ++{ ++ /* check that 
spinlocks of lower priorities are not held */ ++ assert("", (LOCK_CNT_NIL(spin_locked_txnh) && ++ LOCK_CNT_NIL(spin_locked_atom) && ++ LOCK_CNT_NIL(spin_locked_jnode) && ++ LOCK_CNT_NIL(spin_locked_zlock) && ++ LOCK_CNT_NIL(rw_locked_dk) && ++ LOCK_CNT_NIL(rw_locked_tree))); ++ ++ spin_lock(&(atom->alock)); ++ ++ LOCK_CNT_INC(spin_locked_atom); ++ LOCK_CNT_INC(spin_locked); ++} ++ ++static inline void spin_lock_atom_nested(txn_atom *atom) ++{ ++ assert("", (LOCK_CNT_NIL(spin_locked_txnh) && ++ LOCK_CNT_NIL(spin_locked_jnode) && ++ LOCK_CNT_NIL(spin_locked_zlock) && ++ LOCK_CNT_NIL(rw_locked_dk) && ++ LOCK_CNT_NIL(rw_locked_tree))); ++ ++ spin_lock_nested(&(atom->alock), SINGLE_DEPTH_NESTING); ++ ++ LOCK_CNT_INC(spin_locked_atom); ++ LOCK_CNT_INC(spin_locked); ++} ++ ++static inline int spin_trylock_atom(txn_atom *atom) ++{ ++ if (spin_trylock(&(atom->alock))) { ++ LOCK_CNT_INC(spin_locked_atom); ++ LOCK_CNT_INC(spin_locked); ++ return 1; ++ } ++ return 0; ++} ++ ++static inline void spin_unlock_atom(txn_atom *atom) ++{ ++ assert_spin_locked(&(atom->alock)); ++ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_atom)); ++ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); ++ ++ LOCK_CNT_DEC(spin_locked_atom); ++ LOCK_CNT_DEC(spin_locked); ++ ++ spin_unlock(&(atom->alock)); ++} ++ ++static inline void spin_lock_txnh(txn_handle *txnh) ++{ ++ /* check that spinlocks of lower priorities are not held */ ++ assert("", (LOCK_CNT_NIL(rw_locked_dk) && ++ LOCK_CNT_NIL(spin_locked_zlock) && ++ LOCK_CNT_NIL(rw_locked_tree))); ++ ++ spin_lock(&(txnh->hlock)); ++ ++ LOCK_CNT_INC(spin_locked_txnh); ++ LOCK_CNT_INC(spin_locked); ++} ++ ++static inline int spin_trylock_txnh(txn_handle *txnh) ++{ ++ if (spin_trylock(&(txnh->hlock))) { ++ LOCK_CNT_INC(spin_locked_txnh); ++ LOCK_CNT_INC(spin_locked); ++ return 1; ++ } ++ return 0; ++} ++ ++static inline void spin_unlock_txnh(txn_handle *txnh) ++{ ++ assert_spin_locked(&(txnh->hlock)); ++ assert("nikita-1375", 
LOCK_CNT_GTZ(spin_locked_txnh)); ++ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); ++ ++ LOCK_CNT_DEC(spin_locked_txnh); ++ LOCK_CNT_DEC(spin_locked); ++ ++ spin_unlock(&(txnh->hlock)); ++} ++ ++#define spin_ordering_pred_txnmgr(tmgr) \ ++ ( LOCK_CNT_NIL(spin_locked_atom) && \ ++ LOCK_CNT_NIL(spin_locked_txnh) && \ ++ LOCK_CNT_NIL(spin_locked_jnode) && \ ++ LOCK_CNT_NIL(rw_locked_zlock) && \ ++ LOCK_CNT_NIL(rw_locked_dk) && \ ++ LOCK_CNT_NIL(rw_locked_tree) ) ++ ++static inline void spin_lock_txnmgr(txn_mgr *mgr) ++{ ++ /* check that spinlocks of lower priorities are not held */ ++ assert("", (LOCK_CNT_NIL(spin_locked_atom) && ++ LOCK_CNT_NIL(spin_locked_txnh) && ++ LOCK_CNT_NIL(spin_locked_jnode) && ++ LOCK_CNT_NIL(spin_locked_zlock) && ++ LOCK_CNT_NIL(rw_locked_dk) && ++ LOCK_CNT_NIL(rw_locked_tree))); ++ ++ spin_lock(&(mgr->tmgr_lock)); ++ ++ LOCK_CNT_INC(spin_locked_txnmgr); ++ LOCK_CNT_INC(spin_locked); ++} ++ ++static inline int spin_trylock_txnmgr(txn_mgr *mgr) ++{ ++ if (spin_trylock(&(mgr->tmgr_lock))) { ++ LOCK_CNT_INC(spin_locked_txnmgr); ++ LOCK_CNT_INC(spin_locked); ++ return 1; ++ } ++ return 0; ++} ++ ++static inline void spin_unlock_txnmgr(txn_mgr *mgr) ++{ ++ assert_spin_locked(&(mgr->tmgr_lock)); ++ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_txnmgr)); ++ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); ++ ++ LOCK_CNT_DEC(spin_locked_txnmgr); ++ LOCK_CNT_DEC(spin_locked); ++ ++ spin_unlock(&(mgr->tmgr_lock)); ++} ++ ++typedef enum { ++ FQ_IN_USE = 0x1 ++} flush_queue_state_t; ++ ++typedef struct flush_queue flush_queue_t; ++ ++/* This is an accumulator for jnodes prepared for writing to disk. A flush queue ++ is filled by the jnode_flush() routine, and written to disk under memory ++ pressure or at atom commit time. */ ++/* LOCKING: fq state and fq->atom are protected by guard spinlock, fq->nr_queued ++ field and fq->prepped list can be modified if atom is spin-locked and fq ++ object is "in-use" state. 
For read-only traversal of the fq->prepped list ++ and reading of the fq->nr_queued field it is enough to keep fq "in-use" or ++ only have atom spin-locked. */ ++struct flush_queue { ++ /* linkage element is the first in this structure to make debugging ++ easier. See field in atom struct for description of list. */ ++ struct list_head alink; ++ /* A spinlock to protect changes of fq state and fq->atom pointer */ ++ spinlock_t guard; ++ /* flush_queue state: [in_use | ready] */ ++ flush_queue_state_t state; ++ /* A list which contains queued nodes, queued nodes are removed from any ++ * atom's list and put on this ->prepped one. */ ++ struct list_head prepped; ++ /* number of submitted i/o requests */ ++ atomic_t nr_submitted; ++ /* number of i/o errors */ ++ atomic_t nr_errors; ++ /* An atom this flush queue is attached to */ ++ txn_atom *atom; ++ /* A wait queue head to wait on i/o completion */ ++ wait_queue_head_t wait; ++#if REISER4_DEBUG ++ /* A thread which took this fq in exclusive use, NULL if fq is free, ++ * used for debugging. 
*/ ++ struct task_struct *owner; ++#endif ++}; ++ ++extern int reiser4_fq_by_atom(txn_atom *, flush_queue_t **); ++extern void reiser4_fq_put_nolock(flush_queue_t *); ++extern void reiser4_fq_put(flush_queue_t *); ++extern void reiser4_fuse_fq(txn_atom * to, txn_atom * from); ++extern void queue_jnode(flush_queue_t *, jnode *); ++ ++extern int reiser4_write_fq(flush_queue_t *, long *, int); ++extern int current_atom_finish_all_fq(void); ++extern void init_atom_fq_parts(txn_atom *); ++ ++extern reiser4_block_nr txnmgr_count_deleted_blocks(void); ++ ++extern void znode_make_dirty(znode * node); ++extern void jnode_make_dirty_locked(jnode * node); ++ ++extern int reiser4_sync_atom(txn_atom * atom); ++ ++#if REISER4_DEBUG ++extern int atom_fq_parts_are_clean(txn_atom *); ++#endif ++ ++extern void add_fq_to_bio(flush_queue_t *, struct bio *); ++extern flush_queue_t *get_fq_for_current_atom(void); ++ ++void protected_jnodes_init(protected_jnodes * list); ++void protected_jnodes_done(protected_jnodes * list); ++void reiser4_invalidate_list(struct list_head * head); ++ ++# endif /* __REISER4_TXNMGR_H__ */ ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/type_safe_hash.h b/fs/reiser4/type_safe_hash.h +new file mode 100644 +index 0000000..b2fdacd +--- /dev/null ++++ b/fs/reiser4/type_safe_hash.h +@@ -0,0 +1,320 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* A hash table class that uses hash chains (singly-linked) and is ++ parametrized to provide type safety. */ ++ ++#ifndef __REISER4_TYPE_SAFE_HASH_H__ ++#define __REISER4_TYPE_SAFE_HASH_H__ ++ ++#include "debug.h" ++ ++#include ++/* Step 1: Use TYPE_SAFE_HASH_DECLARE() to define the TABLE and LINK objects ++ based on the object type. You need to declare the item type before ++ this definition, define it after this definition. 
*/ ++#define TYPE_SAFE_HASH_DECLARE(PREFIX,ITEM_TYPE) \ ++ \ ++typedef struct PREFIX##_hash_table_ PREFIX##_hash_table; \ ++typedef struct PREFIX##_hash_link_ PREFIX##_hash_link; \ ++ \ ++struct PREFIX##_hash_table_ \ ++{ \ ++ ITEM_TYPE **_table; \ ++ __u32 _buckets; \ ++}; \ ++ \ ++struct PREFIX##_hash_link_ \ ++{ \ ++ ITEM_TYPE *_next; \ ++} ++ ++/* Step 2: Define the object type of the hash: give it field of type ++ PREFIX_hash_link. */ ++ ++/* Step 3: Use TYPE_SAFE_HASH_DEFINE to define the hash table interface using ++ the type and field name used in step 3. The arguments are: ++ ++ ITEM_TYPE The item type being hashed ++ KEY_TYPE The type of key being hashed ++ KEY_NAME The name of the key field within the item ++ LINK_NAME The name of the link field within the item, which you must make type PREFIX_hash_link) ++ HASH_FUNC The name of the hash function (or macro, takes const pointer to key) ++ EQ_FUNC The name of the equality function (or macro, takes const pointer to two keys) ++ ++ It implements these functions: ++ ++ prefix_hash_init Initialize the table given its size. ++ prefix_hash_insert Insert an item ++ prefix_hash_insert_index Insert an item w/ precomputed hash_index ++ prefix_hash_find Find an item by key ++ prefix_hash_find_index Find an item w/ precomputed hash_index ++ prefix_hash_remove Remove an item, returns 1 if found, 0 if not found ++ prefix_hash_remove_index Remove an item w/ precomputed hash_index ++ ++ If you'd like something to be done differently, feel free to ask me ++ for modifications. Additional features that could be added but ++ have not been: ++ ++ prefix_hash_remove_key Find and remove an item by key ++ prefix_hash_remove_key_index Find and remove an item by key w/ precomputed hash_index ++ ++ The hash_function currently receives only the key as an argument, ++ meaning it must somehow know the number of buckets. If this is a ++ problem let me know. ++ ++ This hash table uses a single-linked hash chain. 
This means ++ insertion is fast but deletion requires searching the chain. ++ ++ There is also the doubly-linked hash chain approach, under which ++ deletion requires no search but the code is longer and it takes two ++ pointers per item. ++ ++ The circularly-linked approach has the shortest code but requires ++ two pointers per bucket, doubling the size of the bucket array (in ++ addition to two pointers per item). ++*/ ++#define TYPE_SAFE_HASH_DEFINE(PREFIX,ITEM_TYPE,KEY_TYPE,KEY_NAME,LINK_NAME,HASH_FUNC,EQ_FUNC) \ ++ \ ++static __inline__ void \ ++PREFIX##_check_hash (PREFIX##_hash_table *table UNUSED_ARG, \ ++ __u32 hash UNUSED_ARG) \ ++{ \ ++ assert("nikita-2780", hash < table->_buckets); \ ++} \ ++ \ ++static __inline__ int \ ++PREFIX##_hash_init (PREFIX##_hash_table *hash, \ ++ __u32 buckets) \ ++{ \ ++ hash->_table = (ITEM_TYPE**) KMALLOC (sizeof (ITEM_TYPE*) * buckets); \ ++ hash->_buckets = buckets; \ ++ if (hash->_table == NULL) \ ++ { \ ++ return RETERR(-ENOMEM); \ ++ } \ ++ memset (hash->_table, 0, sizeof (ITEM_TYPE*) * buckets); \ ++ ON_DEBUG(printk(#PREFIX "_hash_table: %i buckets\n", buckets)); \ ++ return 0; \ ++} \ ++ \ ++static __inline__ void \ ++PREFIX##_hash_done (PREFIX##_hash_table *hash) \ ++{ \ ++ if (REISER4_DEBUG && hash->_table != NULL) { \ ++ __u32 i; \ ++ for (i = 0 ; i < hash->_buckets ; ++ i) \ ++ assert("nikita-2905", hash->_table[i] == NULL); \ ++ } \ ++ if (hash->_table != NULL) \ ++ KFREE (hash->_table, sizeof (ITEM_TYPE*) * hash->_buckets); \ ++ hash->_table = NULL; \ ++} \ ++ \ ++static __inline__ void \ ++PREFIX##_hash_prefetch_next (ITEM_TYPE *item) \ ++{ \ ++ prefetch(item->LINK_NAME._next); \ ++} \ ++ \ ++static __inline__ void \ ++PREFIX##_hash_prefetch_bucket (PREFIX##_hash_table *hash, \ ++ __u32 index) \ ++{ \ ++ prefetch(hash->_table[index]); \ ++} \ ++ \ ++static __inline__ ITEM_TYPE* \ ++PREFIX##_hash_find_index (PREFIX##_hash_table *hash, \ ++ __u32 hash_index, \ ++ KEY_TYPE const *find_key) \ ++{ \ ++ ITEM_TYPE 
*item; \ ++ \ ++ PREFIX##_check_hash(hash, hash_index); \ ++ \ ++ for (item = hash->_table[hash_index]; \ ++ item != NULL; \ ++ item = item->LINK_NAME._next) \ ++ { \ ++ prefetch(item->LINK_NAME._next); \ ++ prefetch(item->LINK_NAME._next + offsetof(ITEM_TYPE, KEY_NAME)); \ ++ if (EQ_FUNC (& item->KEY_NAME, find_key)) \ ++ { \ ++ return item; \ ++ } \ ++ } \ ++ \ ++ return NULL; \ ++} \ ++ \ ++static __inline__ ITEM_TYPE* \ ++PREFIX##_hash_find_index_lru (PREFIX##_hash_table *hash, \ ++ __u32 hash_index, \ ++ KEY_TYPE const *find_key) \ ++{ \ ++ ITEM_TYPE ** item = &hash->_table[hash_index]; \ ++ \ ++ PREFIX##_check_hash(hash, hash_index); \ ++ \ ++ while (*item != NULL) { \ ++ prefetch(&(*item)->LINK_NAME._next); \ ++ if (EQ_FUNC (&(*item)->KEY_NAME, find_key)) { \ ++ ITEM_TYPE *found; \ ++ \ ++ found = *item; \ ++ *item = found->LINK_NAME._next; \ ++ found->LINK_NAME._next = hash->_table[hash_index]; \ ++ hash->_table[hash_index] = found; \ ++ return found; \ ++ } \ ++ item = &(*item)->LINK_NAME._next; \ ++ } \ ++ return NULL; \ ++} \ ++ \ ++static __inline__ int \ ++PREFIX##_hash_remove_index (PREFIX##_hash_table *hash, \ ++ __u32 hash_index, \ ++ ITEM_TYPE *del_item) \ ++{ \ ++ ITEM_TYPE ** hash_item_p = &hash->_table[hash_index]; \ ++ \ ++ PREFIX##_check_hash(hash, hash_index); \ ++ \ ++ while (*hash_item_p != NULL) { \ ++ prefetch(&(*hash_item_p)->LINK_NAME._next); \ ++ if (*hash_item_p == del_item) { \ ++ *hash_item_p = (*hash_item_p)->LINK_NAME._next; \ ++ return 1; \ ++ } \ ++ hash_item_p = &(*hash_item_p)->LINK_NAME._next; \ ++ } \ ++ return 0; \ ++} \ ++ \ ++static __inline__ void \ ++PREFIX##_hash_insert_index (PREFIX##_hash_table *hash, \ ++ __u32 hash_index, \ ++ ITEM_TYPE *ins_item) \ ++{ \ ++ PREFIX##_check_hash(hash, hash_index); \ ++ \ ++ ins_item->LINK_NAME._next = hash->_table[hash_index]; \ ++ hash->_table[hash_index] = ins_item; \ ++} \ ++ \ ++static __inline__ void \ ++PREFIX##_hash_insert_index_rcu (PREFIX##_hash_table *hash, \ ++ __u32 
hash_index, \ ++ ITEM_TYPE *ins_item) \ ++{ \ ++ PREFIX##_check_hash(hash, hash_index); \ ++ \ ++ ins_item->LINK_NAME._next = hash->_table[hash_index]; \ ++ smp_wmb(); \ ++ hash->_table[hash_index] = ins_item; \ ++} \ ++ \ ++static __inline__ ITEM_TYPE* \ ++PREFIX##_hash_find (PREFIX##_hash_table *hash, \ ++ KEY_TYPE const *find_key) \ ++{ \ ++ return PREFIX##_hash_find_index (hash, HASH_FUNC(hash, find_key), find_key); \ ++} \ ++ \ ++static __inline__ ITEM_TYPE* \ ++PREFIX##_hash_find_lru (PREFIX##_hash_table *hash, \ ++ KEY_TYPE const *find_key) \ ++{ \ ++ return PREFIX##_hash_find_index_lru (hash, HASH_FUNC(hash, find_key), find_key); \ ++} \ ++ \ ++static __inline__ int \ ++PREFIX##_hash_remove (PREFIX##_hash_table *hash, \ ++ ITEM_TYPE *del_item) \ ++{ \ ++ return PREFIX##_hash_remove_index (hash, \ ++ HASH_FUNC(hash, &del_item->KEY_NAME), del_item); \ ++} \ ++ \ ++static __inline__ int \ ++PREFIX##_hash_remove_rcu (PREFIX##_hash_table *hash, \ ++ ITEM_TYPE *del_item) \ ++{ \ ++ return PREFIX##_hash_remove (hash, del_item); \ ++} \ ++ \ ++static __inline__ void \ ++PREFIX##_hash_insert (PREFIX##_hash_table *hash, \ ++ ITEM_TYPE *ins_item) \ ++{ \ ++ return PREFIX##_hash_insert_index (hash, \ ++ HASH_FUNC(hash, &ins_item->KEY_NAME), ins_item); \ ++} \ ++ \ ++static __inline__ void \ ++PREFIX##_hash_insert_rcu (PREFIX##_hash_table *hash, \ ++ ITEM_TYPE *ins_item) \ ++{ \ ++ return PREFIX##_hash_insert_index_rcu (hash, HASH_FUNC(hash, &ins_item->KEY_NAME), \ ++ ins_item); \ ++} \ ++ \ ++static __inline__ ITEM_TYPE * \ ++PREFIX##_hash_first (PREFIX##_hash_table *hash, __u32 ind) \ ++{ \ ++ ITEM_TYPE *first; \ ++ \ ++ for (first = NULL; ind < hash->_buckets; ++ ind) { \ ++ first = hash->_table[ind]; \ ++ if (first != NULL) \ ++ break; \ ++ } \ ++ return first; \ ++} \ ++ \ ++static __inline__ ITEM_TYPE * \ ++PREFIX##_hash_next (PREFIX##_hash_table *hash, \ ++ ITEM_TYPE *item) \ ++{ \ ++ ITEM_TYPE *next; \ ++ \ ++ if (item == NULL) \ ++ return NULL; \ ++ next = 
item->LINK_NAME._next; \ ++ if (next == NULL) \ ++ next = PREFIX##_hash_first (hash, HASH_FUNC(hash, &item->KEY_NAME) + 1); \ ++ return next; \ ++} \ ++ \ ++typedef struct {} PREFIX##_hash_dummy ++ ++#define for_all_ht_buckets(table, head) \ ++for ((head) = &(table) -> _table[ 0 ] ; \ ++ (head) != &(table) -> _table[ (table) -> _buckets ] ; ++ (head)) ++ ++#define for_all_in_bucket(bucket, item, next, field) \ ++for ((item) = *(bucket), (next) = (item) ? (item) -> field._next : NULL ; \ ++ (item) != NULL ; \ ++ (item) = (next), (next) = (item) ? (item) -> field._next : NULL ) ++ ++#define for_all_in_htable(table, prefix, item, next) \ ++for ((item) = prefix ## _hash_first ((table), 0), \ ++ (next) = prefix ## _hash_next ((table), (item)) ; \ ++ (item) != NULL ; \ ++ (item) = (next), \ ++ (next) = prefix ## _hash_next ((table), (item))) ++ ++/* __REISER4_TYPE_SAFE_HASH_H__ */ ++#endif ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/vfs_ops.c b/fs/reiser4/vfs_ops.c +new file mode 100644 +index 0000000..31afd3e +--- /dev/null ++++ b/fs/reiser4/vfs_ops.c +@@ -0,0 +1,259 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Interface to VFS. Reiser4 {super|export|dentry}_operations are defined ++ here. 
*/ ++ ++#include "forward.h" ++#include "debug.h" ++#include "dformat.h" ++#include "coord.h" ++#include "plugin/item/item.h" ++#include "plugin/file/file.h" ++#include "plugin/security/perm.h" ++#include "plugin/disk_format/disk_format.h" ++#include "plugin/plugin.h" ++#include "plugin/plugin_set.h" ++#include "plugin/object.h" ++#include "txnmgr.h" ++#include "jnode.h" ++#include "znode.h" ++#include "block_alloc.h" ++#include "tree.h" ++#include "vfs_ops.h" ++#include "inode.h" ++#include "page_cache.h" ++#include "ktxnmgrd.h" ++#include "super.h" ++#include "reiser4.h" ++#include "entd.h" ++#include "status_flags.h" ++#include "flush.h" ++#include "dscale.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* update inode stat-data by calling plugin */ ++int reiser4_update_sd(struct inode *object) ++{ ++ file_plugin *fplug; ++ ++ assert("nikita-2338", object != NULL); ++ /* check for read-only file system. */ ++ if (IS_RDONLY(object)) ++ return 0; ++ ++ fplug = inode_file_plugin(object); ++ assert("nikita-2339", fplug != NULL); ++ return fplug->write_sd_by_inode(object); ++} ++ ++/* helper function: increase inode nlink count and call plugin method to save ++ updated stat-data. 
++ ++ Used by link/create and during creation of dot and dotdot in mkdir ++*/ ++int reiser4_add_nlink(struct inode *object /* object to which link is added */ , ++ struct inode *parent /* parent where new entry will be */ ++ , ++ int write_sd_p /* true if stat-data has to be ++ * updated */ ) ++{ ++ file_plugin *fplug; ++ int result; ++ ++ assert("nikita-1351", object != NULL); ++ ++ fplug = inode_file_plugin(object); ++ assert("nikita-1445", fplug != NULL); ++ ++ /* ask plugin whether it can add yet another link to this ++ object */ ++ if (!fplug->can_add_link(object)) ++ return RETERR(-EMLINK); ++ ++ assert("nikita-2211", fplug->add_link != NULL); ++ /* call plugin to do actual addition of link */ ++ result = fplug->add_link(object, parent); ++ ++ /* optionally update stat data */ ++ if (result == 0 && write_sd_p) ++ result = fplug->write_sd_by_inode(object); ++ return result; ++} ++ ++/* helper function: decrease inode nlink count and call plugin method to save ++ updated stat-data. ++ ++ Used by unlink/create ++*/ ++int reiser4_del_nlink(struct inode *object /* object from which link is ++ * removed */ , ++ struct inode *parent /* parent where entry was */ , ++ int write_sd_p /* true is stat-data has to be ++ * updated */ ) ++{ ++ file_plugin *fplug; ++ int result; ++ ++ assert("nikita-1349", object != NULL); ++ ++ fplug = inode_file_plugin(object); ++ assert("nikita-1350", fplug != NULL); ++ assert("nikita-1446", object->i_nlink > 0); ++ assert("nikita-2210", fplug->rem_link != NULL); ++ ++ /* call plugin to do actual deletion of link */ ++ result = fplug->rem_link(object, parent); ++ ++ /* optionally update stat data */ ++ if (result == 0 && write_sd_p) ++ result = fplug->write_sd_by_inode(object); ++ return result; ++} ++ ++/* Release reiser4 dentry. This is d_op->d_release() method. 
*/ ++static void reiser4_d_release(struct dentry *dentry /* dentry released */ ) ++{ ++ reiser4_free_dentry_fsdata(dentry); ++} ++ ++/* ++ * Called by reiser4_sync_inodes(), during speculative write-back (through ++ * pdflush, or balance_dirty_pages()). ++ */ ++void reiser4_writeout(struct super_block *sb, struct writeback_control *wbc) ++{ ++ long written = 0; ++ int repeats = 0; ++ int result; ++ struct address_space *mapping; ++ ++ /* ++ * Performs early flushing, trying to free some memory. If there is ++ * nothing to flush, commits some atoms. ++ */ ++ ++ /* Commit all atoms if reiser4_writepages() is called from sys_sync() or ++ sys_fsync(). */ ++ if (wbc->sync_mode != WB_SYNC_NONE) { ++ txnmgr_force_commit_all(sb, 0); ++ return; ++ } ++ ++ BUG_ON(reiser4_get_super_fake(sb) == NULL); ++ mapping = reiser4_get_super_fake(sb)->i_mapping; ++ do { ++ long nr_submitted = 0; ++ jnode *node = NULL; ++ ++ /* do not put more requests to overload write queue */ ++ if (wbc->nonblocking && ++ bdi_write_congested(mapping->backing_dev_info)) { ++ blk_run_address_space(mapping); ++ wbc->encountered_congestion = 1; ++ break; ++ } ++ repeats++; ++ BUG_ON(wbc->nr_to_write <= 0); ++ ++ if (get_current_context()->entd) { ++ entd_context *ent = get_entd_context(sb); ++ ++ if (ent->cur_request->node) ++ /* ++ * this is ent thread and it managed to capture ++ * requested page itself - start flush from ++ * that page ++ */ ++ node = jref(ent->cur_request->node); ++ } ++ ++ result = flush_some_atom(node, &nr_submitted, wbc, ++ JNODE_FLUSH_WRITE_BLOCKS); ++ if (result != 0) ++ warning("nikita-31001", "Flush failed: %i", result); ++ if (node) ++ jput(node); ++ if (!nr_submitted) ++ break; ++ ++ wbc->nr_to_write -= nr_submitted; ++ written += nr_submitted; ++ } while (wbc->nr_to_write > 0); ++} ++ ++void reiser4_throttle_write(struct inode *inode) ++{ ++ reiser4_txn_restart_current(); ++ balance_dirty_pages_ratelimited(inode->i_mapping); ++} ++ ++const char *REISER4_SUPER_MAGIC_STRING = 
"ReIsEr4"; ++const int REISER4_MAGIC_OFFSET = 16 * 4096; /* offset to magic string from the ++ * beginning of device */ ++ ++/* ++ * Reiser4 initialization/shutdown. ++ * ++ * Code below performs global reiser4 initialization that is done either as ++ * part of kernel initialization (when reiser4 is statically built-in), or ++ * during reiser4 module load (when compiled as module). ++ */ ++ ++void reiser4_handle_error(void) ++{ ++ struct super_block *sb = reiser4_get_current_sb(); ++ ++ if (!sb) ++ return; ++ reiser4_status_write(REISER4_STATUS_DAMAGED, 0, ++ "Filesystem error occured"); ++ switch (get_super_private(sb)->onerror) { ++ case 0: ++ reiser4_panic("foobar-42", "Filesystem error occured\n"); ++ case 1: ++ default: ++ if (sb->s_flags & MS_RDONLY) ++ return; ++ sb->s_flags |= MS_RDONLY; ++ break; ++ } ++} ++ ++struct dentry_operations reiser4_dentry_operations = { ++ .d_revalidate = NULL, ++ .d_hash = NULL, ++ .d_compare = NULL, ++ .d_delete = NULL, ++ .d_release = reiser4_d_release, ++ .d_iput = NULL, ++}; ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/vfs_ops.h b/fs/reiser4/vfs_ops.h +new file mode 100644 +index 0000000..03e16ce +--- /dev/null ++++ b/fs/reiser4/vfs_ops.h +@@ -0,0 +1,53 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* vfs_ops.c's exported symbols */ ++ ++#if !defined( __FS_REISER4_VFS_OPS_H__ ) ++#define __FS_REISER4_VFS_OPS_H__ ++ ++#include "forward.h" ++#include "coord.h" ++#include "seal.h" ++#include "plugin/file/file.h" ++#include "super.h" ++#include "readahead.h" ++ ++#include /* for loff_t */ ++#include /* for struct address_space */ ++#include /* for struct dentry */ ++#include ++#include ++ ++/* address space operations */ ++int reiser4_writepage(struct page *, struct writeback_control *); ++int reiser4_set_page_dirty(struct page *); ++void reiser4_invalidatepage(struct page *, unsigned long offset); ++int reiser4_releasepage(struct page *, gfp_t); ++ ++extern int reiser4_update_sd(struct inode *); ++extern int reiser4_add_nlink(struct inode *, struct inode *, int); ++extern int reiser4_del_nlink(struct inode *, struct inode *, int); ++ ++extern int reiser4_start_up_io(struct page *page); ++extern void reiser4_throttle_write(struct inode *); ++extern int jnode_is_releasable(jnode *); ++ ++#define CAPTURE_APAGE_BURST (1024l) ++void reiser4_writeout(struct super_block *, struct writeback_control *); ++ ++extern void reiser4_handle_error(void); ++ ++/* __FS_REISER4_VFS_OPS_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/wander.c b/fs/reiser4/wander.c +new file mode 100644 +index 0000000..6d1d1d9 +--- /dev/null ++++ b/fs/reiser4/wander.c +@@ -0,0 +1,1797 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Reiser4 Wandering Log */ ++ ++/* You should read http://www.namesys.com/txn-doc.html ++ ++ That describes how filesystem operations are performed as atomic ++ transactions, and how we try to arrange it so that we can write most of the ++ data only once while performing the operation atomically. ++ ++ For the purposes of this code, it is enough for it to understand that it ++ has been told a given block should be written either once, or twice (if ++ twice then once to the wandered location and once to the real location). ++ ++ This code guarantees that those blocks that are defined to be part of an ++ atom either all take effect or none of them take effect. ++ ++ Relocate set nodes are submitted to write by the jnode_flush() routine, and ++ the overwrite set is submitted by reiser4_write_log(). This is because with ++ the overwrite set we seek to optimize writes, and with the relocate set we ++ seek to cause disk order to correlate with the parent first pre-order. ++ ++ reiser4_write_log() allocates and writes wandered blocks and maintains ++ additional on-disk structures of the atom as wander records (each wander ++ record occupies one block) for storing of the "wandered map" (a table which ++ contains a relation between wandered and real block numbers) and other ++ information which might be needed at transaction recovery time. ++ ++ The wander records are unidirectionally linked into a circle: each wander ++ record contains a block number of the next wander record, the last wander ++ record points to the first one. 
++ ++ One wander record (named "tx head" in this file) has a format which is ++ different from the other wander records. The "tx head" has a reference to the ++ "tx head" block of the previously committed atom. Also, "tx head" contains ++ fs information (the free blocks counter, and the oid allocator state) which ++ is logged in a special way . ++ ++ There are two journal control blocks, named journal header and journal ++ footer which have fixed on-disk locations. The journal header has a ++ reference to the "tx head" block of the last committed atom. The journal ++ footer points to the "tx head" of the last flushed atom. The atom is ++ "played" when all blocks from its overwrite set are written to disk the ++ second time (i.e. written to their real locations). ++ ++ NOTE: People who know reiserfs internals and its journal structure might be ++ confused with these terms journal footer and journal header. There is a table ++ with terms of similar semantics in reiserfs (reiser3) and reiser4: ++ ++ REISER3 TERM | REISER4 TERM | DESCRIPTION ++ --------------------+-----------------------+---------------------------- ++ commit record | journal header | atomic write of this record ++ | | ends transaction commit ++ --------------------+-----------------------+---------------------------- ++ journal header | journal footer | atomic write of this record ++ | | ends post-commit writes. ++ | | After successful ++ | | writing of this journal ++ | | blocks (in reiser3) or ++ | | wandered blocks/records are ++ | | free for re-use. ++ --------------------+-----------------------+---------------------------- ++ ++ The atom commit process is the following: ++ ++ 1. The overwrite set is taken from atom's clean list, and its size is ++ counted. ++ ++ 2. The number of necessary wander records (including tx head) is calculated, ++ and the wander record blocks are allocated. ++ ++ 3. Allocate wandered blocks and populate wander records by wandered map. ++ ++ 4. 
submit write requests for wander records and wandered blocks. ++ ++ 5. wait until submitted write requests complete. ++ ++ 6. update journal header: change the pointer to the block number of just ++ written tx head, submit an i/o for modified journal header block and wait ++ for i/o completion. ++ ++ NOTE: The special logging for bitmap blocks and some reiser4 super block ++ fields makes processes of atom commit, flush and recovering a bit more ++ complex (see comments in the source code for details). ++ ++ The atom playing process is the following: ++ ++ 1. Write atom's overwrite set in-place. ++ ++ 2. Wait on i/o. ++ ++ 3. Update journal footer: change the pointer to block number of tx head ++ block of the atom we currently flushing, submit an i/o, wait on i/o ++ completion. ++ ++ 4. Free disk space which was used for wandered blocks and wander records. ++ ++ After the freeing of wandered blocks and wander records we have that journal ++ footer points to the on-disk structure which might be overwritten soon. ++ Neither the log writer nor the journal recovery procedure use that pointer ++ for accessing the data. When the journal recovery procedure finds the oldest ++ transaction it compares the journal footer pointer value with the "prev_tx" ++ pointer value in tx head, if values are equal the oldest not flushed ++ transaction is found. ++ ++ NOTE on disk space leakage: the information about of what blocks and how many ++ blocks are allocated for wandered blocks, wandered records is not written to ++ the disk because of special logging for bitmaps and some super blocks ++ counters. After a system crash we the reiser4 does not remember those ++ objects allocation, thus we have no such a kind of disk space leakage. ++*/ ++ ++/* Special logging of reiser4 super block fields. 
*/ ++ ++/* There are some reiser4 super block fields (free block count and OID allocator ++ state (number of files and next free OID) which are logged separately from ++ super block to avoid unnecessary atom fusion. ++ ++ So, the reiser4 super block can be not captured by a transaction with ++ allocates/deallocates disk blocks or create/delete file objects. Moreover, ++ the reiser4 on-disk super block is not touched when such a transaction is ++ committed and flushed. Those "counters logged specially" are logged in "tx ++ head" blocks and in the journal footer block. ++ ++ A step-by-step description of special logging: ++ ++ 0. The per-atom information about deleted or created files and allocated or ++ freed blocks is collected during the transaction. The atom's ++ ->nr_objects_created and ->nr_objects_deleted are for object ++ deletion/creation tracking, the numbers of allocated and freed blocks are ++ calculated using atom's delete set and atom's capture list -- all new and ++ relocated nodes should be on atom's clean list and should have JNODE_RELOC ++ bit set. ++ ++ 1. The "logged specially" reiser4 super block fields have their "committed" ++ versions in the reiser4 in-memory super block. They get modified only at ++ atom commit time. The atom's commit thread has an exclusive access to those ++ "committed" fields because the log writer implementation supports only one ++ atom commit a time (there is a per-fs "commit" mutex). At ++ that time "committed" counters are modified using per-atom information ++ collected during the transaction. These counters are stored on disk as a ++ part of tx head block when atom is committed. ++ ++ 2. When the atom is flushed the value of the free block counter and the OID ++ allocator state get written to the journal footer block. A special journal ++ procedure (journal_recover_sb_data()) takes those values from the journal ++ footer and updates the reiser4 in-memory super block. 
++ ++ NOTE: That means free block count and OID allocator state are logged ++ separately from the reiser4 super block regardless of the fact that the ++ reiser4 super block has fields to store both the free block counter and the ++ OID allocator. ++ ++ Writing the whole super block at commit time requires knowing true values of ++ all its fields without changes made by not yet committed transactions. It is ++ possible by having their "committed" version of the super block like the ++ reiser4 bitmap blocks have "committed" and "working" versions. However, ++ another scheme was implemented which stores special logged values in the ++ unused free space inside transaction head block. In my opinion it has an ++ advantage of not writing whole super block when only part of it was ++ modified. */ ++ ++#include "debug.h" ++#include "dformat.h" ++#include "txnmgr.h" ++#include "jnode.h" ++#include "znode.h" ++#include "block_alloc.h" ++#include "page_cache.h" ++#include "wander.h" ++#include "reiser4.h" ++#include "super.h" ++#include "vfs_ops.h" ++#include "writeout.h" ++#include "inode.h" ++#include "entd.h" ++ ++#include ++#include /* for struct super_block */ ++#include /* for struct page */ ++#include ++#include /* for struct bio */ ++#include ++ ++static int write_jnodes_to_disk_extent( ++ jnode *, int, const reiser4_block_nr *, flush_queue_t *, int); ++ ++/* The commit_handle is a container for objects needed at atom commit time */ ++struct commit_handle { ++ /* A pointer to atom's list of OVRWR nodes */ ++ struct list_head *overwrite_set; ++ /* atom's overwrite set size */ ++ int overwrite_set_size; ++ /* jnodes for wander record blocks */ ++ struct list_head tx_list; ++ /* number of wander records */ ++ __u32 tx_size; ++ /* 'committed' sb counters are saved here until atom is completely ++ flushed */ ++ __u64 free_blocks; ++ __u64 nr_files; ++ __u64 next_oid; ++ /* A pointer to the atom which is being committed */ ++ txn_atom *atom; ++ /* A pointer to current super 
block */ ++ struct super_block *super; ++ /* The counter of modified bitmaps */ ++ reiser4_block_nr nr_bitmap; ++}; ++ ++static void init_commit_handle(struct commit_handle *ch, txn_atom *atom) ++{ ++ memset(ch, 0, sizeof(struct commit_handle)); ++ INIT_LIST_HEAD(&ch->tx_list); ++ ++ ch->atom = atom; ++ ch->super = reiser4_get_current_sb(); ++} ++ ++static void done_commit_handle(struct commit_handle *ch) ++{ ++ assert("zam-690", list_empty(&ch->tx_list)); ++} ++ ++static inline int reiser4_use_write_barrier(struct super_block * s) ++{ ++ return !reiser4_is_set(s, REISER4_NO_WRITE_BARRIER); ++} ++ ++static void disable_write_barrier(struct super_block * s) ++{ ++ notice("zam-1055", "%s does not support write barriers," ++ " using synchronous write instead.", s->s_id); ++ set_bit((int)REISER4_NO_WRITE_BARRIER, &get_super_private(s)->fs_flags); ++} ++ ++/* fill journal header block data */ ++static void format_journal_header(struct commit_handle *ch) ++{ ++ struct reiser4_super_info_data *sbinfo; ++ struct journal_header *header; ++ jnode *txhead; ++ ++ sbinfo = get_super_private(ch->super); ++ assert("zam-479", sbinfo != NULL); ++ assert("zam-480", sbinfo->journal_header != NULL); ++ ++ txhead = list_entry(ch->tx_list.next, jnode, capture_link); ++ ++ jload(sbinfo->journal_header); ++ ++ header = (struct journal_header *)jdata(sbinfo->journal_header); ++ assert("zam-484", header != NULL); ++ ++ put_unaligned(cpu_to_le64(*jnode_get_block(txhead)), ++ &header->last_committed_tx); ++ ++ jrelse(sbinfo->journal_header); ++} ++ ++/* fill journal footer block data */ ++static void format_journal_footer(struct commit_handle *ch) ++{ ++ struct reiser4_super_info_data *sbinfo; ++ struct journal_footer *footer; ++ jnode *tx_head; ++ ++ sbinfo = get_super_private(ch->super); ++ ++ tx_head = list_entry(ch->tx_list.next, jnode, capture_link); ++ ++ assert("zam-493", sbinfo != NULL); ++ assert("zam-494", sbinfo->journal_header != NULL); ++ ++ check_me("zam-691", 
jload(sbinfo->journal_footer) == 0); ++ ++ footer = (struct journal_footer *)jdata(sbinfo->journal_footer); ++ assert("zam-495", footer != NULL); ++ ++ put_unaligned(cpu_to_le64(*jnode_get_block(tx_head)), ++ &footer->last_flushed_tx); ++ put_unaligned(cpu_to_le64(ch->free_blocks), &footer->free_blocks); ++ ++ put_unaligned(cpu_to_le64(ch->nr_files), &footer->nr_files); ++ put_unaligned(cpu_to_le64(ch->next_oid), &footer->next_oid); ++ ++ jrelse(sbinfo->journal_footer); ++} ++ ++/* wander record capacity depends on current block size */ ++static int wander_record_capacity(const struct super_block *super) ++{ ++ return (super->s_blocksize - ++ sizeof(struct wander_record_header)) / ++ sizeof(struct wander_entry); ++} ++ ++/* Fill first wander record (tx head) in accordance with supplied given data */ ++static void format_tx_head(struct commit_handle *ch) ++{ ++ jnode *tx_head; ++ jnode *next; ++ struct tx_header *header; ++ ++ tx_head = list_entry(ch->tx_list.next, jnode, capture_link); ++ assert("zam-692", &ch->tx_list != &tx_head->capture_link); ++ ++ next = list_entry(tx_head->capture_link.next, jnode, capture_link); ++ if (&ch->tx_list == &next->capture_link) ++ next = tx_head; ++ ++ header = (struct tx_header *)jdata(tx_head); ++ ++ assert("zam-460", header != NULL); ++ assert("zam-462", ch->super->s_blocksize >= sizeof(struct tx_header)); ++ ++ memset(jdata(tx_head), 0, (size_t) ch->super->s_blocksize); ++ memcpy(jdata(tx_head), TX_HEADER_MAGIC, TX_HEADER_MAGIC_SIZE); ++ ++ put_unaligned(cpu_to_le32(ch->tx_size), &header->total); ++ put_unaligned(cpu_to_le64(get_super_private(ch->super)->last_committed_tx), ++ &header->prev_tx); ++ put_unaligned(cpu_to_le64(*jnode_get_block(next)), &header->next_block); ++ put_unaligned(cpu_to_le64(ch->free_blocks), &header->free_blocks); ++ put_unaligned(cpu_to_le64(ch->nr_files), &header->nr_files); ++ put_unaligned(cpu_to_le64(ch->next_oid), &header->next_oid); ++} ++ ++/* prepare ordinary wander record block (fill all 
service fields) */ ++static void ++format_wander_record(struct commit_handle *ch, jnode *node, __u32 serial) ++{ ++ struct wander_record_header *LRH; ++ jnode *next; ++ ++ assert("zam-464", node != NULL); ++ ++ LRH = (struct wander_record_header *)jdata(node); ++ next = list_entry(node->capture_link.next, jnode, capture_link); ++ ++ if (&ch->tx_list == &next->capture_link) ++ next = list_entry(ch->tx_list.next, jnode, capture_link); ++ ++ assert("zam-465", LRH != NULL); ++ assert("zam-463", ++ ch->super->s_blocksize > sizeof(struct wander_record_header)); ++ ++ memset(jdata(node), 0, (size_t) ch->super->s_blocksize); ++ memcpy(jdata(node), WANDER_RECORD_MAGIC, WANDER_RECORD_MAGIC_SIZE); ++ ++ put_unaligned(cpu_to_le32(ch->tx_size), &LRH->total); ++ put_unaligned(cpu_to_le32(serial), &LRH->serial); ++ put_unaligned(cpu_to_le64(*jnode_get_block(next)), &LRH->next_block); ++} ++ ++/* add one wandered map entry to formatted wander record */ ++static void ++store_entry(jnode * node, int index, const reiser4_block_nr * a, ++ const reiser4_block_nr * b) ++{ ++ char *data; ++ struct wander_entry *pairs; ++ ++ data = jdata(node); ++ assert("zam-451", data != NULL); ++ ++ pairs = ++ (struct wander_entry *)(data + sizeof(struct wander_record_header)); ++ ++ put_unaligned(cpu_to_le64(*a), &pairs[index].original); ++ put_unaligned(cpu_to_le64(*b), &pairs[index].wandered); ++} ++ ++/* currently, wander records contains contain only wandered map, which depend on ++ overwrite set size */ ++static void get_tx_size(struct commit_handle *ch) ++{ ++ assert("zam-440", ch->overwrite_set_size != 0); ++ assert("zam-695", ch->tx_size == 0); ++ ++ /* count all ordinary wander records ++ ( - 1) / + 1 and add one ++ for tx head block */ ++ ch->tx_size = ++ (ch->overwrite_set_size - 1) / wander_record_capacity(ch->super) + ++ 2; ++} ++ ++/* A special structure for using in store_wmap_actor() for saving its state ++ between calls */ ++struct store_wmap_params { ++ jnode *cur; /* jnode of 
current wander record to fill */ ++ int idx; /* free element index in wander record */ ++ int capacity; /* capacity */ ++ ++#if REISER4_DEBUG ++ struct list_head *tx_list; ++#endif ++}; ++ ++/* an actor for use in blocknr_set_iterator routine which populates the list ++ of pre-formatted wander records by wandered map info */ ++static int ++store_wmap_actor(txn_atom * atom UNUSED_ARG, const reiser4_block_nr * a, ++ const reiser4_block_nr * b, void *data) ++{ ++ struct store_wmap_params *params = data; ++ ++ if (params->idx >= params->capacity) { ++ /* a new wander record should be taken from the tx_list */ ++ params->cur = list_entry(params->cur->capture_link.next, jnode, capture_link); ++ assert("zam-454", ++ params->tx_list != ¶ms->cur->capture_link); ++ ++ params->idx = 0; ++ } ++ ++ store_entry(params->cur, params->idx, a, b); ++ params->idx++; ++ ++ return 0; ++} ++ ++/* This function is called after Relocate set gets written to disk, Overwrite ++ set is written to wandered locations and all wander records are written ++ also. Updated journal header blocks contains a pointer (block number) to ++ first wander record of the just written transaction */ ++static int update_journal_header(struct commit_handle *ch, int use_barrier) ++{ ++ struct reiser4_super_info_data *sbinfo = get_super_private(ch->super); ++ jnode *jh = sbinfo->journal_header; ++ jnode *head = list_entry(ch->tx_list.next, jnode, capture_link); ++ int ret; ++ ++ format_journal_header(ch); ++ ++ ret = write_jnodes_to_disk_extent(jh, 1, jnode_get_block(jh), NULL, ++ use_barrier ? WRITEOUT_BARRIER : 0); ++ if (ret) ++ return ret; ++ ++ // blk_run_address_space(sbinfo->fake->i_mapping); ++ /*blk_run_queues(); */ ++ ++ ret = jwait_io(jh, WRITE); ++ ++ if (ret) ++ return ret; ++ ++ sbinfo->last_committed_tx = *jnode_get_block(head); ++ ++ return 0; ++} ++ ++/* This function is called after write-back is finished. 
We update journal ++ footer block and free blocks which were occupied by wandered blocks and ++ transaction wander records */ ++static int update_journal_footer(struct commit_handle *ch, int use_barrier) ++{ ++ reiser4_super_info_data *sbinfo = get_super_private(ch->super); ++ ++ jnode *jf = sbinfo->journal_footer; ++ ++ int ret; ++ ++ format_journal_footer(ch); ++ ++ ret = write_jnodes_to_disk_extent(jf, 1, jnode_get_block(jf), NULL, ++ use_barrier ? WRITEOUT_BARRIER : 0); ++ if (ret) ++ return ret; ++ ++ // blk_run_address_space(sbinfo->fake->i_mapping); ++ /*blk_run_queue(); */ ++ ++ ret = jwait_io(jf, WRITE); ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++ ++/* free block numbers of wander records of already written in place transaction */ ++static void dealloc_tx_list(struct commit_handle *ch) ++{ ++ while (!list_empty(&ch->tx_list)) { ++ jnode *cur = list_entry(ch->tx_list.next, jnode, capture_link); ++ list_del(&cur->capture_link); ++ ON_DEBUG(INIT_LIST_HEAD(&cur->capture_link)); ++ reiser4_dealloc_block(jnode_get_block(cur), BLOCK_NOT_COUNTED, ++ BA_FORMATTED); ++ ++ unpin_jnode_data(cur); ++ reiser4_drop_io_head(cur); ++ } ++} ++ ++/* An actor for use in block_nr_iterator() routine which frees wandered blocks ++ from atom's overwrite set. 
*/ ++static int ++dealloc_wmap_actor(txn_atom * atom UNUSED_ARG, ++ const reiser4_block_nr * a UNUSED_ARG, ++ const reiser4_block_nr * b, void *data UNUSED_ARG) ++{ ++ ++ assert("zam-499", b != NULL); ++ assert("zam-500", *b != 0); ++ assert("zam-501", !reiser4_blocknr_is_fake(b)); ++ ++ reiser4_dealloc_block(b, BLOCK_NOT_COUNTED, BA_FORMATTED); ++ return 0; ++} ++ ++/* free wandered block locations of already written in place transaction */ ++static void dealloc_wmap(struct commit_handle *ch) ++{ ++ assert("zam-696", ch->atom != NULL); ++ ++ blocknr_set_iterator(ch->atom, &ch->atom->wandered_map, ++ dealloc_wmap_actor, NULL, 1); ++} ++ ++/* helper function for alloc wandered blocks, which refill set of block ++ numbers needed for wandered blocks */ ++static int ++get_more_wandered_blocks(int count, reiser4_block_nr * start, int *len) ++{ ++ reiser4_blocknr_hint hint; ++ int ret; ++ ++ reiser4_block_nr wide_len = count; ++ ++ /* FIXME-ZAM: A special policy needed for allocation of wandered blocks ++ ZAM-FIXME-HANS: yes, what happened to our discussion of using a fixed ++ reserved allocation area so as to get the best qualities of fixed ++ journals? */ ++ reiser4_blocknr_hint_init(&hint); ++ hint.block_stage = BLOCK_GRABBED; ++ ++ ret = reiser4_alloc_blocks(&hint, start, &wide_len, ++ BA_FORMATTED | BA_USE_DEFAULT_SEARCH_START); ++ *len = (int)wide_len; ++ ++ return ret; ++} ++ ++/* ++ * roll back changes made before issuing BIO in the case of IO error. 
++ */ ++static void undo_bio(struct bio *bio) ++{ ++ int i; ++ ++ for (i = 0; i < bio->bi_vcnt; ++i) { ++ struct page *pg; ++ jnode *node; ++ ++ pg = bio->bi_io_vec[i].bv_page; ++ end_page_writeback(pg); ++ node = jprivate(pg); ++ spin_lock_jnode(node); ++ JF_CLR(node, JNODE_WRITEBACK); ++ JF_SET(node, JNODE_DIRTY); ++ spin_unlock_jnode(node); ++ } ++ bio_put(bio); ++} ++ ++/* put overwrite set back to atom's clean list */ ++static void put_overwrite_set(struct commit_handle *ch) ++{ ++ jnode *cur; ++ ++ list_for_each_entry(cur, ch->overwrite_set, capture_link) ++ jrelse_tail(cur); ++} ++ ++/* Count overwrite set size, grab disk space for wandered blocks allocation. ++ Since we have a separate list for atom's overwrite set we just scan the list, ++ count bitmap and other not leaf nodes which wandered blocks allocation we ++ have to grab space for. */ ++static int get_overwrite_set(struct commit_handle *ch) ++{ ++ int ret; ++ jnode *cur; ++ __u64 nr_not_leaves = 0; ++#if REISER4_DEBUG ++ __u64 nr_formatted_leaves = 0; ++ __u64 nr_unformatted_leaves = 0; ++#endif ++ ++ assert("zam-697", ch->overwrite_set_size == 0); ++ ++ ch->overwrite_set = ATOM_OVRWR_LIST(ch->atom); ++ cur = list_entry(ch->overwrite_set->next, jnode, capture_link); ++ ++ while (ch->overwrite_set != &cur->capture_link) { ++ jnode *next = list_entry(cur->capture_link.next, jnode, capture_link); ++ ++ /* Count bitmap locks for getting correct statistics what number ++ * of blocks were cleared by the transaction commit. */ ++ if (jnode_get_type(cur) == JNODE_BITMAP) ++ ch->nr_bitmap++; ++ ++ assert("zam-939", JF_ISSET(cur, JNODE_OVRWR) ++ || jnode_get_type(cur) == JNODE_BITMAP); ++ ++ if (jnode_is_znode(cur) && znode_above_root(JZNODE(cur))) { ++ /* we replace fake znode by another (real) ++ znode which is suggested by disk_layout ++ plugin */ ++ ++ /* FIXME: it looks like fake znode should be ++ replaced by jnode supplied by ++ disk_layout. 
*/ ++ ++ struct super_block *s = reiser4_get_current_sb(); ++ reiser4_super_info_data *sbinfo = ++ get_current_super_private(); ++ ++ if (sbinfo->df_plug->log_super) { ++ jnode *sj = sbinfo->df_plug->log_super(s); ++ ++ assert("zam-593", sj != NULL); ++ ++ if (IS_ERR(sj)) ++ return PTR_ERR(sj); ++ ++ spin_lock_jnode(sj); ++ JF_SET(sj, JNODE_OVRWR); ++ insert_into_atom_ovrwr_list(ch->atom, sj); ++ spin_unlock_jnode(sj); ++ ++ /* jload it as the rest of overwrite set */ ++ jload_gfp(sj, reiser4_ctx_gfp_mask_get(), 0); ++ ++ ch->overwrite_set_size++; ++ } ++ spin_lock_jnode(cur); ++ reiser4_uncapture_block(cur); ++ jput(cur); ++ ++ } else { ++ int ret; ++ ch->overwrite_set_size++; ++ ret = jload_gfp(cur, reiser4_ctx_gfp_mask_get(), 0); ++ if (ret) ++ reiser4_panic("zam-783", ++ "cannot load e-flushed jnode back (ret = %d)\n", ++ ret); ++ } ++ ++ /* Count not leaves here because we have to grab disk space ++ * for wandered blocks. They were not counted as "flush ++ * reserved". Counting should be done _after_ nodes are pinned ++ * into memory by jload(). */ ++ if (!jnode_is_leaf(cur)) ++ nr_not_leaves++; ++ else { ++#if REISER4_DEBUG ++ /* at this point @cur either has JNODE_FLUSH_RESERVED ++ * or is eflushed. Locking is not strong enough to ++ * write an assertion checking for this. */ ++ if (jnode_is_znode(cur)) ++ nr_formatted_leaves++; ++ else ++ nr_unformatted_leaves++; ++#endif ++ JF_CLR(cur, JNODE_FLUSH_RESERVED); ++ } ++ ++ cur = next; ++ } ++ ++ /* Grab space for writing (wandered blocks) of not leaves found in ++ * overwrite set. */ ++ ret = reiser4_grab_space_force(nr_not_leaves, BA_RESERVED); ++ if (ret) ++ return ret; ++ ++ /* Disk space for allocation of wandered blocks of leaf nodes already ++ * reserved as "flush reserved", move it to grabbed space counter. 
*/ ++ spin_lock_atom(ch->atom); ++ assert("zam-940", ++ nr_formatted_leaves + nr_unformatted_leaves <= ++ ch->atom->flush_reserved); ++ flush_reserved2grabbed(ch->atom, ch->atom->flush_reserved); ++ spin_unlock_atom(ch->atom); ++ ++ return ch->overwrite_set_size; ++} ++ ++/** ++ * write_jnodes_to_disk_extent - submit write request ++ * @head: ++ * @first: first jnode of the list ++ * @nr: number of jnodes on the list ++ * @block_p: ++ * @fq: ++ * @flags: used to decide whether page is to get PG_reclaim flag ++ * ++ * Submits a write request for @nr jnodes beginning from the @first, other ++ * jnodes are after the @first on the double-linked "capture" list. All jnodes ++ * will be written to the disk region of @nr blocks starting with @block_p block ++ * number. If @fq is not NULL it means that waiting for i/o completion will be ++ * done more efficiently by using flush_queue_t objects. ++ * This function is the one which writes list of jnodes in batch mode. It does ++ * all low-level things as bio construction and page states manipulation. ++ * ++ * ZAM-FIXME-HANS: brief me on why this function exists, and why bios are ++ * aggregated in this function instead of being left to the layers below ++ * ++ * FIXME: ZAM->HANS: What layer are you talking about? Can you point me to that? ++ * Why that layer needed? Why BIOs cannot be constructed here? ++ */ ++static int write_jnodes_to_disk_extent( ++ jnode *first, int nr, const reiser4_block_nr *block_p, ++ flush_queue_t *fq, int flags) ++{ ++ struct super_block *super = reiser4_get_current_sb(); ++ int write_op = ( flags & WRITEOUT_BARRIER ) ? 
WRITE_BARRIER : WRITE; ++ int max_blocks; ++ jnode *cur = first; ++ reiser4_block_nr block; ++ ++ assert("zam-571", first != NULL); ++ assert("zam-572", block_p != NULL); ++ assert("zam-570", nr > 0); ++ ++ block = *block_p; ++ max_blocks = min(bio_get_nr_vecs(super->s_bdev), BIO_MAX_PAGES); ++ ++ while (nr > 0) { ++ struct bio *bio; ++ int nr_blocks = min(nr, max_blocks); ++ int i; ++ int nr_used; ++ ++ bio = bio_alloc(GFP_NOIO, nr_blocks); ++ if (!bio) ++ return RETERR(-ENOMEM); ++ ++ bio->bi_bdev = super->s_bdev; ++ bio->bi_sector = block * (super->s_blocksize >> 9); ++ for (nr_used = 0, i = 0; i < nr_blocks; i++) { ++ struct page *pg; ++ ++ pg = jnode_page(cur); ++ assert("zam-573", pg != NULL); ++ ++ page_cache_get(pg); ++ ++ lock_and_wait_page_writeback(pg); ++ ++ if (!bio_add_page(bio, pg, super->s_blocksize, 0)) { ++ /* ++ * underlying device is satiated. Stop adding ++ * pages to the bio. ++ */ ++ unlock_page(pg); ++ page_cache_release(pg); ++ break; ++ } ++ ++ spin_lock_jnode(cur); ++ assert("nikita-3166", ++ pg->mapping == jnode_get_mapping(cur)); ++ assert("zam-912", !JF_ISSET(cur, JNODE_WRITEBACK)); ++#if REISER4_DEBUG ++ spin_lock(&cur->load); ++ assert("nikita-3165", !jnode_is_releasable(cur)); ++ spin_unlock(&cur->load); ++#endif ++ JF_SET(cur, JNODE_WRITEBACK); ++ JF_CLR(cur, JNODE_DIRTY); ++ ON_DEBUG(cur->written++); ++ spin_unlock_jnode(cur); ++ ++ ClearPageError(pg); ++ set_page_writeback(pg); ++ ++ if (get_current_context()->entd) { ++ /* this is ent thread */ ++ entd_context *ent = get_entd_context(super); ++ struct wbq *rq, *next; ++ ++ spin_lock(&ent->guard); ++ ++ if (pg == ent->cur_request->page) { ++ /* ++ * entd is called for this page. This ++ * request is not in th etodo list ++ */ ++ ent->cur_request->written = 1; ++ } else { ++ /* ++ * if we have written a page for which writepage ++ * is called for - move request to another list. 
++ */ ++ list_for_each_entry_safe(rq, next, &ent->todo_list, link) { ++ assert("", rq->magic == WBQ_MAGIC); ++ if (pg == rq->page) { ++ /* ++ * remove request from ++ * entd's queue, but do ++ * not wake up a thread ++ * which put this ++ * request ++ */ ++ list_del_init(&rq->link); ++ ent->nr_todo_reqs --; ++ list_add_tail(&rq->link, &ent->done_list); ++ ent->nr_done_reqs ++; ++ rq->written = 1; ++ break; ++ } ++ } ++ } ++ spin_unlock(&ent->guard); ++ } ++ ++ clear_page_dirty_for_io(pg); ++ ++ unlock_page(pg); ++ ++ cur = list_entry(cur->capture_link.next, jnode, capture_link); ++ nr_used++; ++ } ++ if (nr_used > 0) { ++ assert("nikita-3453", ++ bio->bi_size == super->s_blocksize * nr_used); ++ assert("nikita-3454", bio->bi_vcnt == nr_used); ++ ++ /* Check if we are allowed to write at all */ ++ if (super->s_flags & MS_RDONLY) ++ undo_bio(bio); ++ else { ++ int not_supported; ++ ++ add_fq_to_bio(fq, bio); ++ bio_get(bio); ++ reiser4_submit_bio(write_op, bio); ++ not_supported = bio_flagged(bio, BIO_EOPNOTSUPP); ++ bio_put(bio); ++ if (not_supported) ++ return -EOPNOTSUPP; ++ } ++ ++ block += nr_used - 1; ++ update_blocknr_hint_default(super, &block); ++ block += 1; ++ } else { ++ bio_put(bio); ++ } ++ nr -= nr_used; ++ } ++ ++ return 0; ++} ++ ++/* This is a procedure which recovers a contiguous sequences of disk block ++ numbers in the given list of j-nodes and submits write requests on this ++ per-sequence basis */ ++int ++write_jnode_list(struct list_head *head, flush_queue_t *fq, ++ long *nr_submitted, int flags) ++{ ++ int ret; ++ jnode *beg = list_entry(head->next, jnode, capture_link); ++ ++ while (head != &beg->capture_link) { ++ int nr = 1; ++ jnode *cur = list_entry(beg->capture_link.next, jnode, capture_link); ++ ++ while (head != &cur->capture_link) { ++ if (*jnode_get_block(cur) != *jnode_get_block(beg) + nr) ++ break; ++ ++nr; ++ cur = list_entry(cur->capture_link.next, jnode, capture_link); ++ } ++ ++ ret = write_jnodes_to_disk_extent( ++ beg, nr, 
jnode_get_block(beg), fq, flags); ++ if (ret) ++ return ret; ++ ++ if (nr_submitted) ++ *nr_submitted += nr; ++ ++ beg = cur; ++ } ++ ++ return 0; ++} ++ ++/* add given wandered mapping to atom's wandered map */ ++static int ++add_region_to_wmap(jnode * cur, int len, const reiser4_block_nr * block_p) ++{ ++ int ret; ++ blocknr_set_entry *new_bsep = NULL; ++ reiser4_block_nr block; ++ ++ txn_atom *atom; ++ ++ assert("zam-568", block_p != NULL); ++ block = *block_p; ++ assert("zam-569", len > 0); ++ ++ while ((len--) > 0) { ++ do { ++ atom = get_current_atom_locked(); ++ assert("zam-536", ++ !reiser4_blocknr_is_fake(jnode_get_block(cur))); ++ ret = ++ blocknr_set_add_pair(atom, &atom->wandered_map, ++ &new_bsep, ++ jnode_get_block(cur), &block); ++ } while (ret == -E_REPEAT); ++ ++ if (ret) { ++ /* deallocate blocks which were not added to wandered ++ map */ ++ reiser4_block_nr wide_len = len; ++ ++ reiser4_dealloc_blocks(&block, &wide_len, ++ BLOCK_NOT_COUNTED, ++ BA_FORMATTED ++ /* formatted, without defer */ ); ++ ++ return ret; ++ } ++ ++ spin_unlock_atom(atom); ++ ++ cur = list_entry(cur->capture_link.next, jnode, capture_link); ++ ++block; ++ } ++ ++ return 0; ++} ++ ++/* Allocate wandered blocks for current atom's OVERWRITE SET and immediately ++ submit IO for allocated blocks. We assume that current atom is in a stage ++ when any atom fusion is impossible and atom is unlocked and it is safe. 
*/ ++static int alloc_wandered_blocks(struct commit_handle *ch, flush_queue_t *fq) ++{ ++ reiser4_block_nr block; ++ ++ int rest; ++ int len; ++ int ret; ++ ++ jnode *cur; ++ ++ assert("zam-534", ch->overwrite_set_size > 0); ++ ++ rest = ch->overwrite_set_size; ++ ++ cur = list_entry(ch->overwrite_set->next, jnode, capture_link); ++ while (ch->overwrite_set != &cur->capture_link) { ++ assert("zam-567", JF_ISSET(cur, JNODE_OVRWR)); ++ ++ ret = get_more_wandered_blocks(rest, &block, &len); ++ if (ret) ++ return ret; ++ ++ rest -= len; ++ ++ ret = add_region_to_wmap(cur, len, &block); ++ if (ret) ++ return ret; ++ ++ ret = write_jnodes_to_disk_extent(cur, len, &block, fq, 0); ++ if (ret) ++ return ret; ++ ++ while ((len--) > 0) { ++ assert("zam-604", ++ ch->overwrite_set != &cur->capture_link); ++ cur = list_entry(cur->capture_link.next, jnode, capture_link); ++ } ++ } ++ ++ return 0; ++} ++ ++/* allocate given number of nodes over the journal area and link them into a ++ list, return pointer to the first jnode in the list */ ++static int alloc_tx(struct commit_handle *ch, flush_queue_t * fq) ++{ ++ reiser4_blocknr_hint hint; ++ reiser4_block_nr allocated = 0; ++ reiser4_block_nr first, len; ++ jnode *cur; ++ jnode *txhead; ++ int ret; ++ reiser4_context *ctx; ++ reiser4_super_info_data *sbinfo; ++ ++ assert("zam-698", ch->tx_size > 0); ++ assert("zam-699", list_empty_careful(&ch->tx_list)); ++ ++ ctx = get_current_context(); ++ sbinfo = get_super_private(ctx->super); ++ ++ while (allocated < (unsigned)ch->tx_size) { ++ len = (ch->tx_size - allocated); ++ ++ reiser4_blocknr_hint_init(&hint); ++ ++ hint.block_stage = BLOCK_GRABBED; ++ ++ /* FIXME: there should be some block allocation policy for ++ nodes which contain wander records */ ++ ++ /* We assume that disk space for wandered record blocks can be ++ * taken from reserved area. 
*/ ++ ret = reiser4_alloc_blocks(&hint, &first, &len, ++ BA_FORMATTED | BA_RESERVED | ++ BA_USE_DEFAULT_SEARCH_START); ++ reiser4_blocknr_hint_done(&hint); ++ ++ if (ret) ++ return ret; ++ ++ allocated += len; ++ ++ /* create jnodes for all wander records */ ++ while (len--) { ++ cur = reiser4_alloc_io_head(&first); ++ ++ if (cur == NULL) { ++ ret = RETERR(-ENOMEM); ++ goto free_not_assigned; ++ } ++ ++ ret = jinit_new(cur, reiser4_ctx_gfp_mask_get()); ++ ++ if (ret != 0) { ++ jfree(cur); ++ goto free_not_assigned; ++ } ++ ++ pin_jnode_data(cur); ++ ++ list_add_tail(&cur->capture_link, &ch->tx_list); ++ ++ first++; ++ } ++ } ++ ++ { /* format a on-disk linked list of wander records */ ++ int serial = 1; ++ ++ txhead = list_entry(ch->tx_list.next, jnode, capture_link); ++ format_tx_head(ch); ++ ++ cur = list_entry(txhead->capture_link.next, jnode, capture_link); ++ while (&ch->tx_list != &cur->capture_link) { ++ format_wander_record(ch, cur, serial++); ++ cur = list_entry(cur->capture_link.next, jnode, capture_link); ++ } ++ } ++ ++ { /* Fill wander records with Wandered Set */ ++ struct store_wmap_params params; ++ txn_atom *atom; ++ ++ params.cur = list_entry(txhead->capture_link.next, jnode, capture_link); ++ ++ params.idx = 0; ++ params.capacity = ++ wander_record_capacity(reiser4_get_current_sb()); ++ ++ atom = get_current_atom_locked(); ++ blocknr_set_iterator(atom, &atom->wandered_map, ++ &store_wmap_actor, ¶ms, 0); ++ spin_unlock_atom(atom); ++ } ++ ++ { /* relse all jnodes from tx_list */ ++ cur = list_entry(ch->tx_list.next, jnode, capture_link); ++ while (&ch->tx_list != &cur->capture_link) { ++ jrelse(cur); ++ cur = list_entry(cur->capture_link.next, jnode, capture_link); ++ } ++ } ++ ++ ret = write_jnode_list(&ch->tx_list, fq, NULL, 0); ++ ++ return ret; ++ ++ free_not_assigned: ++ /* We deallocate blocks not yet assigned to jnodes on tx_list. 
The ++ caller takes care about invalidating of tx list */ ++ reiser4_dealloc_blocks(&first, &len, BLOCK_NOT_COUNTED, BA_FORMATTED); ++ ++ return ret; ++} ++ ++static int commit_tx(struct commit_handle *ch) ++{ ++ flush_queue_t *fq; ++ int barrier; ++ int ret; ++ ++ /* Grab more space for wandered records. */ ++ ret = reiser4_grab_space_force((__u64) (ch->tx_size), BA_RESERVED); ++ if (ret) ++ return ret; ++ ++ fq = get_fq_for_current_atom(); ++ if (IS_ERR(fq)) ++ return PTR_ERR(fq); ++ ++ spin_unlock_atom(fq->atom); ++ do { ++ ret = alloc_wandered_blocks(ch, fq); ++ if (ret) ++ break; ++ ret = alloc_tx(ch, fq); ++ if (ret) ++ break; ++ } while (0); ++ ++ reiser4_fq_put(fq); ++ if (ret) ++ return ret; ++ repeat_wo_barrier: ++ barrier = reiser4_use_write_barrier(ch->super); ++ if (!barrier) { ++ ret = current_atom_finish_all_fq(); ++ if (ret) ++ return ret; ++ } ++ ret = update_journal_header(ch, barrier); ++ if (barrier) { ++ if (ret) { ++ if (ret == -EOPNOTSUPP) { ++ disable_write_barrier(ch->super); ++ goto repeat_wo_barrier; ++ } ++ return ret; ++ } ++ ret = current_atom_finish_all_fq(); ++ } ++ return ret; ++} ++ ++static int write_tx_back(struct commit_handle * ch) ++{ ++ flush_queue_t *fq; ++ int ret; ++ int barrier; ++ ++ reiser4_post_commit_hook(); ++ fq = get_fq_for_current_atom(); ++ if (IS_ERR(fq)) ++ return PTR_ERR(fq); ++ spin_unlock_atom(fq->atom); ++ ret = write_jnode_list( ++ ch->overwrite_set, fq, NULL, WRITEOUT_FOR_PAGE_RECLAIM); ++ reiser4_fq_put(fq); ++ if (ret) ++ return ret; ++ repeat_wo_barrier: ++ barrier = reiser4_use_write_barrier(ch->super); ++ if (!barrier) { ++ ret = current_atom_finish_all_fq(); ++ if (ret) ++ return ret; ++ } ++ ret = update_journal_footer(ch, barrier); ++ if (barrier) { ++ if (ret) { ++ if (ret == -EOPNOTSUPP) { ++ disable_write_barrier(ch->super); ++ goto repeat_wo_barrier; ++ } ++ return ret; ++ } ++ ret = current_atom_finish_all_fq(); ++ } ++ if (ret) ++ return ret; ++ reiser4_post_write_back_hook(); ++ return 0; 
++} ++ ++/* We assume that at this moment all captured blocks are marked as RELOC or ++ WANDER (belong to Relocate o Overwrite set), all nodes from Relocate set ++ are submitted to write. ++*/ ++ ++int reiser4_write_logs(long *nr_submitted) ++{ ++ txn_atom *atom; ++ struct super_block *super = reiser4_get_current_sb(); ++ reiser4_super_info_data *sbinfo = get_super_private(super); ++ struct commit_handle ch; ++ int ret; ++ ++ writeout_mode_enable(); ++ ++ /* block allocator may add j-nodes to the clean_list */ ++ ret = reiser4_pre_commit_hook(); ++ if (ret) ++ return ret; ++ ++ /* No locks are required if we take atom which stage >= ++ * ASTAGE_PRE_COMMIT */ ++ atom = get_current_context()->trans->atom; ++ assert("zam-965", atom != NULL); ++ ++ /* relocate set is on the atom->clean_nodes list after ++ * current_atom_complete_writes() finishes. It can be safely ++ * uncaptured after commit_mutex is locked, because any atom that ++ * captures these nodes is guaranteed to commit after current one. ++ * ++ * This can only be done after reiser4_pre_commit_hook(), because it is where ++ * early flushed jnodes with CREATED bit are transferred to the ++ * overwrite list. */ ++ reiser4_invalidate_list(ATOM_CLEAN_LIST(atom)); ++ spin_lock_atom(atom); ++ /* There might be waiters for the relocate nodes which we have ++ * released, wake them up. */ ++ reiser4_atom_send_event(atom); ++ spin_unlock_atom(atom); ++ ++ if (REISER4_DEBUG) { ++ int level; ++ ++ for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; ++level) ++ assert("nikita-3352", ++ list_empty_careful(ATOM_DIRTY_LIST(atom, level))); ++ } ++ ++ sbinfo->nr_files_committed += (unsigned)atom->nr_objects_created; ++ sbinfo->nr_files_committed -= (unsigned)atom->nr_objects_deleted; ++ ++ init_commit_handle(&ch, atom); ++ ++ ch.free_blocks = sbinfo->blocks_free_committed; ++ ch.nr_files = sbinfo->nr_files_committed; ++ /* ZAM-FIXME-HANS: email me what the contention level is for the super ++ * lock. 
*/ ++ ch.next_oid = oid_next(super); ++ ++ /* count overwrite set and place it in a separate list */ ++ ret = get_overwrite_set(&ch); ++ ++ if (ret <= 0) { ++ /* It is possible that overwrite set is empty here, it means ++ all captured nodes are clean */ ++ goto up_and_ret; ++ } ++ ++ /* Inform the caller about what number of dirty pages will be ++ * submitted to disk. */ ++ *nr_submitted += ch.overwrite_set_size - ch.nr_bitmap; ++ ++ /* count all records needed for storing of the wandered set */ ++ get_tx_size(&ch); ++ ++ ret = commit_tx(&ch); ++ if (ret) ++ goto up_and_ret; ++ ++ spin_lock_atom(atom); ++ reiser4_atom_set_stage(atom, ASTAGE_POST_COMMIT); ++ spin_unlock_atom(atom); ++ ++ ret = write_tx_back(&ch); ++ reiser4_post_write_back_hook(); ++ ++ up_and_ret: ++ if (ret) { ++ /* there could be fq attached to current atom; the only way to ++ remove them is: */ ++ current_atom_finish_all_fq(); ++ } ++ ++ /* free blocks of flushed transaction */ ++ dealloc_tx_list(&ch); ++ dealloc_wmap(&ch); ++ ++ put_overwrite_set(&ch); ++ ++ done_commit_handle(&ch); ++ ++ writeout_mode_disable(); ++ ++ return ret; ++} ++ ++/* consistency checks for journal data/control blocks: header, footer, log ++ records, transactions head blocks. All functions return zero on success. */ ++ ++static int check_journal_header(const jnode * node UNUSED_ARG) ++{ ++ /* FIXME: journal header has no magic field yet. */ ++ return 0; ++} ++ ++/* wait for write completion for all jnodes from given list */ ++static int wait_on_jnode_list(struct list_head *head) ++{ ++ jnode *scan; ++ int ret = 0; ++ ++ list_for_each_entry(scan, head, capture_link) { ++ struct page *pg = jnode_page(scan); ++ ++ if (pg) { ++ if (PageWriteback(pg)) ++ wait_on_page_writeback(pg); ++ ++ if (PageError(pg)) ++ ret++; ++ } ++ } ++ ++ return ret; ++} ++ ++static int check_journal_footer(const jnode * node UNUSED_ARG) ++{ ++ /* FIXME: journal footer has no magic field yet. 
*/ ++ return 0; ++} ++ ++static int check_tx_head(const jnode * node) ++{ ++ struct tx_header *header = (struct tx_header *)jdata(node); ++ ++ if (memcmp(&header->magic, TX_HEADER_MAGIC, TX_HEADER_MAGIC_SIZE) != 0) { ++ warning("zam-627", "tx head at block %s corrupted\n", ++ sprint_address(jnode_get_block(node))); ++ return RETERR(-EIO); ++ } ++ ++ return 0; ++} ++ ++static int check_wander_record(const jnode * node) ++{ ++ struct wander_record_header *RH = ++ (struct wander_record_header *)jdata(node); ++ ++ if (memcmp(&RH->magic, WANDER_RECORD_MAGIC, WANDER_RECORD_MAGIC_SIZE) != ++ 0) { ++ warning("zam-628", "wander record at block %s corrupted\n", ++ sprint_address(jnode_get_block(node))); ++ return RETERR(-EIO); ++ } ++ ++ return 0; ++} ++ ++/* fill commit_handler structure by everything what is needed for update_journal_footer */ ++static int restore_commit_handle(struct commit_handle *ch, jnode *tx_head) ++{ ++ struct tx_header *TXH; ++ int ret; ++ ++ ret = jload(tx_head); ++ if (ret) ++ return ret; ++ ++ TXH = (struct tx_header *)jdata(tx_head); ++ ++ ch->free_blocks = le64_to_cpu(get_unaligned(&TXH->free_blocks)); ++ ch->nr_files = le64_to_cpu(get_unaligned(&TXH->nr_files)); ++ ch->next_oid = le64_to_cpu(get_unaligned(&TXH->next_oid)); ++ ++ jrelse(tx_head); ++ ++ list_add(&tx_head->capture_link, &ch->tx_list); ++ ++ return 0; ++} ++ ++/* replay one transaction: restore and write overwrite set in place */ ++static int replay_transaction(const struct super_block *s, ++ jnode * tx_head, ++ const reiser4_block_nr * log_rec_block_p, ++ const reiser4_block_nr * end_block, ++ unsigned int nr_wander_records) ++{ ++ reiser4_block_nr log_rec_block = *log_rec_block_p; ++ struct commit_handle ch; ++ LIST_HEAD(overwrite_set); ++ jnode *log; ++ int ret; ++ ++ init_commit_handle(&ch, NULL); ++ ch.overwrite_set = &overwrite_set; ++ ++ restore_commit_handle(&ch, tx_head); ++ ++ while (log_rec_block != *end_block) { ++ struct wander_record_header *header; ++ struct 
wander_entry *entry; ++ ++ int i; ++ ++ if (nr_wander_records == 0) { ++ warning("zam-631", ++ "number of wander records in the linked list" ++ " greater than number stored in tx head.\n"); ++ ret = RETERR(-EIO); ++ goto free_ow_set; ++ } ++ ++ log = reiser4_alloc_io_head(&log_rec_block); ++ if (log == NULL) ++ return RETERR(-ENOMEM); ++ ++ ret = jload(log); ++ if (ret < 0) { ++ reiser4_drop_io_head(log); ++ return ret; ++ } ++ ++ ret = check_wander_record(log); ++ if (ret) { ++ jrelse(log); ++ reiser4_drop_io_head(log); ++ return ret; ++ } ++ ++ header = (struct wander_record_header *)jdata(log); ++ log_rec_block = le64_to_cpu(get_unaligned(&header->next_block)); ++ ++ entry = (struct wander_entry *)(header + 1); ++ ++ /* restore overwrite set from wander record content */ ++ for (i = 0; i < wander_record_capacity(s); i++) { ++ reiser4_block_nr block; ++ jnode *node; ++ ++ block = le64_to_cpu(get_unaligned(&entry->wandered)); ++ if (block == 0) ++ break; ++ ++ node = reiser4_alloc_io_head(&block); ++ if (node == NULL) { ++ ret = RETERR(-ENOMEM); ++ /* ++ * FIXME-VS:??? ++ */ ++ jrelse(log); ++ reiser4_drop_io_head(log); ++ goto free_ow_set; ++ } ++ ++ ret = jload(node); ++ ++ if (ret < 0) { ++ reiser4_drop_io_head(node); ++ /* ++ * FIXME-VS:??? 
++ */ ++ jrelse(log); ++ reiser4_drop_io_head(log); ++ goto free_ow_set; ++ } ++ ++ block = le64_to_cpu(get_unaligned(&entry->original)); ++ ++ assert("zam-603", block != 0); ++ ++ jnode_set_block(node, &block); ++ ++ list_add_tail(&node->capture_link, ch.overwrite_set); ++ ++ ++entry; ++ } ++ ++ jrelse(log); ++ reiser4_drop_io_head(log); ++ ++ --nr_wander_records; ++ } ++ ++ if (nr_wander_records != 0) { ++ warning("zam-632", "number of wander records in the linked list" ++ " less than number stored in tx head.\n"); ++ ret = RETERR(-EIO); ++ goto free_ow_set; ++ } ++ ++ { /* write wandered set in place */ ++ write_jnode_list(ch.overwrite_set, NULL, NULL, 0); ++ ret = wait_on_jnode_list(ch.overwrite_set); ++ ++ if (ret) { ++ ret = RETERR(-EIO); ++ goto free_ow_set; ++ } ++ } ++ ++ ret = update_journal_footer(&ch, 0); ++ ++ free_ow_set: ++ ++ while (!list_empty(ch.overwrite_set)) { ++ jnode *cur = list_entry(ch.overwrite_set->next, jnode, capture_link); ++ list_del_init(&cur->capture_link); ++ jrelse(cur); ++ reiser4_drop_io_head(cur); ++ } ++ ++ list_del_init(&tx_head->capture_link); ++ ++ done_commit_handle(&ch); ++ ++ return ret; ++} ++ ++/* find oldest committed and not played transaction and play it. The transaction ++ * was committed and journal header block was updated but the blocks from the ++ * process of writing the atom's overwrite set in-place and updating of journal ++ * footer block were not completed. This function completes the process by ++ * recovering the atom's overwrite set from their wandered locations and writes ++ * them in-place and updating the journal footer. 
*/ ++static int replay_oldest_transaction(struct super_block *s) ++{ ++ reiser4_super_info_data *sbinfo = get_super_private(s); ++ jnode *jf = sbinfo->journal_footer; ++ unsigned int total; ++ struct journal_footer *F; ++ struct tx_header *T; ++ ++ reiser4_block_nr prev_tx; ++ reiser4_block_nr last_flushed_tx; ++ reiser4_block_nr log_rec_block = 0; ++ ++ jnode *tx_head; ++ ++ int ret; ++ ++ if ((ret = jload(jf)) < 0) ++ return ret; ++ ++ F = (struct journal_footer *)jdata(jf); ++ ++ last_flushed_tx = le64_to_cpu(get_unaligned(&F->last_flushed_tx)); ++ ++ jrelse(jf); ++ ++ if (sbinfo->last_committed_tx == last_flushed_tx) { ++ /* all transactions are replayed */ ++ return 0; ++ } ++ ++ prev_tx = sbinfo->last_committed_tx; ++ ++ /* searching for oldest not flushed transaction */ ++ while (1) { ++ tx_head = reiser4_alloc_io_head(&prev_tx); ++ if (!tx_head) ++ return RETERR(-ENOMEM); ++ ++ ret = jload(tx_head); ++ if (ret < 0) { ++ reiser4_drop_io_head(tx_head); ++ return ret; ++ } ++ ++ ret = check_tx_head(tx_head); ++ if (ret) { ++ jrelse(tx_head); ++ reiser4_drop_io_head(tx_head); ++ return ret; ++ } ++ ++ T = (struct tx_header *)jdata(tx_head); ++ ++ prev_tx = le64_to_cpu(get_unaligned(&T->prev_tx)); ++ ++ if (prev_tx == last_flushed_tx) ++ break; ++ ++ jrelse(tx_head); ++ reiser4_drop_io_head(tx_head); ++ } ++ ++ total = le32_to_cpu(get_unaligned(&T->total)); ++ log_rec_block = le64_to_cpu(get_unaligned(&T->next_block)); ++ ++ pin_jnode_data(tx_head); ++ jrelse(tx_head); ++ ++ ret = ++ replay_transaction(s, tx_head, &log_rec_block, ++ jnode_get_block(tx_head), total - 1); ++ ++ unpin_jnode_data(tx_head); ++ reiser4_drop_io_head(tx_head); ++ ++ if (ret) ++ return ret; ++ return -E_REPEAT; ++} ++ ++/* The reiser4 journal current implementation was optimized to not to capture ++ super block if certain super blocks fields are modified. Currently, the set ++ is (, ). 
These fields are logged by ++ special way which includes storing them in each transaction head block at ++ atom commit time and writing that information to journal footer block at ++ atom flush time. For getting info from journal footer block to the ++ in-memory super block there is a special function ++ reiser4_journal_recover_sb_data() which should be called after disk format ++ plugin re-reads super block after journal replaying. ++*/ ++ ++/* get the information from journal footer in-memory super block */ ++int reiser4_journal_recover_sb_data(struct super_block *s) ++{ ++ reiser4_super_info_data *sbinfo = get_super_private(s); ++ struct journal_footer *jf; ++ int ret; ++ ++ assert("zam-673", sbinfo->journal_footer != NULL); ++ ++ ret = jload(sbinfo->journal_footer); ++ if (ret != 0) ++ return ret; ++ ++ ret = check_journal_footer(sbinfo->journal_footer); ++ if (ret != 0) ++ goto out; ++ ++ jf = (struct journal_footer *)jdata(sbinfo->journal_footer); ++ ++ /* was there at least one flushed transaction? */ ++ if (jf->last_flushed_tx) { ++ ++ /* restore free block counter logged in this transaction */ ++ reiser4_set_free_blocks(s, le64_to_cpu(get_unaligned(&jf->free_blocks))); ++ ++ /* restore oid allocator state */ ++ oid_init_allocator(s, ++ le64_to_cpu(get_unaligned(&jf->nr_files)), ++ le64_to_cpu(get_unaligned(&jf->next_oid))); ++ } ++ out: ++ jrelse(sbinfo->journal_footer); ++ return ret; ++} ++ ++/* reiser4 replay journal procedure */ ++int reiser4_journal_replay(struct super_block *s) ++{ ++ reiser4_super_info_data *sbinfo = get_super_private(s); ++ jnode *jh, *jf; ++ struct journal_header *header; ++ int nr_tx_replayed = 0; ++ int ret; ++ ++ assert("zam-582", sbinfo != NULL); ++ ++ jh = sbinfo->journal_header; ++ jf = sbinfo->journal_footer; ++ ++ if (!jh || !jf) { ++ /* it is possible that disk layout does not support journal ++ structures, we just warn about this */ ++ warning("zam-583", ++ "journal control blocks were not loaded by disk layout plugin. 
" ++ "journal replaying is not possible.\n"); ++ return 0; ++ } ++ ++ /* Take free block count from journal footer block. The free block ++ counter value corresponds the last flushed transaction state */ ++ ret = jload(jf); ++ if (ret < 0) ++ return ret; ++ ++ ret = check_journal_footer(jf); ++ if (ret) { ++ jrelse(jf); ++ return ret; ++ } ++ ++ jrelse(jf); ++ ++ /* store last committed transaction info in reiser4 in-memory super ++ block */ ++ ret = jload(jh); ++ if (ret < 0) ++ return ret; ++ ++ ret = check_journal_header(jh); ++ if (ret) { ++ jrelse(jh); ++ return ret; ++ } ++ ++ header = (struct journal_header *)jdata(jh); ++ sbinfo->last_committed_tx = le64_to_cpu(get_unaligned(&header->last_committed_tx)); ++ ++ jrelse(jh); ++ ++ /* replay committed transactions */ ++ while ((ret = replay_oldest_transaction(s)) == -E_REPEAT) ++ nr_tx_replayed++; ++ ++ return ret; ++} ++ ++/* load journal control block (either journal header or journal footer block) */ ++static int ++load_journal_control_block(jnode ** node, const reiser4_block_nr * block) ++{ ++ int ret; ++ ++ *node = reiser4_alloc_io_head(block); ++ if (!(*node)) ++ return RETERR(-ENOMEM); ++ ++ ret = jload(*node); ++ ++ if (ret) { ++ reiser4_drop_io_head(*node); ++ *node = NULL; ++ return ret; ++ } ++ ++ pin_jnode_data(*node); ++ jrelse(*node); ++ ++ return 0; ++} ++ ++/* unload journal header or footer and free jnode */ ++static void unload_journal_control_block(jnode ** node) ++{ ++ if (*node) { ++ unpin_jnode_data(*node); ++ reiser4_drop_io_head(*node); ++ *node = NULL; ++ } ++} ++ ++/* release journal control blocks */ ++void reiser4_done_journal_info(struct super_block *s) ++{ ++ reiser4_super_info_data *sbinfo = get_super_private(s); ++ ++ assert("zam-476", sbinfo != NULL); ++ ++ unload_journal_control_block(&sbinfo->journal_header); ++ unload_journal_control_block(&sbinfo->journal_footer); ++ rcu_barrier(); ++} ++ ++/* load journal control blocks */ ++int reiser4_init_journal_info(struct super_block 
*s) ++{ ++ reiser4_super_info_data *sbinfo = get_super_private(s); ++ journal_location *loc; ++ int ret; ++ ++ loc = &sbinfo->jloc; ++ ++ assert("zam-651", loc != NULL); ++ assert("zam-652", loc->header != 0); ++ assert("zam-653", loc->footer != 0); ++ ++ ret = load_journal_control_block(&sbinfo->journal_header, &loc->header); ++ ++ if (ret) ++ return ret; ++ ++ ret = load_journal_control_block(&sbinfo->journal_footer, &loc->footer); ++ ++ if (ret) { ++ unload_journal_control_block(&sbinfo->journal_header); ++ } ++ ++ return ret; ++} ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 80 ++ End: ++*/ +diff --git a/fs/reiser4/wander.h b/fs/reiser4/wander.h +new file mode 100644 +index 0000000..8746710 +--- /dev/null ++++ b/fs/reiser4/wander.h +@@ -0,0 +1,135 @@ ++/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#if !defined (__FS_REISER4_WANDER_H__) ++#define __FS_REISER4_WANDER_H__ ++ ++#include "dformat.h" ++ ++#include /* for struct super_block */ ++ ++/* REISER4 JOURNAL ON-DISK DATA STRUCTURES */ ++ ++#define TX_HEADER_MAGIC "TxMagic4" ++#define WANDER_RECORD_MAGIC "LogMagc4" ++ ++#define TX_HEADER_MAGIC_SIZE (8) ++#define WANDER_RECORD_MAGIC_SIZE (8) ++ ++/* journal header block format */ ++struct journal_header { ++ /* last written transaction head location */ ++ d64 last_committed_tx; ++}; ++ ++typedef struct journal_location { ++ reiser4_block_nr footer; ++ reiser4_block_nr header; ++} journal_location; ++ ++/* The wander.c head comment describes usage and semantic of all these structures */ ++/* journal footer block format */ ++struct journal_footer { ++ /* last flushed transaction location. 
*/ ++ /* This block number is no more valid after the transaction it points ++ to gets flushed, this number is used only at journal replaying time ++ for detection of the end of on-disk list of committed transactions ++ which were not flushed completely */ ++ d64 last_flushed_tx; ++ ++ /* free block counter is written in journal footer at transaction ++ flushing , not in super block because free blocks counter is logged ++ by another way than super block fields (root pointer, for ++ example). */ ++ d64 free_blocks; ++ ++ /* number of used OIDs and maximal used OID are logged separately from ++ super block */ ++ d64 nr_files; ++ d64 next_oid; ++}; ++ ++/* Each wander record (except the first one) has unified format with wander ++ record header followed by an array of log entries */ ++struct wander_record_header { ++ /* when there is no predefined location for wander records, this magic ++ string should help reiser4fsck. */ ++ char magic[WANDER_RECORD_MAGIC_SIZE]; ++ ++ /* transaction id */ ++ d64 id; ++ ++ /* total number of wander records in current transaction */ ++ d32 total; ++ ++ /* this block number in transaction */ ++ d32 serial; ++ ++ /* number of previous block in commit */ ++ d64 next_block; ++}; ++ ++/* The first wander record (transaction head) of written transaction has the ++ special format */ ++struct tx_header { ++ /* magic string makes first block in transaction different from other ++ logged blocks, it should help fsck. 
*/ ++ char magic[TX_HEADER_MAGIC_SIZE]; ++ ++ /* transaction id */ ++ d64 id; ++ ++ /* total number of records (including this first tx head) in the ++ transaction */ ++ d32 total; ++ ++ /* align next field to 8-byte boundary; this field always is zero */ ++ d32 padding; ++ ++ /* block number of previous transaction head */ ++ d64 prev_tx; ++ ++ /* next wander record location */ ++ d64 next_block; ++ ++ /* committed versions of free blocks counter */ ++ d64 free_blocks; ++ ++ /* number of used OIDs (nr_files) and maximal used OID are logged ++ separately from super block */ ++ d64 nr_files; ++ d64 next_oid; ++}; ++ ++/* A transaction gets written to disk as a set of wander records (each wander ++ record size is fs block) */ ++ ++/* As it was told above a wander The rest of wander record is filled by these log entries, unused space filled ++ by zeroes */ ++struct wander_entry { ++ d64 original; /* block original location */ ++ d64 wandered; /* block wandered location */ ++}; ++ ++/* REISER4 JOURNAL WRITER FUNCTIONS */ ++ ++extern int reiser4_write_logs(long *); ++extern int reiser4_journal_replay(struct super_block *); ++extern int reiser4_journal_recover_sb_data(struct super_block *); ++ ++extern int reiser4_init_journal_info(struct super_block *); ++extern void reiser4_done_journal_info(struct super_block *); ++ ++extern int write_jnode_list(struct list_head *, flush_queue_t *, long *, int); ++ ++#endif /* __FS_REISER4_WANDER_H__ */ ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 80 ++ scroll-step: 1 ++ End: ++*/ +diff --git a/fs/reiser4/writeout.h b/fs/reiser4/writeout.h +new file mode 100644 +index 0000000..446b63b +--- /dev/null ++++ b/fs/reiser4/writeout.h +@@ -0,0 +1,22 @@ ++/* Copyright 2002, 2003, 2004 by Hans Reiser, licensing governed by reiser4/README */ ++ ++#if !defined (__FS_REISER4_WRITEOUT_H__) ++#define __FS_REISER4_WRITEOUT_H__ ++ ++#define WRITEOUT_SINGLE_STREAM (0x1) ++#define WRITEOUT_FOR_PAGE_RECLAIM (0x2) ++#define WRITEOUT_BARRIER (0x4) ++ ++extern int reiser4_get_writeout_flags(void); ++ ++#endif /* __FS_REISER4_WRITEOUT_H__ */ ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 80 ++ End: ++*/ +diff --git a/fs/reiser4/znode.c b/fs/reiser4/znode.c +new file mode 100644 +index 0000000..b695111 +--- /dev/null ++++ b/fs/reiser4/znode.c +@@ -0,0 +1,1029 @@ ++/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++/* Znode manipulation functions. */ ++/* Znode is the in-memory header for a tree node. It is stored ++ separately from the node itself so that it does not get written to ++ disk. In this respect znode is like buffer head or page head. We ++ also use znodes for additional reiser4 specific purposes: ++ ++ . they are organized into tree structure which is a part of whole ++ reiser4 tree. ++ . they are used to implement node grained locking ++ . they are used to keep additional state associated with a ++ node ++ . they contain links to lists used by the transaction manager ++ ++ Znode is attached to some variable "block number" which is instance of ++ fs/reiser4/tree.h:reiser4_block_nr type. Znode can exist without ++ appropriate node being actually loaded in memory. Existence of znode itself ++ is regulated by reference count (->x_count) in it.
Each time thread ++ acquires reference to znode through call to zget(), ->x_count is ++ incremented and decremented on call to zput(). Data (content of node) are ++ brought in memory through call to zload(), which also increments ->d_count ++ reference counter. zload can block waiting on IO. Call to zrelse() ++ decreases this counter. Also, ->c_count keeps track of number of child ++ znodes and prevents parent znode from being recycled until all of its ++ children are. ->c_count is decremented whenever child goes out of existence ++ (being actually recycled in zdestroy()) which can be some time after last ++ reference to this child dies if we support some form of LRU cache for ++ znodes. ++ ++*/ ++/* EVERY ZNODE'S STORY ++ ++ 1. His infancy. ++ ++ Once upon a time, the znode was born deep inside of zget() by call to ++ zalloc(). At the return from zget() znode had: ++ ++ . reference counter (x_count) of 1 ++ . assigned block number, marked as used in bitmap ++ . pointer to parent znode. Root znode parent pointer points ++ to its father: "fake" znode. This, in turn, has NULL parent pointer. ++ . hash table linkage ++ . no data loaded from disk ++ . no node plugin ++ . no sibling linkage ++ ++ 2. His childhood ++ ++ Each node is either brought into memory as a result of tree traversal, or ++ created afresh, creation of the root being a special case of the latter. In ++ either case it's inserted into sibling list. This will typically require ++ some ancillary tree traversing, but ultimately both sibling pointers will ++ exist and JNODE_LEFT_CONNECTED and JNODE_RIGHT_CONNECTED will be true in ++ zjnode.state. ++ ++ 3. His youth. ++ ++ If znode is bound to already existing node in a tree, its content is read ++ from the disk by call to zload(). At that moment, JNODE_LOADED bit is set ++ in zjnode.state and zdata() function starts to return non null for this ++ znode. 
zload() further calls zparse() that determines which node layout ++ this node is rendered in, and sets ->nplug on success. ++ ++ If znode is for new node just created, memory for it is allocated and ++ zinit_new() function is called to initialise data, according to selected ++ node layout. ++ ++ 4. His maturity. ++ ++ After this point, znode lingers in memory for some time. Threads can ++ acquire references to znode either by blocknr through call to zget(), or by ++ following a pointer to unallocated znode from internal item. Each time ++ reference to znode is obtained, x_count is increased. Thread can read/write ++ lock znode. Znode data can be loaded through calls to zload(), d_count will ++ be increased appropriately. If all references to znode are released ++ (x_count drops to 0), znode is not recycled immediately. Rather, it is ++ still cached in the hash table in the hope that it will be accessed ++ shortly. ++ ++ There are two ways in which znode existence can be terminated: ++ ++ . sudden death: node bound to this znode is removed from the tree ++ . overpopulation: znode is purged out of memory due to memory pressure ++ ++ 5. His death. ++ ++ Death is complex process. ++ ++ When we irrevocably commit ourselves to decision to remove node from the ++ tree, JNODE_HEARD_BANSHEE bit is set in zjnode.state of corresponding ++ znode. This is done either in ->kill_hook() of internal item or in ++ reiser4_kill_root() function when tree root is removed. ++ ++ At this moment znode still has: ++ ++ . locks held on it, necessary write ones ++ . references to it ++ . disk block assigned to it ++ . data loaded from the disk ++ . pending requests for lock ++ ++ But once JNODE_HEARD_BANSHEE bit set, last call to unlock_znode() does node ++ deletion. Node deletion includes two phases. 
First all ways to get ++ references to that znode (sibling and parent links and hash lookup using ++ block number stored in parent node) should be deleted -- it is done through ++ sibling_list_remove(), also we assume that nobody uses down link from ++ parent node due to its nonexistence or proper parent node locking and ++ nobody uses parent pointers from children due to absence of them. Second we ++ invalidate all pending lock requests which still are on znode's lock ++ request queue, this is done by reiser4_invalidate_lock(). Another ++ JNODE_IS_DYING znode status bit is used to invalidate pending lock requests. ++ Once it set all requesters are forced to return -EINVAL from ++ longterm_lock_znode(). Future locking attempts are not possible because all ++ ways to get references to that znode are removed already. Last, node is ++ uncaptured from transaction. ++ ++ When last reference to the dying znode is just about to be released, ++ block number for this lock is released and znode is removed from the ++ hash table. ++ ++ Now znode can be recycled. ++ ++ [it's possible to free bitmap block and remove znode from the hash ++ table when last lock is released. This will result in having ++ referenced but completely orphaned znode] ++ ++ 6. Limbo ++ ++ As have been mentioned above znodes with reference counter 0 are ++ still cached in a hash table. Once memory pressure increases they are ++ purged out of there [this requires something like LRU list for ++ efficient implementation. LRU list would also greatly simplify ++ implementation of coord cache that would in this case morph to just ++ scanning some initial segment of LRU list]. Data loaded into ++ unreferenced znode are flushed back to the durable storage if ++ necessary and memory is freed. Znodes themselves can be recycled at ++ this point too. 
++ ++*/ ++ ++#include "debug.h" ++#include "dformat.h" ++#include "key.h" ++#include "coord.h" ++#include "plugin/plugin_header.h" ++#include "plugin/node/node.h" ++#include "plugin/plugin.h" ++#include "txnmgr.h" ++#include "jnode.h" ++#include "znode.h" ++#include "block_alloc.h" ++#include "tree.h" ++#include "tree_walk.h" ++#include "super.h" ++#include "reiser4.h" ++ ++#include <linux/pagemap.h> ++#include <linux/spinlock.h> ++#include <linux/slab.h> ++#include <linux/err.h> ++ ++static z_hash_table *get_htable(reiser4_tree *, ++ const reiser4_block_nr * const blocknr); ++static z_hash_table *znode_get_htable(const znode *); ++static void zdrop(znode *); ++ ++/* hash table support */ ++ ++/* compare two block numbers for equality. Used by hash-table macros */ ++static inline int ++blknreq(const reiser4_block_nr * b1, const reiser4_block_nr * b2) ++{ ++ assert("nikita-534", b1 != NULL); ++ assert("nikita-535", b2 != NULL); ++ ++ return *b1 == *b2; ++} ++ ++/* Hash znode by block number. Used by hash-table macros */ ++/* Audited by: umka (2002.06.11) */ ++static inline __u32 ++blknrhashfn(z_hash_table * table, const reiser4_block_nr * b) ++{ ++ assert("nikita-536", b != NULL); ++ ++ return *b & (REISER4_ZNODE_HASH_TABLE_SIZE - 1); ++} ++ ++/* The hash table definition */ ++#define KMALLOC(size) kmalloc((size), reiser4_ctx_gfp_mask_get()) ++#define KFREE(ptr, size) kfree(ptr) ++TYPE_SAFE_HASH_DEFINE(z, znode, reiser4_block_nr, zjnode.key.z, zjnode.link.z, ++ blknrhashfn, blknreq); ++#undef KFREE ++#undef KMALLOC ++ ++/* slab for znodes */ ++static struct kmem_cache *znode_cache; ++ ++int znode_shift_order; ++ ++/** ++ * init_znodes - create znode cache ++ * ++ * Initializes slab cache of znodes. It is part of reiser4 module initialization.
++ */ ++int init_znodes(void) ++{ ++ znode_cache = kmem_cache_create("znode", sizeof(znode), 0, ++ SLAB_HWCACHE_ALIGN | ++ SLAB_RECLAIM_ACCOUNT, NULL, NULL); ++ if (znode_cache == NULL) ++ return RETERR(-ENOMEM); ++ ++ for (znode_shift_order = 0; (1 << znode_shift_order) < sizeof(znode); ++ ++znode_shift_order); ++ --znode_shift_order; ++ return 0; ++} ++ ++/** ++ * done_znodes - delete znode cache ++ * ++ * This is called on reiser4 module unloading or system shutdown. ++ */ ++void done_znodes(void) ++{ ++ destroy_reiser4_cache(&znode_cache); ++} ++ ++/* call this to initialise tree of znodes */ ++int znodes_tree_init(reiser4_tree * tree /* tree to initialise znodes for */ ) ++{ ++ int result; ++ assert("umka-050", tree != NULL); ++ ++ rwlock_init(&tree->dk_lock); ++ ++ result = z_hash_init(&tree->zhash_table, REISER4_ZNODE_HASH_TABLE_SIZE); ++ if (result != 0) ++ return result; ++ result = z_hash_init(&tree->zfake_table, REISER4_ZNODE_HASH_TABLE_SIZE); ++ return result; ++} ++ ++/* free this znode */ ++void zfree(znode * node /* znode to free */ ) ++{ ++ assert("nikita-465", node != NULL); ++ assert("nikita-2120", znode_page(node) == NULL); ++ assert("nikita-2301", list_empty_careful(&node->lock.owners)); ++ assert("nikita-2302", list_empty_careful(&node->lock.requestors)); ++ assert("nikita-2663", (list_empty_careful(&ZJNODE(node)->capture_link) && ++ NODE_LIST(ZJNODE(node)) == NOT_CAPTURED)); ++ assert("nikita-3220", list_empty(&ZJNODE(node)->jnodes)); ++ assert("nikita-3293", !znode_is_right_connected(node)); ++ assert("nikita-3294", !znode_is_left_connected(node)); ++ assert("nikita-3295", node->left == NULL); ++ assert("nikita-3296", node->right == NULL); ++ ++ /* not yet phash_jnode_destroy(ZJNODE(node)); */ ++ ++ kmem_cache_free(znode_cache, node); ++} ++ ++/* call this to free tree of znodes */ ++void znodes_tree_done(reiser4_tree * tree /* tree to finish with znodes of */ ) ++{ ++ znode *node; ++ znode *next; ++ z_hash_table *ztable; ++ ++ /* scan znode 
hash-tables and kill all znodes, then free hash tables ++ * themselves. */ ++ ++ assert("nikita-795", tree != NULL); ++ ++ ztable = &tree->zhash_table; ++ ++ if (ztable->_table != NULL) { ++ for_all_in_htable(ztable, z, node, next) { ++ node->c_count = 0; ++ node->in_parent.node = NULL; ++ assert("nikita-2179", atomic_read(&ZJNODE(node)->x_count) == 0); ++ zdrop(node); ++ } ++ ++ z_hash_done(&tree->zhash_table); ++ } ++ ++ ztable = &tree->zfake_table; ++ ++ if (ztable->_table != NULL) { ++ for_all_in_htable(ztable, z, node, next) { ++ node->c_count = 0; ++ node->in_parent.node = NULL; ++ assert("nikita-2179", atomic_read(&ZJNODE(node)->x_count) == 0); ++ zdrop(node); ++ } ++ ++ z_hash_done(&tree->zfake_table); ++ } ++} ++ ++/* ZNODE STRUCTURES */ ++ ++/* allocate fresh znode */ ++znode *zalloc(gfp_t gfp_flag /* allocation flag */ ) ++{ ++ znode *node; ++ ++ node = kmem_cache_alloc(znode_cache, gfp_flag); ++ return node; ++} ++ ++/* Initialize fields of znode ++ @node: znode to initialize; ++ @parent: parent znode; ++ @tree: tree we are in. */ ++void zinit(znode * node, const znode * parent, reiser4_tree * tree) ++{ ++ assert("nikita-466", node != NULL); ++ assert("umka-268", current_tree != NULL); ++ ++ memset(node, 0, sizeof *node); ++ ++ assert("umka-051", tree != NULL); ++ ++ jnode_init(&node->zjnode, tree, JNODE_FORMATTED_BLOCK); ++ reiser4_init_lock(&node->lock); ++ init_parent_coord(&node->in_parent, parent); ++} ++ ++/* ++ * remove znode from indices. This is called jput() when last reference on ++ * znode is released. 
++ */ ++void znode_remove(znode * node /* znode to remove */ , reiser4_tree * tree) ++{ ++ assert("nikita-2108", node != NULL); ++ assert("nikita-470", node->c_count == 0); ++ assert_rw_write_locked(&(tree->tree_lock)); ++ ++ /* remove reference to this znode from cbk cache */ ++ cbk_cache_invalidate(node, tree); ++ ++ /* update c_count of parent */ ++ if (znode_parent(node) != NULL) { ++ assert("nikita-472", znode_parent(node)->c_count > 0); ++ /* father, onto your hands I forward my spirit... */ ++ znode_parent(node)->c_count--; ++ node->in_parent.node = NULL; ++ } else { ++ /* orphaned znode?! Root? */ ++ } ++ ++ /* remove znode from hash-table */ ++ z_hash_remove_rcu(znode_get_htable(node), node); ++} ++ ++/* zdrop() -- Remove znode from the tree. ++ ++ This is called when znode is removed from the memory. */ ++static void zdrop(znode * node /* znode to finish with */ ) ++{ ++ jdrop(ZJNODE(node)); ++} ++ ++/* ++ * put znode into right place in the hash table. This is called by relocate ++ * code. 
++ */ ++int znode_rehash(znode * node /* node to rehash */ , ++ const reiser4_block_nr * new_block_nr /* new block number */ ) ++{ ++ z_hash_table *oldtable; ++ z_hash_table *newtable; ++ reiser4_tree *tree; ++ ++ assert("nikita-2018", node != NULL); ++ ++ tree = znode_get_tree(node); ++ oldtable = znode_get_htable(node); ++ newtable = get_htable(tree, new_block_nr); ++ ++ write_lock_tree(tree); ++ /* remove znode from hash-table */ ++ z_hash_remove_rcu(oldtable, node); ++ ++ /* assertion no longer valid due to RCU */ ++ /* assert("nikita-2019", z_hash_find(newtable, new_block_nr) == NULL); */ ++ ++ /* update blocknr */ ++ znode_set_block(node, new_block_nr); ++ node->zjnode.key.z = *new_block_nr; ++ ++ /* insert it into hash */ ++ z_hash_insert_rcu(newtable, node); ++ write_unlock_tree(tree); ++ return 0; ++} ++ ++/* ZNODE LOOKUP, GET, PUT */ ++ ++/* zlook() - get znode with given block_nr in a hash table or return NULL ++ ++ If result is non-NULL then the znode's x_count is incremented. Internal version ++ accepts pre-computed hash index. The hash table is accessed under caller's ++ tree->hash_lock. 
++*/ ++znode *zlook(reiser4_tree * tree, const reiser4_block_nr * const blocknr) ++{ ++ znode *result; ++ __u32 hash; ++ z_hash_table *htable; ++ ++ assert("jmacd-506", tree != NULL); ++ assert("jmacd-507", blocknr != NULL); ++ ++ htable = get_htable(tree, blocknr); ++ hash = blknrhashfn(htable, blocknr); ++ ++ rcu_read_lock(); ++ result = z_hash_find_index(htable, hash, blocknr); ++ ++ if (result != NULL) { ++ add_x_ref(ZJNODE(result)); ++ result = znode_rip_check(tree, result); ++ } ++ rcu_read_unlock(); ++ ++ return result; ++} ++ ++/* return hash table where znode with block @blocknr is (or should be) ++ * stored */ ++static z_hash_table *get_htable(reiser4_tree * tree, ++ const reiser4_block_nr * const blocknr) ++{ ++ z_hash_table *table; ++ if (is_disk_addr_unallocated(blocknr)) ++ table = &tree->zfake_table; ++ else ++ table = &tree->zhash_table; ++ return table; ++} ++ ++/* return hash table where znode @node is (or should be) stored */ ++static z_hash_table *znode_get_htable(const znode * node) ++{ ++ return get_htable(znode_get_tree(node), znode_get_block(node)); ++} ++ ++/* zget() - get znode from hash table, allocating it if necessary. ++ ++ First a call to zlook, locating a x-referenced znode if one ++ exists. If znode is not found, allocate new one and return. Result ++ is returned with x_count reference increased. ++ ++ LOCKS TAKEN: TREE_LOCK, ZNODE_LOCK ++ LOCK ORDERING: NONE ++*/ ++znode *zget(reiser4_tree * tree, ++ const reiser4_block_nr * const blocknr, ++ znode * parent, tree_level level, gfp_t gfp_flag) ++{ ++ znode *result; ++ __u32 hashi; ++ ++ z_hash_table *zth; ++ ++ assert("jmacd-512", tree != NULL); ++ assert("jmacd-513", blocknr != NULL); ++ assert("jmacd-514", level < REISER4_MAX_ZTREE_HEIGHT); ++ ++ zth = get_htable(tree, blocknr); ++ hashi = blknrhashfn(zth, blocknr); ++ ++ /* NOTE-NIKITA address-as-unallocated-blocknr still is not ++ implemented. 
*/ ++ ++ z_hash_prefetch_bucket(zth, hashi); ++ ++ rcu_read_lock(); ++ /* Find a matching BLOCKNR in the hash table. If the znode is found, ++ we obtain an reference (x_count) but the znode remains unlocked. ++ Have to worry about race conditions later. */ ++ result = z_hash_find_index(zth, hashi, blocknr); ++ /* According to the current design, the hash table lock protects new ++ znode references. */ ++ if (result != NULL) { ++ add_x_ref(ZJNODE(result)); ++ /* NOTE-NIKITA it should be so, but special case during ++ creation of new root makes such assertion highly ++ complicated. */ ++ assert("nikita-2131", 1 || znode_parent(result) == parent || ++ (ZF_ISSET(result, JNODE_ORPHAN) ++ && (znode_parent(result) == NULL))); ++ result = znode_rip_check(tree, result); ++ } ++ ++ rcu_read_unlock(); ++ ++ if (!result) { ++ znode *shadow; ++ ++ result = zalloc(gfp_flag); ++ if (!result) { ++ return ERR_PTR(RETERR(-ENOMEM)); ++ } ++ ++ zinit(result, parent, tree); ++ ZJNODE(result)->blocknr = *blocknr; ++ ZJNODE(result)->key.z = *blocknr; ++ result->level = level; ++ ++ write_lock_tree(tree); ++ ++ shadow = z_hash_find_index(zth, hashi, blocknr); ++ if (unlikely(shadow != NULL && !ZF_ISSET(shadow, JNODE_RIP))) { ++ jnode_list_remove(ZJNODE(result)); ++ zfree(result); ++ result = shadow; ++ } else { ++ result->version = znode_build_version(tree); ++ z_hash_insert_index_rcu(zth, hashi, result); ++ ++ if (parent != NULL) ++ ++parent->c_count; ++ } ++ ++ add_x_ref(ZJNODE(result)); ++ ++ write_unlock_tree(tree); ++ } ++#if REISER4_DEBUG ++ if (!reiser4_blocknr_is_fake(blocknr) && *blocknr != 0) ++ reiser4_check_block(blocknr, 1); ++#endif ++ /* Check for invalid tree level, return -EIO */ ++ if (unlikely(znode_get_level(result) != level)) { ++ warning("jmacd-504", ++ "Wrong level for cached block %llu: %i expecting %i", ++ (unsigned long long)(*blocknr), znode_get_level(result), ++ level); ++ zput(result); ++ return ERR_PTR(RETERR(-EIO)); ++ } ++ ++ assert("nikita-1227", 
znode_invariant(result)); ++ ++ return result; ++} ++ ++/* ZNODE PLUGINS/DATA */ ++ ++/* "guess" plugin for node loaded from the disk. Plugin id of node plugin is ++ stored at the fixed offset from the beginning of the node. */ ++static node_plugin *znode_guess_plugin(const znode * node /* znode to guess ++ * plugin of */ ) ++{ ++ reiser4_tree *tree; ++ ++ assert("nikita-1053", node != NULL); ++ assert("nikita-1055", zdata(node) != NULL); ++ ++ tree = znode_get_tree(node); ++ assert("umka-053", tree != NULL); ++ ++ if (reiser4_is_set(tree->super, REISER4_ONE_NODE_PLUGIN)) { ++ return tree->nplug; ++ } else { ++ return node_plugin_by_disk_id ++ (tree, &((common_node_header *) zdata(node))->plugin_id); ++#ifdef GUESS_EXISTS ++ reiser4_plugin *plugin; ++ ++ /* NOTE-NIKITA add locking here when dynamic plugins will be ++ * implemented */ ++ for_all_plugins(REISER4_NODE_PLUGIN_TYPE, plugin) { ++ if ((plugin->u.node.guess != NULL) ++ && plugin->u.node.guess(node)) ++ return plugin; ++ } ++ warning("nikita-1057", "Cannot guess node plugin"); ++ print_znode("node", node); ++ return NULL; ++#endif ++ } ++} ++ ++/* parse node header and install ->node_plugin */ ++int zparse(znode * node /* znode to parse */ ) ++{ ++ int result; ++ ++ assert("nikita-1233", node != NULL); ++ assert("nikita-2370", zdata(node) != NULL); ++ ++ if (node->nplug == NULL) { ++ node_plugin *nplug; ++ ++ nplug = znode_guess_plugin(node); ++ if (likely(nplug != NULL)) { ++ result = nplug->parse(node); ++ if (likely(result == 0)) ++ node->nplug = nplug; ++ } else { ++ result = RETERR(-EIO); ++ } ++ } else ++ result = 0; ++ return result; ++} ++ ++/* zload with readahead */ ++int zload_ra(znode * node /* znode to load */ , ra_info_t * info) ++{ ++ int result; ++ ++ assert("nikita-484", node != NULL); ++ assert("nikita-1377", znode_invariant(node)); ++ assert("jmacd-7771", !znode_above_root(node)); ++ assert("nikita-2125", atomic_read(&ZJNODE(node)->x_count) > 0); ++ assert("nikita-3016", 
reiser4_schedulable()); ++ ++ if (info) ++ formatted_readahead(node, info); ++ ++ result = jload(ZJNODE(node)); ++ assert("nikita-1378", znode_invariant(node)); ++ return result; ++} ++ ++/* load content of node into memory */ ++int zload(znode * node) ++{ ++ return zload_ra(node, NULL); ++} ++ ++/* call node plugin to initialise newly allocated node. */ ++int zinit_new(znode * node /* znode to initialise */ , gfp_t gfp_flags) ++{ ++ return jinit_new(ZJNODE(node), gfp_flags); ++} ++ ++/* drop reference to node data. When last reference is dropped, data are ++ unloaded. */ ++void zrelse(znode * node /* znode to release references to */ ) ++{ ++ assert("nikita-1381", znode_invariant(node)); ++ ++ jrelse(ZJNODE(node)); ++} ++ ++/* returns free space in node */ ++unsigned znode_free_space(znode * node /* znode to query */ ) ++{ ++ assert("nikita-852", node != NULL); ++ return node_plugin_by_node(node)->free_space(node); ++} ++ ++/* left delimiting key of znode */ ++reiser4_key *znode_get_rd_key(znode * node /* znode to query */ ) ++{ ++ assert("nikita-958", node != NULL); ++ assert_rw_locked(&(znode_get_tree(node)->dk_lock)); ++ assert("nikita-3067", LOCK_CNT_GTZ(rw_locked_dk)); ++ assert("nikita-30671", node->rd_key_version != 0); ++ return &node->rd_key; ++} ++ ++/* right delimiting key of znode */ ++reiser4_key *znode_get_ld_key(znode * node /* znode to query */ ) ++{ ++ assert("nikita-974", node != NULL); ++ assert_rw_locked(&(znode_get_tree(node)->dk_lock)); ++ assert("nikita-3068", LOCK_CNT_GTZ(rw_locked_dk)); ++ assert("nikita-30681", node->ld_key_version != 0); ++ return &node->ld_key; ++} ++ ++ON_DEBUG(atomic_t delim_key_version = ATOMIC_INIT(0); ++ ) ++ ++/* update right-delimiting key of @node */ ++reiser4_key *znode_set_rd_key(znode * node, const reiser4_key * key) ++{ ++ assert("nikita-2937", node != NULL); ++ assert("nikita-2939", key != NULL); ++ assert_rw_write_locked(&(znode_get_tree(node)->dk_lock)); ++ assert("nikita-3069", 
LOCK_CNT_GTZ(write_locked_dk)); ++ assert("nikita-2944", ++ znode_is_any_locked(node) || ++ znode_get_level(node) != LEAF_LEVEL || ++ keyge(key, &node->rd_key) || ++ keyeq(&node->rd_key, reiser4_min_key()) || ++ ZF_ISSET(node, JNODE_HEARD_BANSHEE)); ++ ++ node->rd_key = *key; ++ ON_DEBUG(node->rd_key_version = atomic_inc_return(&delim_key_version)); ++ return &node->rd_key; ++} ++ ++/* update left-delimiting key of @node */ ++reiser4_key *znode_set_ld_key(znode * node, const reiser4_key * key) ++{ ++ assert("nikita-2940", node != NULL); ++ assert("nikita-2941", key != NULL); ++ assert_rw_write_locked(&(znode_get_tree(node)->dk_lock)); ++ assert("nikita-3070", LOCK_CNT_GTZ(write_locked_dk)); ++ assert("nikita-2943", ++ znode_is_any_locked(node) || keyeq(&node->ld_key, ++ reiser4_min_key())); ++ ++ node->ld_key = *key; ++ ON_DEBUG(node->ld_key_version = atomic_inc_return(&delim_key_version)); ++ return &node->ld_key; ++} ++ ++/* true if @key is inside key range for @node */ ++int znode_contains_key(znode * node /* znode to look in */ , ++ const reiser4_key * key /* key to look for */ ) ++{ ++ assert("nikita-1237", node != NULL); ++ assert("nikita-1238", key != NULL); ++ ++ /* left_delimiting_key <= key <= right_delimiting_key */ ++ return keyle(znode_get_ld_key(node), key) ++ && keyle(key, znode_get_rd_key(node)); ++} ++ ++/* same as znode_contains_key(), but lock dk lock */ ++int znode_contains_key_lock(znode * node /* znode to look in */ , ++ const reiser4_key * key /* key to look for */ ) ++{ ++ int result; ++ ++ assert("umka-056", node != NULL); ++ assert("umka-057", key != NULL); ++ ++ read_lock_dk(znode_get_tree(node)); ++ result = znode_contains_key(node, key); ++ read_unlock_dk(znode_get_tree(node)); ++ return result; ++} ++ ++/* get parent pointer, assuming tree is not locked */ ++znode *znode_parent_nolock(const znode * node /* child znode */ ) ++{ ++ assert("nikita-1444", node != NULL); ++ return node->in_parent.node; ++} ++ ++/* get parent pointer of 
znode */ ++znode *znode_parent(const znode * node /* child znode */ ) ++{ ++ assert("nikita-1226", node != NULL); ++ assert("nikita-1406", LOCK_CNT_GTZ(rw_locked_tree)); ++ return znode_parent_nolock(node); ++} ++ ++/* detect uber znode used to protect in-superblock tree root pointer */ ++int znode_above_root(const znode * node /* znode to query */ ) ++{ ++ assert("umka-059", node != NULL); ++ ++ return disk_addr_eq(&ZJNODE(node)->blocknr, &UBER_TREE_ADDR); ++} ++ ++/* check that @node is root---that its block number is recorder in the tree as ++ that of root node */ ++#if REISER4_DEBUG ++static int znode_is_true_root(const znode * node /* znode to query */ ) ++{ ++ assert("umka-060", node != NULL); ++ assert("umka-061", current_tree != NULL); ++ ++ return disk_addr_eq(znode_get_block(node), ++ &znode_get_tree(node)->root_block); ++} ++#endif ++ ++/* check that @node is root */ ++int znode_is_root(const znode * node /* znode to query */ ) ++{ ++ assert("nikita-1206", node != NULL); ++ ++ return znode_get_level(node) == znode_get_tree(node)->height; ++} ++ ++/* Returns true is @node was just created by zget() and wasn't ever loaded ++ into memory. */ ++/* NIKITA-HANS: yes */ ++int znode_just_created(const znode * node) ++{ ++ assert("nikita-2188", node != NULL); ++ return (znode_page(node) == NULL); ++} ++ ++/* obtain updated ->znode_epoch. See seal.c for description. 
*/ ++__u64 znode_build_version(reiser4_tree * tree) ++{ ++ __u64 result; ++ ++ spin_lock(&tree->epoch_lock); ++ result = ++tree->znode_epoch; ++ spin_unlock(&tree->epoch_lock); ++ return result; ++} ++ ++void init_load_count(load_count * dh) ++{ ++ assert("nikita-2105", dh != NULL); ++ memset(dh, 0, sizeof *dh); ++} ++ ++void done_load_count(load_count * dh) ++{ ++ assert("nikita-2106", dh != NULL); ++ if (dh->node != NULL) { ++ for (; dh->d_ref > 0; --dh->d_ref) ++ zrelse(dh->node); ++ dh->node = NULL; ++ } ++} ++ ++static int incr_load_count(load_count * dh) ++{ ++ int result; ++ ++ assert("nikita-2110", dh != NULL); ++ assert("nikita-2111", dh->node != NULL); ++ ++ result = zload(dh->node); ++ if (result == 0) ++ ++dh->d_ref; ++ return result; ++} ++ ++int incr_load_count_znode(load_count * dh, znode * node) ++{ ++ assert("nikita-2107", dh != NULL); ++ assert("nikita-2158", node != NULL); ++ assert("nikita-2109", ++ ergo(dh->node != NULL, (dh->node == node) || (dh->d_ref == 0))); ++ ++ dh->node = node; ++ return incr_load_count(dh); ++} ++ ++int incr_load_count_jnode(load_count * dh, jnode * node) ++{ ++ if (jnode_is_znode(node)) { ++ return incr_load_count_znode(dh, JZNODE(node)); ++ } ++ return 0; ++} ++ ++void copy_load_count(load_count * new, load_count * old) ++{ ++ int ret = 0; ++ done_load_count(new); ++ new->node = old->node; ++ new->d_ref = 0; ++ ++ while ((new->d_ref < old->d_ref) && (ret = incr_load_count(new)) == 0) { ++ } ++ ++ assert("jmacd-87589", ret == 0); ++} ++ ++void move_load_count(load_count * new, load_count * old) ++{ ++ done_load_count(new); ++ new->node = old->node; ++ new->d_ref = old->d_ref; ++ old->node = NULL; ++ old->d_ref = 0; ++} ++ ++/* convert parent pointer into coord */ ++void parent_coord_to_coord(const parent_coord_t * pcoord, coord_t * coord) ++{ ++ assert("nikita-3204", pcoord != NULL); ++ assert("nikita-3205", coord != NULL); ++ ++ coord_init_first_unit_nocheck(coord, pcoord->node); ++ coord_set_item_pos(coord, 
pcoord->item_pos); ++ coord->between = AT_UNIT; ++} ++ ++/* pack coord into parent_coord_t */ ++void coord_to_parent_coord(const coord_t * coord, parent_coord_t * pcoord) ++{ ++ assert("nikita-3206", pcoord != NULL); ++ assert("nikita-3207", coord != NULL); ++ ++ pcoord->node = coord->node; ++ pcoord->item_pos = coord->item_pos; ++} ++ ++/* Initialize a parent hint pointer. (parent hint pointer is a field in znode, ++ look for comments there) */ ++void init_parent_coord(parent_coord_t * pcoord, const znode * node) ++{ ++ pcoord->node = (znode *) node; ++ pcoord->item_pos = (unsigned short)~0; ++} ++ ++#if REISER4_DEBUG ++ ++/* debugging aid: znode invariant */ ++static int znode_invariant_f(const znode * node /* znode to check */ , ++ char const **msg /* where to store error ++ * message, if any */ ) ++{ ++#define _ergo(ant, con) \ ++ ((*msg) = "{" #ant "} ergo {" #con "}", ergo((ant), (con))) ++ ++#define _equi(e1, e2) \ ++ ((*msg) = "{" #e1 "} <=> {" #e2 "}", equi((e1), (e2))) ++ ++#define _check(exp) ((*msg) = #exp, (exp)) ++ ++ return jnode_invariant_f(ZJNODE(node), msg) && ++ /* [znode-fake] invariant */ ++ /* fake znode doesn't have a parent, and */ ++ _ergo(znode_get_level(node) == 0, znode_parent(node) == NULL) && ++ /* there is another way to express this very check, and */ ++ _ergo(znode_above_root(node), znode_parent(node) == NULL) && ++ /* it has special block number, and */ ++ _ergo(znode_get_level(node) == 0, ++ disk_addr_eq(znode_get_block(node), &UBER_TREE_ADDR)) && ++ /* it is the only znode with such block number, and */ ++ _ergo(!znode_above_root(node) && znode_is_loaded(node), ++ !disk_addr_eq(znode_get_block(node), &UBER_TREE_ADDR)) && ++ /* it is parent of the tree root node */ ++ _ergo(znode_is_true_root(node), ++ znode_above_root(znode_parent(node))) && ++ /* [znode-level] invariant */ ++ /* level of parent znode is one larger than that of child, ++ except for the fake znode, and */ ++ _ergo(znode_parent(node) && 
!znode_above_root(znode_parent(node)), ++ znode_get_level(znode_parent(node)) == ++ znode_get_level(node) + 1) && ++ /* left neighbor is at the same level, and */ ++ _ergo(znode_is_left_connected(node) && node->left != NULL, ++ znode_get_level(node) == znode_get_level(node->left)) && ++ /* right neighbor is at the same level */ ++ _ergo(znode_is_right_connected(node) && node->right != NULL, ++ znode_get_level(node) == znode_get_level(node->right)) && ++ /* [znode-connected] invariant */ ++ _ergo(node->left != NULL, znode_is_left_connected(node)) && ++ _ergo(node->right != NULL, znode_is_right_connected(node)) && ++ _ergo(!znode_is_root(node) && node->left != NULL, ++ znode_is_right_connected(node->left) && ++ node->left->right == node) && ++ _ergo(!znode_is_root(node) && node->right != NULL, ++ znode_is_left_connected(node->right) && ++ node->right->left == node) && ++ /* [znode-c_count] invariant */ ++ /* for any znode, c_count of its parent is greater than 0 */ ++ _ergo(znode_parent(node) != NULL && ++ !znode_above_root(znode_parent(node)), ++ znode_parent(node)->c_count > 0) && ++ /* leaves don't have children */ ++ _ergo(znode_get_level(node) == LEAF_LEVEL, ++ node->c_count == 0) && ++ _check(node->zjnode.jnodes.prev != NULL) && ++ _check(node->zjnode.jnodes.next != NULL) && ++ /* orphan doesn't have a parent */ ++ _ergo(ZF_ISSET(node, JNODE_ORPHAN), znode_parent(node) == 0) && ++ /* [znode-modify] invariant */ ++ /* if znode is not write-locked, its checksum remains ++ * invariant */ ++ /* unfortunately, zlock is unordered w.r.t. jnode_lock, so we ++ * cannot check this. 
*/ ++ /* [znode-refs] invariant */ ++ /* only referenced znode can be long-term locked */ ++ _ergo(znode_is_locked(node), ++ atomic_read(&ZJNODE(node)->x_count) != 0); ++} ++ ++/* debugging aid: check znode invariant and panic if it doesn't hold */ ++int znode_invariant(znode * node /* znode to check */ ) ++{ ++ char const *failed_msg; ++ int result; ++ ++ assert("umka-063", node != NULL); ++ assert("umka-064", current_tree != NULL); ++ ++ spin_lock_znode(node); ++ read_lock_tree(znode_get_tree(node)); ++ result = znode_invariant_f(node, &failed_msg); ++ if (!result) { ++ /* print_znode("corrupted node", node); */ ++ warning("jmacd-555", "Condition %s failed", failed_msg); ++ } ++ read_unlock_tree(znode_get_tree(node)); ++ spin_unlock_znode(node); ++ return result; ++} ++ ++/* return non-0 iff data are loaded into znode */ ++int znode_is_loaded(const znode * node /* znode to query */ ) ++{ ++ assert("nikita-497", node != NULL); ++ return jnode_is_loaded(ZJNODE(node)); ++} ++ ++unsigned long znode_times_locked(const znode * z) ++{ ++ return z->times_locked; ++} ++ ++#endif /* REISER4_DEBUG */ ++ ++/* Make Linus happy. ++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/fs/reiser4/znode.h b/fs/reiser4/znode.h +new file mode 100644 +index 0000000..4699d0f +--- /dev/null ++++ b/fs/reiser4/znode.h +@@ -0,0 +1,434 @@ ++/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by ++ * reiser4/README */ ++ ++/* Declaration of znode (Zam's node). See znode.c for more details. 
*/ ++ ++#ifndef __ZNODE_H__ ++#define __ZNODE_H__ ++ ++#include "forward.h" ++#include "debug.h" ++#include "dformat.h" ++#include "key.h" ++#include "coord.h" ++#include "plugin/node/node.h" ++#include "jnode.h" ++#include "lock.h" ++#include "readahead.h" ++ ++#include ++#include ++#include /* for PAGE_CACHE_SIZE */ ++#include ++#include ++ ++/* znode tracks its position within parent (internal item in a parent node, ++ * that contains znode's block number). */ ++typedef struct parent_coord { ++ znode *node; ++ pos_in_node_t item_pos; ++} parent_coord_t; ++ ++/* &znode - node in a reiser4 tree. ++ ++ NOTE-NIKITA fields in this struct have to be rearranged (later) to reduce ++ cacheline pressure. ++ ++ Locking: ++ ++ Long term: data in a disk node attached to this znode are protected ++ by long term, deadlock aware lock ->lock; ++ ++ Spin lock: the following fields are protected by the spin lock: ++ ++ ->lock ++ ++ Following fields are protected by the global tree lock: ++ ++ ->left ++ ->right ++ ->in_parent ++ ->c_count ++ ++ Following fields are protected by the global delimiting key lock (dk_lock): ++ ++ ->ld_key (to update ->ld_key long-term lock on the node is also required) ++ ->rd_key ++ ++ Following fields are protected by the long term lock: ++ ++ ->nr_items ++ ++ ->node_plugin is never changed once set. This means that after code made ++ itself sure that field is valid it can be accessed without any additional ++ locking. ++ ++ ->level is immutable. ++ ++ Invariants involving this data-type: ++ ++ [znode-fake] ++ [znode-level] ++ [znode-connected] ++ [znode-c_count] ++ [znode-refs] ++ [jnode-refs] ++ [jnode-queued] ++ [znode-modify] ++ ++ For this to be made into a clustering or NUMA filesystem, we would want to eliminate all of the global locks. ++ Suggestions for how to do that are desired.*/ ++struct znode { ++ /* Embedded jnode. */ ++ jnode zjnode; ++ ++ /* contains three subfields, node, pos_in_node, and pos_in_unit. 
++ ++ pos_in_node and pos_in_unit are only hints that are cached to ++ speed up lookups during balancing. They are not required to be up to ++ date. Synched in find_child_ptr(). ++ ++ This value allows us to avoid expensive binary searches. ++ ++ in_parent->node points to the parent of this node, and is NOT a ++ hint. ++ */ ++ parent_coord_t in_parent; ++ ++ /* ++ * sibling list pointers ++ */ ++ ++ /* left-neighbor */ ++ znode *left; ++ /* right-neighbor */ ++ znode *right; ++ ++ /* long term lock on node content. This lock supports deadlock ++ detection. See lock.c ++ */ ++ zlock lock; ++ ++ /* You cannot remove from memory a node that has children in ++ memory. This is because we rely on the fact that parent of given ++ node can always be reached without blocking for io. When reading a ++ node into memory you must increase the c_count of its parent, when ++ removing it from memory you must decrease the c_count. This makes ++ the code simpler, and the cases where it is suboptimal are truly ++ obscure. ++ */ ++ int c_count; ++ ++ /* plugin of node attached to this znode. NULL if znode is not ++ loaded. */ ++ node_plugin *nplug; ++ ++ /* version of znode data. This is increased on each modification. This ++ * is necessary to implement seals (see seal.[ch]) efficiently. */ ++ __u64 version; ++ ++ /* left delimiting key. Necessary to efficiently perform ++ balancing with node-level locking. Kept in memory only. */ ++ reiser4_key ld_key; ++ /* right delimiting key. */ ++ reiser4_key rd_key; ++ ++ /* znode's tree level */ ++ __u16 level; ++ /* number of items in this node. This field is modified by node ++ * plugin. 
*/ ++ __u16 nr_items; ++ ++#if REISER4_DEBUG ++ void *creator; ++ reiser4_key first_key; ++ unsigned long times_locked; ++ int left_version; /* when node->left was updated */ ++ int right_version; /* when node->right was updated */ ++ int ld_key_version; /* when node->ld_key was updated */ ++ int rd_key_version; /* when node->rd_key was updated */ ++#endif ++ ++} __attribute__ ((aligned(16))); ++ ++ON_DEBUG(extern atomic_t delim_key_version; ++ ) ++ ++/* In general I think these macros should not be exposed. */ ++#define znode_is_locked(node) (lock_is_locked(&node->lock)) ++#define znode_is_rlocked(node) (lock_is_rlocked(&node->lock)) ++#define znode_is_wlocked(node) (lock_is_wlocked(&node->lock)) ++#define znode_is_wlocked_once(node) (lock_is_wlocked_once(&node->lock)) ++#define znode_can_be_rlocked(node) (lock_can_be_rlocked(&node->lock)) ++#define is_lock_compatible(node, mode) (lock_mode_compatible(&node->lock, mode)) ++/* Macros for accessing the znode state. */ ++#define ZF_CLR(p,f) JF_CLR (ZJNODE(p), (f)) ++#define ZF_ISSET(p,f) JF_ISSET(ZJNODE(p), (f)) ++#define ZF_SET(p,f) JF_SET (ZJNODE(p), (f)) ++extern znode *zget(reiser4_tree * tree, const reiser4_block_nr * const block, ++ znode * parent, tree_level level, gfp_t gfp_flag); ++extern znode *zlook(reiser4_tree * tree, const reiser4_block_nr * const block); ++extern int zload(znode * node); ++extern int zload_ra(znode * node, ra_info_t * info); ++extern int zinit_new(znode * node, gfp_t gfp_flags); ++extern void zrelse(znode * node); ++extern void znode_change_parent(znode * new_parent, reiser4_block_nr * block); ++ ++/* size of data in znode */ ++static inline unsigned ++znode_size(const znode * node UNUSED_ARG /* znode to query */ ) ++{ ++ assert("nikita-1416", node != NULL); ++ return PAGE_CACHE_SIZE; ++} ++ ++extern void parent_coord_to_coord(const parent_coord_t * pcoord, ++ coord_t * coord); ++extern void coord_to_parent_coord(const coord_t * coord, ++ parent_coord_t * pcoord); ++extern void 
init_parent_coord(parent_coord_t * pcoord, const znode * node); ++ ++extern unsigned znode_free_space(znode * node); ++ ++extern reiser4_key *znode_get_rd_key(znode * node); ++extern reiser4_key *znode_get_ld_key(znode * node); ++ ++extern reiser4_key *znode_set_rd_key(znode * node, const reiser4_key * key); ++extern reiser4_key *znode_set_ld_key(znode * node, const reiser4_key * key); ++ ++/* `connected' state checks */ ++static inline int znode_is_right_connected(const znode * node) ++{ ++ return ZF_ISSET(node, JNODE_RIGHT_CONNECTED); ++} ++ ++static inline int znode_is_left_connected(const znode * node) ++{ ++ return ZF_ISSET(node, JNODE_LEFT_CONNECTED); ++} ++ ++static inline int znode_is_connected(const znode * node) ++{ ++ return znode_is_right_connected(node) && znode_is_left_connected(node); ++} ++ ++extern int znode_shift_order; ++extern int znode_rehash(znode * node, const reiser4_block_nr * new_block_nr); ++extern void znode_remove(znode *, reiser4_tree *); ++extern znode *znode_parent(const znode * node); ++extern znode *znode_parent_nolock(const znode * node); ++extern int znode_above_root(const znode * node); ++extern int init_znodes(void); ++extern void done_znodes(void); ++extern int znodes_tree_init(reiser4_tree * ztree); ++extern void znodes_tree_done(reiser4_tree * ztree); ++extern int znode_contains_key(znode * node, const reiser4_key * key); ++extern int znode_contains_key_lock(znode * node, const reiser4_key * key); ++extern unsigned znode_save_free_space(znode * node); ++extern unsigned znode_recover_free_space(znode * node); ++extern znode *zalloc(gfp_t gfp_flag); ++extern void zinit(znode *, const znode * parent, reiser4_tree *); ++extern int zparse(znode * node); ++ ++extern int znode_just_created(const znode * node); ++ ++extern void zfree(znode * node); ++ ++#if REISER4_DEBUG ++extern void print_znode(const char *prefix, const znode * node); ++#else ++#define print_znode( p, n ) noop ++#endif ++ ++/* Make it look like various znode 
functions exist instead of treating znodes as ++ jnodes in znode-specific code. */ ++#define znode_page(x) jnode_page ( ZJNODE(x) ) ++#define zdata(x) jdata ( ZJNODE(x) ) ++#define znode_get_block(x) jnode_get_block ( ZJNODE(x) ) ++#define znode_created(x) jnode_created ( ZJNODE(x) ) ++#define znode_set_created(x) jnode_set_created ( ZJNODE(x) ) ++#define znode_convertible(x) jnode_convertible (ZJNODE(x)) ++#define znode_set_convertible(x) jnode_set_convertible (ZJNODE(x)) ++ ++#define znode_is_dirty(x) jnode_is_dirty ( ZJNODE(x) ) ++#define znode_check_dirty(x) jnode_check_dirty ( ZJNODE(x) ) ++#define znode_make_clean(x) jnode_make_clean ( ZJNODE(x) ) ++#define znode_set_block(x, b) jnode_set_block ( ZJNODE(x), (b) ) ++ ++#define spin_lock_znode(x) spin_lock_jnode ( ZJNODE(x) ) ++#define spin_unlock_znode(x) spin_unlock_jnode ( ZJNODE(x) ) ++#define spin_trylock_znode(x) spin_trylock_jnode ( ZJNODE(x) ) ++#define spin_znode_is_locked(x) spin_jnode_is_locked ( ZJNODE(x) ) ++#define spin_znode_is_not_locked(x) spin_jnode_is_not_locked ( ZJNODE(x) ) ++ ++#if REISER4_DEBUG ++extern int znode_x_count_is_protected(const znode * node); ++extern int znode_invariant(znode * node); ++#endif ++ ++/* acquire reference to @node */ ++static inline znode *zref(znode * node) ++{ ++ /* change of x_count from 0 to 1 is protected by tree spin-lock */ ++ return JZNODE(jref(ZJNODE(node))); ++} ++ ++/* release reference to @node */ ++static inline void zput(znode * node) ++{ ++ assert("nikita-3564", znode_invariant(node)); ++ jput(ZJNODE(node)); ++} ++ ++/* get the level field for a znode */ ++static inline tree_level znode_get_level(const znode * node) ++{ ++ return node->level; ++} ++ ++/* get the level field for a jnode */ ++static inline tree_level jnode_get_level(const jnode * node) ++{ ++ if (jnode_is_znode(node)) ++ return znode_get_level(JZNODE(node)); ++ else ++ /* unformatted nodes are all at the LEAF_LEVEL and for ++ "semi-formatted" nodes like bitmaps, level doesn't 
matter. */ ++ return LEAF_LEVEL; ++} ++ ++/* true if jnode is on leaf level */ ++static inline int jnode_is_leaf(const jnode * node) ++{ ++ if (jnode_is_znode(node)) ++ return (znode_get_level(JZNODE(node)) == LEAF_LEVEL); ++ if (jnode_get_type(node) == JNODE_UNFORMATTED_BLOCK) ++ return 1; ++ return 0; ++} ++ ++/* return znode's tree */ ++static inline reiser4_tree *znode_get_tree(const znode * node) ++{ ++ assert("nikita-2692", node != NULL); ++ return jnode_get_tree(ZJNODE(node)); ++} ++ ++/* resolve race with zput */ ++static inline znode *znode_rip_check(reiser4_tree * tree, znode * node) ++{ ++ jnode *j; ++ ++ j = jnode_rip_sync(tree, ZJNODE(node)); ++ if (likely(j != NULL)) ++ node = JZNODE(j); ++ else ++ node = NULL; ++ return node; ++} ++ ++#if defined(REISER4_DEBUG) ++int znode_is_loaded(const znode * node /* znode to query */ ); ++#endif ++ ++extern __u64 znode_build_version(reiser4_tree * tree); ++ ++/* Data-handles. A data handle object manages pairing calls to zload() and zrelse(). We ++ must load the data for a node in many places. We could do this by simply calling ++ zload() everywhere, the difficulty arises when we must release the loaded data by ++ calling zrelse. In a function with many possible error/return paths, it requires extra ++ work to figure out which exit paths must call zrelse and those which do not. The data ++ handle automatically calls zrelse for every zload that it is responsible for. In that ++ sense, it acts much like a lock_handle. ++*/ ++typedef struct load_count { ++ znode *node; ++ int d_ref; ++} load_count; ++ ++extern void init_load_count(load_count * lc); /* Initialize a load_count set the current node to NULL. */ ++extern void done_load_count(load_count * dh); /* Finalize a load_count: call zrelse() if necessary */ ++extern int incr_load_count_znode(load_count * dh, znode * node); /* Set the argument znode to the current node, call zload(). 
*/ ++extern int incr_load_count_jnode(load_count * dh, jnode * node); /* If the argument jnode is formatted, do the same as ++ * incr_load_count_znode, otherwise do nothing (unformatted nodes ++ * don't require zload/zrelse treatment). */ ++extern void move_load_count(load_count * new, load_count * old); /* Move the contents of a load_count. Old handle is released. */ ++extern void copy_load_count(load_count * new, load_count * old); /* Copy the contents of a load_count. Old handle remains held. */ ++ ++/* Variable initializers for load_count. */ ++#define INIT_LOAD_COUNT ( load_count * ){ .node = NULL, .d_ref = 0 } ++#define INIT_LOAD_COUNT_NODE( n ) ( load_count ){ .node = ( n ), .d_ref = 0 } ++/* A convenience macro for use in assertions or debug-only code, where loaded ++ data is only required to perform the debugging check. This macro ++ encapsulates an expression inside a pair of calls to zload()/zrelse(). */ ++#define WITH_DATA( node, exp ) \ ++({ \ ++ long __with_dh_result; \ ++ znode *__with_dh_node; \ ++ \ ++ __with_dh_node = ( node ); \ ++ __with_dh_result = zload( __with_dh_node ); \ ++ if( __with_dh_result == 0 ) { \ ++ __with_dh_result = ( long )( exp ); \ ++ zrelse( __with_dh_node ); \ ++ } \ ++ __with_dh_result; \ ++}) ++ ++/* Same as above, but accepts a return value in case zload fails. 
*/ ++#define WITH_DATA_RET( node, ret, exp ) \ ++({ \ ++ int __with_dh_result; \ ++ znode *__with_dh_node; \ ++ \ ++ __with_dh_node = ( node ); \ ++ __with_dh_result = zload( __with_dh_node ); \ ++ if( __with_dh_result == 0 ) { \ ++ __with_dh_result = ( int )( exp ); \ ++ zrelse( __with_dh_node ); \ ++ } else \ ++ __with_dh_result = ( ret ); \ ++ __with_dh_result; \ ++}) ++ ++#define WITH_COORD(coord, exp) \ ++({ \ ++ coord_t *__coord; \ ++ \ ++ __coord = (coord); \ ++ coord_clear_iplug(__coord); \ ++ WITH_DATA(__coord->node, exp); \ ++}) ++ ++#if REISER4_DEBUG ++#define STORE_COUNTERS \ ++ reiser4_lock_counters_info __entry_counters = \ ++ *reiser4_lock_counters() ++#define CHECK_COUNTERS \ ++ON_DEBUG_CONTEXT( \ ++({ \ ++ __entry_counters.x_refs = reiser4_lock_counters() -> x_refs; \ ++ __entry_counters.t_refs = reiser4_lock_counters() -> t_refs; \ ++ __entry_counters.d_refs = reiser4_lock_counters() -> d_refs; \ ++ assert("nikita-2159", \ ++ !memcmp(&__entry_counters, reiser4_lock_counters(), \ ++ sizeof __entry_counters)); \ ++}) ) ++ ++#else ++#define STORE_COUNTERS ++#define CHECK_COUNTERS noop ++#endif ++ ++/* __ZNODE_H__ */ ++#endif ++ ++/* Make Linus happy. 
++ Local variables: ++ c-indentation-style: "K&R" ++ mode-name: "LC" ++ c-basic-offset: 8 ++ tab-width: 8 ++ fill-column: 120 ++ End: ++*/ +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 1410e53..dd12411 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1165,6 +1165,8 @@ struct super_operations { + void (*clear_inode) (struct inode *); + void (*umount_begin) (struct vfsmount *, int); + ++ void (*sync_inodes) (struct super_block *sb, ++ struct writeback_control *wbc); + int (*show_options)(struct seq_file *, struct vfsmount *); + int (*show_stats)(struct seq_file *, struct vfsmount *); + #ifdef CONFIG_QUOTA +@@ -1583,6 +1585,7 @@ extern int invalidate_inode_pages2(struct address_space *mapping); + extern int invalidate_inode_pages2_range(struct address_space *mapping, + pgoff_t start, pgoff_t end); + extern int write_inode_now(struct inode *, int); ++extern void generic_sync_sb_inodes(struct super_block *, struct writeback_control *); + extern int filemap_fdatawrite(struct address_space *); + extern int filemap_flush(struct address_space *); + extern int filemap_fdatawait(struct address_space *); +diff --git a/lib/radix-tree.c b/lib/radix-tree.c +index d69ddbe..ed3e15f 100644 +--- a/lib/radix-tree.c ++++ b/lib/radix-tree.c +@@ -151,6 +151,7 @@ int radix_tree_preload(gfp_t gfp_mask) + out: + return ret; + } ++EXPORT_SYMBOL(radix_tree_preload); + + static inline void tag_set(struct radix_tree_node *node, unsigned int tag, + int offset) +diff --git a/mm/filemap.c b/mm/filemap.c +index 8332c77..b16d2cb 100644 +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -121,6 +121,7 @@ void __remove_from_page_cache(struct page *page) + mapping->nrpages--; + __dec_zone_page_state(page, NR_FILE_PAGES); + } ++EXPORT_SYMBOL(__remove_from_page_cache); + + void remove_from_page_cache(struct page *page) + { +@@ -132,6 +133,7 @@ void remove_from_page_cache(struct page *page) + __remove_from_page_cache(page); + write_unlock_irq(&mapping->tree_lock); + } 
++EXPORT_SYMBOL(remove_from_page_cache); + + static int sync_page(void *word) + { +@@ -465,6 +467,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, + lru_cache_add(page); + return ret; + } ++EXPORT_SYMBOL(add_to_page_cache_lru); + + #ifdef CONFIG_NUMA + struct page *__page_cache_alloc(gfp_t gfp) +@@ -738,6 +741,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, + read_unlock_irq(&mapping->tree_lock); + return ret; + } ++EXPORT_SYMBOL(find_get_pages); + + /** + * find_get_pages_contig - gang contiguous pagecache lookup +@@ -798,6 +802,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, + read_unlock_irq(&mapping->tree_lock); + return ret; + } ++EXPORT_SYMBOL(find_get_pages_tag); + + /** + * grab_cache_page_nowait - returns locked page at given index in given cache +diff --git a/mm/readahead.c b/mm/readahead.c +index 0f539e8..9db41de 100644 +--- a/mm/readahead.c ++++ b/mm/readahead.c +@@ -568,6 +568,7 @@ void handle_ra_miss(struct address_space *mapping, + ra->flags &= ~RA_FLAG_INCACHE; + ra->cache_hit = 0; + } ++EXPORT_SYMBOL_GPL(handle_ra_miss); + + /* + * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a