]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
Merge tag 'block-6.7-2023-12-01' of git://git.kernel.dk/linux
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 1 Dec 2023 21:39:30 +0000 (06:39 +0900)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 1 Dec 2023 21:39:30 +0000 (06:39 +0900)
Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
     - Invalid namespace identification error handling (Marizio Ewan,
       Keith)
     - Fabrics keep-alive tuning (Mark)

 - Fix for a bad error check regression in bcache (Markus)

 - Fix for a performance regression with O_DIRECT (Ming)

 - Fix for a flush related deadlock (Ming)

 - Make the read-only warn on per-partition (Yu)

* tag 'block-6.7-2023-12-01' of git://git.kernel.dk/linux:
  nvme-core: check for too small lba shift
  blk-mq: don't count completed flush data request as inflight in case of quiesce
  block: Document the role of the two attribute groups
  block: warn once for each partition in bio_check_ro()
  block: move .bd_inode into 1st cacheline of block_device
  nvme: check for valid nvme_identify_ns() before using it
  nvme-core: fix a memory leak in nvme_ns_info_from_identify()
  nvme: fine-tune sending of first keep-alive
  bcache: revert replacing IS_ERR_OR_NULL with IS_ERR

631 files changed:
Documentation/ABI/testing/sysfs-class-led
Documentation/arch/loongarch/introduction.rst
Documentation/arch/x86/boot.rst
Documentation/devicetree/bindings/net/ethernet-controller.yaml
Documentation/devicetree/bindings/pinctrl/nxp,s32g2-siul2-pinctrl.yaml
Documentation/devicetree/bindings/ufs/qcom,ufs.yaml
Documentation/devicetree/bindings/usb/microchip,usb5744.yaml
Documentation/devicetree/bindings/usb/qcom,dwc3.yaml
Documentation/devicetree/bindings/usb/usb-hcd.yaml
Documentation/filesystems/erofs.rst
Documentation/process/maintainer-netdev.rst
Documentation/translations/zh_CN/arch/loongarch/introduction.rst
MAINTAINERS
Makefile
arch/arm/xen/enlighten.c
arch/arm64/Makefile
arch/arm64/include/asm/setup.h
arch/arm64/mm/pageattr.c
arch/loongarch/Makefile
arch/loongarch/include/asm/asmmacro.h
arch/loongarch/include/asm/percpu.h
arch/loongarch/include/asm/setup.h
arch/loongarch/kernel/relocate.c
arch/loongarch/kernel/time.c
arch/loongarch/mm/pgtable.c
arch/parisc/Kconfig
arch/parisc/include/asm/alternative.h
arch/parisc/include/asm/assembly.h
arch/parisc/include/asm/bug.h
arch/parisc/include/asm/elf.h
arch/parisc/include/asm/jump_label.h
arch/parisc/include/asm/ldcw.h
arch/parisc/include/asm/processor.h
arch/parisc/include/asm/uaccess.h
arch/parisc/include/uapi/asm/errno.h
arch/parisc/kernel/processor.c
arch/parisc/kernel/sys_parisc.c
arch/parisc/kernel/vmlinux.lds.S
arch/s390/include/asm/processor.h
arch/s390/kernel/ipl.c
arch/s390/kernel/perf_pai_crypto.c
arch/s390/kernel/perf_pai_ext.c
arch/x86/events/intel/core.c
arch/x86/hyperv/hv_init.c
arch/x86/include/asm/acpi.h
arch/x86/include/asm/xen/hypervisor.h
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/cpu/microcode/amd.c
arch/x86/kernel/cpu/microcode/core.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/cpu/microcode/internal.h
arch/x86/kernel/cpu/mshyperv.c
arch/x86/kernel/signal_64.c
block/bdev.c
drivers/accel/ivpu/ivpu_hw_37xx.c
drivers/accel/ivpu/ivpu_pm.c
drivers/acpi/acpi_video.c
drivers/acpi/device_pm.c
drivers/acpi/processor_idle.c
drivers/acpi/resource.c
drivers/ata/libata-scsi.c
drivers/ata/pata_isapnp.c
drivers/dpll/dpll_netlink.c
drivers/firewire/sbp2.c
drivers/firmware/Kconfig
drivers/firmware/efi/unaccepted_memory.c
drivers/firmware/qemu_fw_cfg.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/amd/display/dc/core/dc_resource.c
drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
drivers/gpu/drm/amd/display/dc/dc_types.h
drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c
drivers/gpu/drm/amd/display/dc/link/link_detection.c
drivers/gpu/drm/amd/display/dmub/dmub_srv.h
drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
drivers/gpu/drm/ast/ast_drv.h
drivers/gpu/drm/ast/ast_mode.c
drivers/gpu/drm/ci/xfails/requirements.txt
drivers/gpu/drm/drm_panel_orientation_quirks.c
drivers/gpu/drm/i915/display/intel_dp_mst.c
drivers/gpu/drm/i915/gt/intel_gt.c
drivers/gpu/drm/i915/i915_driver.c
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
drivers/gpu/drm/msm/dp/dp_display.c
drivers/gpu/drm/msm/dp/dp_drm.c
drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
drivers/gpu/drm/msm/msm_drv.c
drivers/gpu/drm/nouveau/include/nvkm/core/event.h
drivers/gpu/drm/nouveau/nouveau_display.c
drivers/gpu/drm/nouveau/nvkm/core/event.c
drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c
drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
drivers/gpu/drm/panel/panel-simple.c
drivers/gpu/drm/rockchip/rockchip_drm_vop.c
drivers/hid/hid-apple.c
drivers/hid/hid-asus.c
drivers/hid/hid-core.c
drivers/hid/hid-debug.c
drivers/hid/hid-glorious.c
drivers/hid/hid-ids.h
drivers/hid/hid-logitech-dj.c
drivers/hid/hid-mcp2221.c
drivers/hid/hid-multitouch.c
drivers/hid/hid-quirks.c
drivers/i2c/busses/i2c-designware-common.c
drivers/i2c/busses/i2c-ocores.c
drivers/i2c/busses/i2c-pxa.c
drivers/irqchip/irq-gic-v3-its.c
drivers/leds/led-class.c
drivers/md/bcache/btree.c
drivers/md/bcache/journal.c
drivers/md/bcache/movinggc.c
drivers/md/bcache/request.c
drivers/md/bcache/request.h
drivers/md/bcache/super.c
drivers/md/bcache/writeback.c
drivers/md/dm-bufio.c
drivers/md/dm-crypt.c
drivers/md/dm-delay.c
drivers/md/dm-flakey.c
drivers/md/dm-verity-fec.c
drivers/md/dm-verity-target.c
drivers/md/dm-verity.h
drivers/media/pci/mgb4/Kconfig
drivers/media/pci/mgb4/mgb4_core.c
drivers/media/platform/renesas/vsp1/vsp1_pipe.c
drivers/media/platform/renesas/vsp1/vsp1_rpf.c
drivers/media/platform/renesas/vsp1/vsp1_rwpf.c
drivers/media/platform/renesas/vsp1/vsp1_rwpf.h
drivers/media/platform/renesas/vsp1/vsp1_wpf.c
drivers/mmc/core/block.c
drivers/mmc/core/core.c
drivers/mmc/host/cqhci-core.c
drivers/mmc/host/sdhci-pci-gli.c
drivers/mmc/host/sdhci-sprd.c
drivers/net/bonding/bond_main.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/ethernet/amd/pds_core/adminq.c
drivers/net/ethernet/amd/pds_core/core.h
drivers/net/ethernet/amd/pds_core/dev.c
drivers/net/ethernet/amd/pds_core/devlink.c
drivers/net/ethernet/amd/xgbe/xgbe-drv.c
drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/broadcom/tg3.h
drivers/net/ethernet/cortina/gemini.c
drivers/net/ethernet/cortina/gemini.h
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
drivers/net/ethernet/google/gve/gve_main.c
drivers/net/ethernet/google/gve/gve_rx.c
drivers/net/ethernet/google/gve/gve_tx.c
drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/ice/ice_ddp.c
drivers/net/ethernet/intel/ice/ice_dpll.c
drivers/net/ethernet/intel/ice/ice_dpll.h
drivers/net/ethernet/intel/ice/ice_lag.c
drivers/net/ethernet/intel/ice/ice_lag.h
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_ptp.c
drivers/net/ethernet/intel/ice/ice_ptp.h
drivers/net/ethernet/intel/ice/ice_ptp_hw.c
drivers/net/ethernet/intel/ice/ice_ptp_hw.h
drivers/net/ethernet/intel/ice/ice_txrx.c
drivers/net/ethernet/intel/ice/ice_txrx.h
drivers/net/ethernet/intel/ice/ice_vf_lib.c
drivers/net/ethernet/intel/ice/ice_virtchnl.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/renesas/rswitch.c
drivers/net/ethernet/stmicro/stmmac/Kconfig
drivers/net/ethernet/stmicro/stmmac/mmc_core.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/ti/icssg/icssg_prueth.c
drivers/net/ethernet/wangxun/libwx/wx_hw.c
drivers/net/ethernet/wangxun/libwx/wx_lib.c
drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/ipa/reg/gsi_reg-v5.0.c
drivers/net/ipvlan/ipvlan_core.c
drivers/net/macvlan.c
drivers/net/netdevsim/bpf.c
drivers/net/netkit.c
drivers/net/ppp/ppp_synctty.c
drivers/net/usb/aqc111.c
drivers/net/usb/ax88179_178a.c
drivers/net/usb/qmi_wwan.c
drivers/net/veth.c
drivers/net/vrf.c
drivers/net/wireguard/device.c
drivers/net/wireguard/receive.c
drivers/net/wireguard/send.c
drivers/net/wireless/ath/ath9k/Kconfig
drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
drivers/net/wireless/mediatek/mt76/mt7925/main.c
drivers/nfc/virtual_ncidev.c
drivers/parisc/power.c
drivers/phy/Kconfig
drivers/phy/Makefile
drivers/phy/realtek/Kconfig [deleted file]
drivers/phy/realtek/Makefile [deleted file]
drivers/phy/realtek/phy-rtk-usb2.c [deleted file]
drivers/phy/realtek/phy-rtk-usb3.c [deleted file]
drivers/pinctrl/cirrus/Kconfig
drivers/pinctrl/core.c
drivers/pinctrl/nxp/pinctrl-s32cc.c
drivers/pinctrl/pinctrl-cy8c95x0.c
drivers/pinctrl/realtek/pinctrl-rtd.c
drivers/pinctrl/stm32/pinctrl-stm32.c
drivers/platform/x86/amd/pmc/pmc.c
drivers/platform/x86/hp/hp-bioscfg/bioscfg.c
drivers/platform/x86/ideapad-laptop.c
drivers/platform/x86/intel/telemetry/core.c
drivers/pmdomain/arm/scmi_perf_domain.c
drivers/ptp/ptp_chardev.c
drivers/ptp/ptp_clock.c
drivers/ptp/ptp_private.h
drivers/ptp/ptp_sysfs.c
drivers/s390/net/Kconfig
drivers/s390/net/ism_drv.c
drivers/scsi/qla2xxx/qla_os.c
drivers/scsi/scsi_debug.c
drivers/scsi/sd.c
drivers/thunderbolt/switch.c
drivers/thunderbolt/tb.c
drivers/ufs/core/ufs-mcq.c
drivers/ufs/core/ufshcd.c
drivers/usb/cdns3/cdnsp-ring.c
drivers/usb/core/config.c
drivers/usb/core/hub.c
drivers/usb/dwc2/hcd_intr.c
drivers/usb/dwc3/core.c
drivers/usb/dwc3/drd.c
drivers/usb/dwc3/dwc3-qcom.c
drivers/usb/dwc3/dwc3-rtk.c
drivers/usb/host/xhci-mtk-sch.c
drivers/usb/host/xhci-mtk.h
drivers/usb/host/xhci-plat.c
drivers/usb/misc/onboard_usb_hub.c
drivers/usb/misc/onboard_usb_hub.h
drivers/usb/misc/usb-ljca.c
drivers/usb/serial/option.c
drivers/usb/typec/tcpm/tcpm.c
drivers/usb/typec/tipd/core.c
drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
drivers/vhost/vdpa.c
drivers/virtio/virtio_pci_common.c
drivers/virtio/virtio_pci_modern_dev.c
drivers/xen/events/events_2l.c
drivers/xen/events/events_base.c
drivers/xen/events/events_internal.h
drivers/xen/pcpu.c
drivers/xen/privcmd.c
drivers/xen/swiotlb-xen.c
drivers/xen/xen-front-pgdir-shbuf.c
fs/afs/dynroot.c
fs/afs/internal.h
fs/afs/server_list.c
fs/afs/super.c
fs/afs/vl_rotate.c
fs/autofs/inode.c
fs/bcachefs/Kconfig
fs/bcachefs/alloc_foreground.c
fs/bcachefs/backpointers.c
fs/bcachefs/bcachefs.h
fs/bcachefs/bcachefs_format.h
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_io.c
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_journal_iter.c
fs/bcachefs/btree_journal_iter.h
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_key_cache_types.h [new file with mode: 0644]
fs/bcachefs/btree_trans_commit.c
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_interior.h
fs/bcachefs/buckets.c
fs/bcachefs/compress.c
fs/bcachefs/data_update.c
fs/bcachefs/data_update.h
fs/bcachefs/disk_groups.c
fs/bcachefs/ec.c
fs/bcachefs/errcode.h
fs/bcachefs/extents.c
fs/bcachefs/fs-io-direct.c
fs/bcachefs/fs-io-pagecache.c
fs/bcachefs/fs-io-pagecache.h
fs/bcachefs/fs.c
fs/bcachefs/fsck.c
fs/bcachefs/inode.c
fs/bcachefs/io_read.c
fs/bcachefs/io_write.c
fs/bcachefs/io_write.h
fs/bcachefs/journal.c
fs/bcachefs/journal.h
fs/bcachefs/journal_io.c
fs/bcachefs/journal_io.h
fs/bcachefs/journal_reclaim.c
fs/bcachefs/journal_types.h
fs/bcachefs/move.c
fs/bcachefs/move.h
fs/bcachefs/movinggc.c
fs/bcachefs/recovery.c
fs/bcachefs/replicas.c
fs/bcachefs/replicas.h
fs/bcachefs/six.c
fs/bcachefs/snapshot.c
fs/bcachefs/subvolume_types.h
fs/bcachefs/super-io.c
fs/bcachefs/super.c
fs/bcachefs/super_types.h
fs/bcachefs/trace.h
fs/bcachefs/xattr.c
fs/btrfs/ctree.c
fs/btrfs/delayed-ref.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent-tree.h
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/qgroup.c
fs/btrfs/raid-stripe-tree.c
fs/btrfs/ref-verify.c
fs/btrfs/scrub.c
fs/btrfs/send.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/tree-checker.c
fs/btrfs/volumes.c
fs/btrfs/zoned.c
fs/debugfs/file.c
fs/debugfs/inode.c
fs/debugfs/internal.h
fs/ecryptfs/inode.c
fs/erofs/Kconfig
fs/erofs/data.c
fs/erofs/inode.c
fs/ext2/file.c
fs/inode.c
fs/libfs.c
fs/nfsd/cache.h
fs/nfsd/nfs4state.c
fs/nfsd/nfscache.c
fs/nfsd/nfssvc.c
fs/overlayfs/inode.c
fs/overlayfs/overlayfs.h
fs/overlayfs/params.c
fs/overlayfs/util.c
fs/smb/client/cifs_spnego.c
fs/smb/client/cifsglob.h
fs/smb/client/cifspdu.h
fs/smb/client/cifsproto.h
fs/smb/client/cifssmb.c
fs/smb/client/connect.c
fs/smb/client/inode.c
fs/smb/client/readdir.c
fs/smb/client/sess.c
fs/smb/client/smb1ops.c
fs/smb/client/smb2inode.c
fs/smb/client/smb2ops.c
fs/smb/client/smb2transport.c
fs/smb/server/ksmbd_work.c
fs/smb/server/oplock.c
fs/smb/server/smb2pdu.c
fs/smb/server/smbacl.c
fs/smb/server/smbacl.h
fs/smb/server/vfs.c
fs/smb/server/vfs.h
fs/smb/server/vfs_cache.c
fs/smb/server/vfs_cache.h
fs/stat.c
fs/tracefs/event_inode.c
fs/tracefs/inode.c
fs/xfs/Kconfig
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_defer.c
fs/xfs/libxfs/xfs_defer.h
fs/xfs/libxfs/xfs_inode_buf.c
fs/xfs/xfs_dquot.c
fs/xfs/xfs_dquot_item_recover.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_inode_item_recover.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_log.c
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_reflink.c
include/acpi/acpi_bus.h
include/asm-generic/qspinlock.h
include/linux/bpf.h
include/linux/bpf_verifier.h
include/linux/closure.h
include/linux/cpuhotplug.h
include/linux/debugfs.h
include/linux/export-internal.h
include/linux/hid.h
include/linux/hrtimer.h
include/linux/ieee80211.h
include/linux/mdio.h
include/linux/netdevice.h
include/linux/pagemap.h
include/linux/perf_event.h
include/linux/skmsg.h
include/linux/stackleak.h
include/linux/usb/phy.h
include/linux/virtio_pci_modern.h
include/net/af_unix.h
include/net/cfg80211.h
include/net/neighbour.h
include/net/netfilter/nf_tables.h
include/net/netkit.h
include/net/tc_act/tc_ct.h
include/scsi/scsi_device.h
include/trace/events/rxrpc.h
include/uapi/linux/btrfs_tree.h
include/uapi/linux/fcntl.h
include/uapi/linux/stddef.h
include/uapi/linux/v4l2-subdev.h
include/uapi/linux/virtio_pci.h
include/xen/events.h
io_uring/fdinfo.c
io_uring/fs.c
io_uring/rsrc.c
io_uring/sqpoll.c
kernel/audit_watch.c
kernel/bpf/core.c
kernel/bpf/memalloc.c
kernel/bpf/verifier.c
kernel/cgroup/cgroup.c
kernel/cpu.c
kernel/events/core.c
kernel/futex/core.c
kernel/locking/lockdep.c
kernel/sched/fair.c
kernel/sys.c
kernel/time/hrtimer.c
lib/closure.c
lib/errname.c
lib/iov_iter.c
lib/kunit/kunit-test.c
lib/kunit/test.c
lib/zstd/common/fse_decompress.c
mm/damon/core.c
mm/damon/sysfs-schemes.c
mm/damon/sysfs.c
mm/filemap.c
mm/huge_memory.c
mm/ksm.c
mm/memcontrol.c
mm/page-writeback.c
mm/userfaultfd.c
mm/util.c
net/bridge/netfilter/nf_conntrack_bridge.c
net/core/dev.c
net/core/filter.c
net/core/gso_test.c
net/core/skmsg.c
net/ethtool/netlink.c
net/ipv4/igmp.c
net/ipv4/inet_diag.c
net/ipv4/inet_hashtables.c
net/ipv4/raw_diag.c
net/ipv4/route.c
net/ipv4/tcp_diag.c
net/ipv4/udp_diag.c
net/mac80211/Kconfig
net/mac80211/debugfs_netdev.c
net/mac80211/debugfs_sta.c
net/mac80211/driver-ops.h
net/mac80211/ht.c
net/mptcp/mptcp_diag.c
net/mptcp/options.c
net/mptcp/pm_netlink.c
net/mptcp/protocol.c
net/mptcp/sockopt.c
net/ncsi/ncsi-aen.c
net/netfilter/ipset/ip_set_core.c
net/netfilter/nf_tables_api.c
net/netfilter/nft_byteorder.c
net/netfilter/nft_meta.c
net/netfilter/nft_set_rbtree.c
net/packet/diag.c
net/rxrpc/conn_client.c
net/rxrpc/input.c
net/sched/act_ct.c
net/sctp/diag.c
net/smc/af_smc.c
net/smc/smc_diag.c
net/tipc/diag.c
net/tipc/netlink_compat.c
net/tls/tls_sw.c
net/unix/af_unix.c
net/unix/diag.c
net/unix/unix_bpf.c
net/vmw_vsock/diag.c
net/wireless/core.c
net/wireless/core.h
net/wireless/debugfs.c
net/wireless/nl80211.c
net/xdp/xsk_diag.c
scripts/Makefile.lib
scripts/checkstack.pl
scripts/gcc-plugins/latent_entropy_plugin.c
scripts/gcc-plugins/randomize_layout_plugin.c
scripts/kconfig/symbol.c
scripts/mod/modpost.c
sound/pci/hda/cs35l56_hda_i2c.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_realtek.c
tools/arch/arm64/include/asm/cputype.h
tools/arch/arm64/include/uapi/asm/kvm.h
tools/arch/arm64/include/uapi/asm/perf_regs.h
tools/arch/arm64/tools/Makefile
tools/arch/parisc/include/uapi/asm/errno.h
tools/arch/s390/include/uapi/asm/kvm.h
tools/arch/x86/include/asm/cpufeatures.h
tools/arch/x86/include/asm/disabled-features.h
tools/arch/x86/include/asm/msr-index.h
tools/arch/x86/include/uapi/asm/prctl.h
tools/hv/hv_kvp_daemon.c
tools/hv/hv_set_ifconfig.sh
tools/include/asm-generic/unaligned.h
tools/include/uapi/asm-generic/unistd.h
tools/include/uapi/drm/drm.h
tools/include/uapi/drm/i915_drm.h
tools/include/uapi/linux/fscrypt.h
tools/include/uapi/linux/kvm.h
tools/include/uapi/linux/mount.h
tools/include/uapi/linux/vhost.h
tools/net/ynl/Makefile.deps
tools/net/ynl/generated/devlink-user.c
tools/net/ynl/generated/ethtool-user.c
tools/net/ynl/generated/fou-user.c
tools/net/ynl/generated/handshake-user.c
tools/net/ynl/ynl-gen-c.py
tools/perf/MANIFEST
tools/perf/Makefile.perf
tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
tools/perf/arch/s390/entry/syscalls/syscall.tbl
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
tools/perf/builtin-kwork.c
tools/perf/trace/beauty/include/linux/socket.h
tools/perf/util/Build
tools/perf/util/bpf_lock_contention.c
tools/power/pm-graph/sleepgraph.py
tools/power/x86/turbostat/turbostat.c
tools/testing/selftests/arm64/fp/za-fork.c
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/bpf/prog_tests/tc_redirect.c
tools/testing/selftests/bpf/prog_tests/verifier.c
tools/testing/selftests/bpf/progs/bpf_loop_bench.c
tools/testing/selftests/bpf/progs/cb_refs.c
tools/testing/selftests/bpf/progs/exceptions_fail.c
tools/testing/selftests/bpf/progs/strobemeta.h
tools/testing/selftests/bpf/progs/test_sockmap_listen.c
tools/testing/selftests/bpf/progs/verifier_cfg.c
tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/verifier_loops1.c
tools/testing/selftests/bpf/progs/verifier_precision.c
tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
tools/testing/selftests/bpf/verifier/calls.c
tools/testing/selftests/bpf/verifier/ld_imm64.c
tools/testing/selftests/bpf/xskxceiver.c
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/mm/.gitignore
tools/testing/selftests/mm/pagemap_ioctl.c
tools/testing/selftests/mm/run_vmtests.sh
tools/testing/selftests/net/af_unix/diag_uid.c
tools/testing/selftests/net/cmsg_sender.c
tools/testing/selftests/net/ipsec.c
tools/testing/selftests/net/mptcp/mptcp_connect.c
tools/testing/selftests/net/mptcp/mptcp_inq.c
tools/testing/selftests/net/mptcp/mptcp_join.sh
tools/testing/selftests/net/rtnetlink.sh
tools/testing/vsock/vsock_test.c

index b2ff0012c0f2b8df7d843fc50f3568c0ca03f715..2e24ac3bd7efa4e64d8dc084462cc0ef92dc9202 100644 (file)
@@ -59,15 +59,6 @@ Description:
                brightness. Reading this file when no hw brightness change
                event has happened will return an ENODATA error.
 
-What:          /sys/class/leds/<led>/color
-Date:          June 2023
-KernelVersion: 6.5
-Description:
-               Color of the LED.
-
-               This is a read-only file. Reading this file returns the color
-               of the LED as a string (e.g: "red", "green", "multicolor").
-
 What:          /sys/class/leds/<led>/trigger
 Date:          March 2006
 KernelVersion: 2.6.17
index 8c568cfc2107984092f4d82c2076a3bed0c05c4f..5e6db78abeaf50fd48a9e8e67fa4d61f49c64905 100644 (file)
@@ -375,9 +375,9 @@ Developer web site of Loongson and LoongArch (Software and Documentation):
 
 Documentation of LoongArch ISA:
 
-  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-CN.pdf (in Chinese)
+  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-CN.pdf (in Chinese)
 
-  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-EN.pdf (in English)
+  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-EN.pdf (in English)
 
 Documentation of LoongArch ELF psABI:
 
index f5d2f2414de8b62fc33aef9550b0db067d924421..22cc7a040dae053ec6da2f44436524b60fdf5a60 100644 (file)
@@ -77,7 +77,7 @@ Protocol 2.14 BURNT BY INCORRECT COMMIT
 Protocol 2.15  (Kernel 5.5) Added the kernel_info and kernel_info.setup_type_max.
 =============  ============================================================
 
-.. note::
+  .. note::
      The protocol version number should be changed only if the setup header
      is changed. There is no need to update the version number if boot_params
      or kernel_info are changed. Additionally, it is recommended to use
index 9f6a5ccbcefef45e62d4447bfadde3052f27b21e..d14d123ad7a028872d5f04de0644685dcba684e3 100644 (file)
@@ -275,12 +275,12 @@ allOf:
       properties:
         rx-internal-delay-ps:
           description:
-            RGMII Receive Clock Delay defined in pico seconds.This is used for
+            RGMII Receive Clock Delay defined in pico seconds. This is used for
             controllers that have configurable RX internal delays. If this
             property is present then the MAC applies the RX delay.
         tx-internal-delay-ps:
           description:
-            RGMII Transmit Clock Delay defined in pico seconds.This is used for
+            RGMII Transmit Clock Delay defined in pico seconds. This is used for
             controllers that have configurable TX internal delays. If this
             property is present then the MAC applies the TX delay.
 
index d49aafd8c5f4d97a42a2e9ea6f1a36d91e7adf13..a24286e4def623475e528337b74aa455073b9be1 100644 (file)
@@ -9,7 +9,7 @@ title: NXP S32G2 pin controller
 
 maintainers:
   - Ghennadi Procopciuc <Ghennadi.Procopciuc@oss.nxp.com>
-  - Chester Lin <clin@suse.com>
+  - Chester Lin <chester62515@gmail.com>
 
 description: |
   S32G2 pinmux is implemented in SIUL2 (System Integration Unit Lite2),
index 462ead5a1cec3298bdd5725d4bf2e8f151920b36..2cf3d016db42c11d6b9dfcf18d9a1e0cfb59c079 100644 (file)
@@ -36,6 +36,7 @@ properties:
           - qcom,sm8350-ufshc
           - qcom,sm8450-ufshc
           - qcom,sm8550-ufshc
+          - qcom,sm8650-ufshc
       - const: qcom,ufshc
       - const: jedec,ufs-2.0
 
@@ -122,6 +123,7 @@ allOf:
               - qcom,sm8350-ufshc
               - qcom,sm8450-ufshc
               - qcom,sm8550-ufshc
+              - qcom,sm8650-ufshc
     then:
       properties:
         clocks:
index ff3a1707ef570f50871ca6bd630381c6d130fbb7..6d4cfd943f5847ff43cbccd13e5f210a95448c1c 100644 (file)
@@ -36,7 +36,11 @@ properties:
 
   vdd-supply:
     description:
-      VDD power supply to the hub
+      3V3 power supply to the hub
+
+  vdd2-supply:
+    description:
+      1V2 power supply to the hub
 
   peer-hub:
     $ref: /schemas/types.yaml#/definitions/phandle
@@ -62,6 +66,7 @@ allOf:
       properties:
         reset-gpios: false
         vdd-supply: false
+        vdd2-supply: false
         peer-hub: false
         i2c-bus: false
     else:
index e889158ca20578893ef0c7072ce1fbf64f70a41c..915c8205623b3abf98a43c9ab851e2bdb32c36f7 100644 (file)
@@ -521,8 +521,8 @@ examples:
 
             interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
                          <GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>,
-                         <GIC_SPI 488 IRQ_TYPE_LEVEL_HIGH>,
-                         <GIC_SPI 489 IRQ_TYPE_LEVEL_HIGH>;
+                         <GIC_SPI 488 IRQ_TYPE_EDGE_BOTH>,
+                         <GIC_SPI 489 IRQ_TYPE_EDGE_BOTH>;
             interrupt-names = "hs_phy_irq", "ss_phy_irq",
                           "dm_hs_phy_irq", "dp_hs_phy_irq";
 
index 692dd60e3f73f321a20761f81e54d512953f9dba..45a19d4928afa6eeb521556e603004bd6d6a5ed8 100644 (file)
@@ -41,7 +41,7 @@ examples:
   - |
     usb {
         phys = <&usb2_phy1>, <&usb3_phy1>;
-        phy-names = "usb";
+        phy-names = "usb2", "usb3";
         #address-cells = <1>;
         #size-cells = <0>;
 
index 57c6ae23b3fcf97ce4f875cfd34d4bd08e7bbae1..cc4626d6ee4f836b50dcb8b3e85de56d01da968f 100644 (file)
@@ -91,6 +91,10 @@ compatibility checking tool (fsck.erofs), and a debugging tool (dump.erofs):
 
 - git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
 
+For more information, please also refer to the documentation site:
+
+- https://erofs.docs.kernel.org
+
 Bugs and patches are welcome, please kindly help us and send to the following
 linux-erofs mailing list:
 
index 7feacc20835e424b584927ce0db990e4a9657c8c..84ee60fceef24cbf1ba9e090ac91c94abd4064b5 100644 (file)
@@ -193,9 +193,23 @@ Review timelines
 Generally speaking, the patches get triaged quickly (in less than
 48h). But be patient, if your patch is active in patchwork (i.e. it's
 listed on the project's patch list) the chances it was missed are close to zero.
-Asking the maintainer for status updates on your
-patch is a good way to ensure your patch is ignored or pushed to the
-bottom of the priority list.
+
+The high volume of development on netdev makes reviewers move on
+from discussions relatively quickly. New comments and replies
+are very unlikely to arrive after a week of silence. If a patch
+is no longer active in patchwork and the thread went idle for more
+than a week - clarify the next steps and/or post the next version.
+
+For RFC postings specifically, if nobody responded in a week - reviewers
+either missed the posting or have no strong opinions. If the code is ready,
+repost as a PATCH.
+
+Emails saying just "ping" or "bump" are considered rude. If you can't figure
+out the status of the patch from patchwork or where the discussion has
+landed - describe your best guess and ask if it's correct. For example::
+
+  I don't understand what the next steps are. Person X seems to be unhappy
+  with A, should I do B and repost the patches?
 
 .. _Changes requested:
 
index 59d6bf33050cb831236321d91eab7bbf313655ea..bf463c5a4c5144c6d2520449472553ae9da88ed8 100644 (file)
@@ -338,9 +338,9 @@ Loongson与LoongArch的开发者网站(软件与文档资源):
 
 LoongArch指令集架构的文档:
 
-  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-CN.pdf (中文版)
+  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-CN.pdf (中文版)
 
-  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-EN.pdf (英文版)
+  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-EN.pdf (英文版)
 
 LoongArch的ELF psABI文档:
 
index 97f51d5ec1cfd715487a616c78afd40324082dfc..edae86acdfdc3b627e69b3210e265b669f2481f0 100644 (file)
@@ -5076,7 +5076,6 @@ CLANG CONTROL FLOW INTEGRITY SUPPORT
 M:     Sami Tolvanen <samitolvanen@google.com>
 M:     Kees Cook <keescook@chromium.org>
 R:     Nathan Chancellor <nathan@kernel.org>
-R:     Nick Desaulniers <ndesaulniers@google.com>
 L:     llvm@lists.linux.dev
 S:     Supported
 B:     https://github.com/ClangBuiltLinux/linux/issues
@@ -5091,8 +5090,9 @@ F:        .clang-format
 
 CLANG/LLVM BUILD SUPPORT
 M:     Nathan Chancellor <nathan@kernel.org>
-M:     Nick Desaulniers <ndesaulniers@google.com>
-R:     Tom Rix <trix@redhat.com>
+R:     Nick Desaulniers <ndesaulniers@google.com>
+R:     Bill Wendling <morbo@google.com>
+R:     Justin Stitt <justinstitt@google.com>
 L:     llvm@lists.linux.dev
 S:     Supported
 W:     https://clangbuiltlinux.github.io/
@@ -5242,7 +5242,6 @@ F:        drivers/platform/x86/compal-laptop.c
 
 COMPILER ATTRIBUTES
 M:     Miguel Ojeda <ojeda@kernel.org>
-R:     Nick Desaulniers <ndesaulniers@google.com>
 S:     Maintained
 F:     include/linux/compiler_attributes.h
 
@@ -7855,6 +7854,7 @@ R:        Yue Hu <huyue2@coolpad.com>
 R:     Jeffle Xu <jefflexu@linux.alibaba.com>
 L:     linux-erofs@lists.ozlabs.org
 S:     Maintained
+W:     https://erofs.docs.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git
 F:     Documentation/ABI/testing/sysfs-fs-erofs
 F:     Documentation/filesystems/erofs.rst
@@ -8950,7 +8950,6 @@ S:        Maintained
 F:     scripts/get_maintainer.pl
 
 GFS2 FILE SYSTEM
-M:     Bob Peterson <rpeterso@redhat.com>
 M:     Andreas Gruenbacher <agruenba@redhat.com>
 L:     gfs2@lists.linux.dev
 S:     Supported
@@ -11025,7 +11024,6 @@ F:      drivers/net/wireless/intel/iwlwifi/
 
 INTEL WMI SLIM BOOTLOADER (SBL) FIRMWARE UPDATE DRIVER
 M:     Jithu Joseph <jithu.joseph@intel.com>
-R:     Maurice Ma <maurice.ma@intel.com>
 S:     Maintained
 W:     https://slimbootloader.github.io/security/firmware-update.html
 F:     drivers/platform/x86/intel/wmi/sbl-fw-update.c
@@ -11517,7 +11515,6 @@ F:      fs/autofs/
 KERNEL BUILD + files below scripts/ (unless maintained elsewhere)
 M:     Masahiro Yamada <masahiroy@kernel.org>
 R:     Nathan Chancellor <nathan@kernel.org>
-R:     Nick Desaulniers <ndesaulniers@google.com>
 R:     Nicolas Schier <nicolas@fjasle.eu>
 L:     linux-kbuild@vger.kernel.org
 S:     Maintained
@@ -13779,7 +13776,6 @@ F:      drivers/net/ethernet/mellanox/mlxfw/
 MELLANOX HARDWARE PLATFORM SUPPORT
 M:     Hans de Goede <hdegoede@redhat.com>
 M:     Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
-M:     Mark Gross <markgross@kernel.org>
 M:     Vadim Pasternak <vadimp@nvidia.com>
 L:     platform-driver-x86@vger.kernel.org
 S:     Supported
@@ -14388,7 +14384,6 @@ F:      drivers/platform/surface/surface_gpe.c
 MICROSOFT SURFACE HARDWARE PLATFORM SUPPORT
 M:     Hans de Goede <hdegoede@redhat.com>
 M:     Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
-M:     Mark Gross <markgross@kernel.org>
 M:     Maximilian Luz <luzmaximilian@gmail.com>
 L:     platform-driver-x86@vger.kernel.org
 S:     Maintained
@@ -14995,6 +14990,7 @@ M:      Jakub Kicinski <kuba@kernel.org>
 M:     Paolo Abeni <pabeni@redhat.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
+P:     Documentation/process/maintainer-netdev.rst
 Q:     https://patchwork.kernel.org/project/netdevbpf/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
@@ -15046,6 +15042,7 @@ M:      Jakub Kicinski <kuba@kernel.org>
 M:     Paolo Abeni <pabeni@redhat.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
+P:     Documentation/process/maintainer-netdev.rst
 Q:     https://patchwork.kernel.org/project/netdevbpf/list/
 B:     mailto:netdev@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
@@ -15056,6 +15053,7 @@ F:      Documentation/networking/
 F:     Documentation/process/maintainer-netdev.rst
 F:     Documentation/userspace-api/netlink/
 F:     include/linux/in.h
+F:     include/linux/indirect_call_wrapper.h
 F:     include/linux/net.h
 F:     include/linux/netdevice.h
 F:     include/net/
@@ -21769,7 +21767,9 @@ F:      Documentation/devicetree/bindings/counter/ti-eqep.yaml
 F:     drivers/counter/ti-eqep.c
 
 TI ETHERNET SWITCH DRIVER (CPSW)
-R:     Grygorii Strashko <grygorii.strashko@ti.com>
+R:     Siddharth Vadapalli <s-vadapalli@ti.com>
+R:     Ravi Gunasekaran <r-gunasekaran@ti.com>
+R:     Roger Quadros <rogerq@kernel.org>
 L:     linux-omap@vger.kernel.org
 L:     netdev@vger.kernel.org
 S:     Maintained
@@ -21793,6 +21793,15 @@ F:     Documentation/devicetree/bindings/media/i2c/ti,ds90*
 F:     drivers/media/i2c/ds90*
 F:     include/media/i2c/ds90*
 
+TI ICSSG ETHERNET DRIVER (ICSSG)
+R:     MD Danish Anwar <danishanwar@ti.com>
+R:     Roger Quadros <rogerq@kernel.org>
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     Documentation/devicetree/bindings/net/ti,icss*.yaml
+F:     drivers/net/ethernet/ti/icssg/*
+
 TI J721E CSI2RX DRIVER
 M:     Jai Luthra <j-luthra@ti.com>
 L:     linux-media@vger.kernel.org
@@ -22068,6 +22077,7 @@ F:      drivers/watchdog/tqmx86_wdt.c
 TRACING
 M:     Steven Rostedt <rostedt@goodmis.org>
 M:     Masami Hiramatsu <mhiramat@kernel.org>
+R:     Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 L:     linux-kernel@vger.kernel.org
 L:     linux-trace-kernel@vger.kernel.org
 S:     Maintained
@@ -23654,7 +23664,6 @@ F:      drivers/platform/x86/x86-android-tablets/
 X86 PLATFORM DRIVERS
 M:     Hans de Goede <hdegoede@redhat.com>
 M:     Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
-M:     Mark Gross <markgross@kernel.org>
 L:     platform-driver-x86@vger.kernel.org
 S:     Maintained
 Q:     https://patchwork.kernel.org/project/platform-driver-x86/list/
@@ -23692,6 +23701,20 @@ F:     arch/x86/kernel/dumpstack.c
 F:     arch/x86/kernel/stacktrace.c
 F:     arch/x86/kernel/unwind_*.c
 
+X86 TRUST DOMAIN EXTENSIONS (TDX)
+M:     Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+R:     Dave Hansen <dave.hansen@linux.intel.com>
+L:     x86@kernel.org
+L:     linux-coco@lists.linux.dev
+S:     Supported
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/tdx
+F:     arch/x86/boot/compressed/tdx*
+F:     arch/x86/coco/tdx/
+F:     arch/x86/include/asm/shared/tdx.h
+F:     arch/x86/include/asm/tdx.h
+F:     arch/x86/virt/vmx/tdx/
+F:     drivers/virt/coco/tdx-guest
+
 X86 VDSO
 M:     Andy Lutomirski <luto@kernel.org>
 L:     linux-kernel@vger.kernel.org
@@ -23872,8 +23895,7 @@ T:      git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
 P:     Documentation/filesystems/xfs-maintainer-entry-profile.rst
 F:     Documentation/ABI/testing/sysfs-fs-xfs
 F:     Documentation/admin-guide/xfs.rst
-F:     Documentation/filesystems/xfs-delayed-logging-design.rst
-F:     Documentation/filesystems/xfs-self-describing-metadata.rst
+F:     Documentation/filesystems/xfs-*
 F:     fs/xfs/
 F:     include/uapi/linux/dqblk_xfs.h
 F:     include/uapi/linux/fsmap.h
index ede0bd24105602eaf7fac59ff7690206ccc1b4a1..99db546fbb4520f8718c5a9cd7b71da5b7d16c94 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 7
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc3
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*
index 9afdc4c4a5dc15f9bf4a1972894e2b046f5489d6..a395b6c0aae2a92e40b88dc37faa2829df9f95f0 100644 (file)
@@ -484,7 +484,8 @@ static int __init xen_guest_init(void)
         * for secondary CPUs as they are brought up.
         * For uniformity we use VCPUOP_register_vcpu_info even on cpu0.
         */
-       xen_vcpu_info = alloc_percpu(struct vcpu_info);
+       xen_vcpu_info = __alloc_percpu(sizeof(struct vcpu_info),
+                                      1 << fls(sizeof(struct vcpu_info) - 1));
        if (xen_vcpu_info == NULL)
                return -ENOMEM;
 
index 4bd85cc0d32bfac906b039cab192f36a5d77a813..9a2d3723cd0fa9563647d8aaa9d6602f2546f823 100644 (file)
@@ -158,7 +158,7 @@ endif
 
 all:   $(notdir $(KBUILD_IMAGE))
 
-
+vmlinuz.efi: Image
 Image vmlinuz.efi: vmlinux
        $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
index f4af547ef54caa70a521387d442ffeb6d04a230b..2e4d7da74fb87a8444c516e343108f88f611336e 100644 (file)
@@ -21,9 +21,22 @@ static inline bool arch_parse_debug_rodata(char *arg)
        extern bool rodata_enabled;
        extern bool rodata_full;
 
-       if (arg && !strcmp(arg, "full")) {
+       if (!arg)
+               return false;
+
+       if (!strcmp(arg, "full")) {
+               rodata_enabled = rodata_full = true;
+               return true;
+       }
+
+       if (!strcmp(arg, "off")) {
+               rodata_enabled = rodata_full = false;
+               return true;
+       }
+
+       if (!strcmp(arg, "on")) {
                rodata_enabled = true;
-               rodata_full = true;
+               rodata_full = false;
                return true;
        }
 
index 8e2017ba5f1b114640544e1f01e0ebb399d074c7..924843f1f661bfe1ff5c6b8f9eff753872416040 100644 (file)
@@ -29,8 +29,8 @@ bool can_set_direct_map(void)
         *
         * KFENCE pool requires page-granular mapping if initialized late.
         */
-       return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() ||
-               arm64_kfence_can_set_direct_map();
+       return rodata_full || debug_pagealloc_enabled() ||
+              arm64_kfence_can_set_direct_map();
 }
 
 static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
@@ -105,8 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages,
         * If we are manipulating read-only permissions, apply the same
         * change to the linear mapping of the pages that back this VM area.
         */
-       if (rodata_enabled &&
-           rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
+       if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
                            pgprot_val(clear_mask) == PTE_RDONLY)) {
                for (i = 0; i < area->nr_pages; i++) {
                        __change_memory_common((u64)page_address(area->pages[i]),
index 9eeb0c05f3f4d2a48ab743c15e42ac6cb2c88fe8..204b94b2e6aaa6e3afc71b292004fb3b329256b2 100644 (file)
@@ -68,6 +68,7 @@ LDFLAGS_vmlinux                       += -static -n -nostdlib
 ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
 cflags-y                       += $(call cc-option,-mexplicit-relocs)
 KBUILD_CFLAGS_KERNEL           += $(call cc-option,-mdirect-extern-access)
+KBUILD_CFLAGS_KERNEL           += $(call cc-option,-fdirect-access-external-data)
 KBUILD_AFLAGS_MODULE           += $(call cc-option,-fno-direct-access-external-data)
 KBUILD_CFLAGS_MODULE           += $(call cc-option,-fno-direct-access-external-data)
 KBUILD_AFLAGS_MODULE           += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
@@ -142,6 +143,8 @@ vdso-install-y += arch/loongarch/vdso/vdso.so.dbg
 
 all:   $(notdir $(KBUILD_IMAGE))
 
+vmlinuz.efi: vmlinux.efi
+
 vmlinux.elf vmlinux.efi vmlinuz.efi: vmlinux
        $(Q)$(MAKE) $(build)=$(boot) $(bootvars-y) $(boot)/$@
 
index c9544f358c33991b46b67edec578af1df44939b7..655db7d7a42796d947e857b6c546023d82c13fba 100644 (file)
        lu32i.d \reg, 0
        lu52i.d \reg, \reg, 0
        .pushsection ".la_abs", "aw", %progbits
-       768:
-       .dword  768b-766b
+       .dword  766b
        .dword  \sym
        .popsection
 #endif
index ed5da02b1cf6f1611ac4b83e560d1f544ed6e270..9b36ac003f8907ed06a279c94bb49ede82b213b2 100644 (file)
@@ -40,13 +40,13 @@ static __always_inline unsigned long __percpu_##op(void *ptr,               \
        switch (size) {                                                 \
        case 4:                                                         \
                __asm__ __volatile__(                                   \
-               "am"#asm_op".w" " %[ret], %[val], %[ptr]        \n"             \
+               "am"#asm_op".w" " %[ret], %[val], %[ptr]        \n"     \
                : [ret] "=&r" (ret), [ptr] "+ZB"(*(u32 *)ptr)           \
                : [val] "r" (val));                                     \
                break;                                                  \
        case 8:                                                         \
                __asm__ __volatile__(                                   \
-               "am"#asm_op".d" " %[ret], %[val], %[ptr]        \n"             \
+               "am"#asm_op".d" " %[ret], %[val], %[ptr]        \n"     \
                : [ret] "=&r" (ret), [ptr] "+ZB"(*(u64 *)ptr)           \
                : [val] "r" (val));                                     \
                break;                                                  \
@@ -63,7 +63,7 @@ PERCPU_OP(and, and, &)
 PERCPU_OP(or, or, |)
 #undef PERCPU_OP
 
-static __always_inline unsigned long __percpu_read(void *ptr, int size)
+static __always_inline unsigned long __percpu_read(void __percpu *ptr, int size)
 {
        unsigned long ret;
 
@@ -100,7 +100,7 @@ static __always_inline unsigned long __percpu_read(void *ptr, int size)
        return ret;
 }
 
-static __always_inline void __percpu_write(void *ptr, unsigned long val, int size)
+static __always_inline void __percpu_write(void __percpu *ptr, unsigned long val, int size)
 {
        switch (size) {
        case 1:
@@ -132,8 +132,7 @@ static __always_inline void __percpu_write(void *ptr, unsigned long val, int siz
        }
 }
 
-static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
-                                                  int size)
+static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size)
 {
        switch (size) {
        case 1:
index a0bc159ce8bdc0348defe27953c07d973dd58b5f..ee52fb1e99631653e3e40d6998afd159a7e5986d 100644 (file)
@@ -25,7 +25,7 @@ extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len
 #ifdef CONFIG_RELOCATABLE
 
 struct rela_la_abs {
-       long offset;
+       long pc;
        long symvalue;
 };
 
index 6c3eff9af9fb1ed4cc4af8ffa9b9ea5490591ee3..1acfa704c8d09b95b625da9fd047833b181d35f7 100644 (file)
@@ -52,7 +52,7 @@ static inline void __init relocate_absolute(long random_offset)
        for (p = begin; (void *)p < end; p++) {
                long v = p->symvalue;
                uint32_t lu12iw, ori, lu32id, lu52id;
-               union loongarch_instruction *insn = (void *)p - p->offset;
+               union loongarch_instruction *insn = (void *)p->pc;
 
                lu12iw = (v >> 12) & 0xfffff;
                ori    = v & 0xfff;
@@ -102,6 +102,14 @@ static inline __init unsigned long get_random_boot(void)
        return hash;
 }
 
+static int __init nokaslr(char *p)
+{
+       pr_info("KASLR is disabled.\n");
+
+       return 0; /* Print a notice and silence the boot warning */
+}
+early_param("nokaslr", nokaslr);
+
 static inline __init bool kaslr_disabled(void)
 {
        char *str;
index 3064af94db9c2e14e953a4aad68c2a8d28588447..e7015f7b70e37c4cabf736512c50a998455bbdf9 100644 (file)
@@ -58,14 +58,16 @@ static int constant_set_state_oneshot(struct clock_event_device *evt)
        return 0;
 }
 
-static int constant_set_state_oneshot_stopped(struct clock_event_device *evt)
+static int constant_set_state_periodic(struct clock_event_device *evt)
 {
+       unsigned long period;
        unsigned long timer_config;
 
        raw_spin_lock(&state_lock);
 
-       timer_config = csr_read64(LOONGARCH_CSR_TCFG);
-       timer_config &= ~CSR_TCFG_EN;
+       period = const_clock_freq / HZ;
+       timer_config = period & CSR_TCFG_VAL;
+       timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN);
        csr_write64(timer_config, LOONGARCH_CSR_TCFG);
 
        raw_spin_unlock(&state_lock);
@@ -73,16 +75,14 @@ static int constant_set_state_oneshot_stopped(struct clock_event_device *evt)
        return 0;
 }
 
-static int constant_set_state_periodic(struct clock_event_device *evt)
+static int constant_set_state_shutdown(struct clock_event_device *evt)
 {
-       unsigned long period;
        unsigned long timer_config;
 
        raw_spin_lock(&state_lock);
 
-       period = const_clock_freq / HZ;
-       timer_config = period & CSR_TCFG_VAL;
-       timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN);
+       timer_config = csr_read64(LOONGARCH_CSR_TCFG);
+       timer_config &= ~CSR_TCFG_EN;
        csr_write64(timer_config, LOONGARCH_CSR_TCFG);
 
        raw_spin_unlock(&state_lock);
@@ -90,11 +90,6 @@ static int constant_set_state_periodic(struct clock_event_device *evt)
        return 0;
 }
 
-static int constant_set_state_shutdown(struct clock_event_device *evt)
-{
-       return 0;
-}
-
 static int constant_timer_next_event(unsigned long delta, struct clock_event_device *evt)
 {
        unsigned long timer_config;
@@ -161,7 +156,7 @@ int constant_clockevent_init(void)
        cd->rating = 320;
        cd->cpumask = cpumask_of(cpu);
        cd->set_state_oneshot = constant_set_state_oneshot;
-       cd->set_state_oneshot_stopped = constant_set_state_oneshot_stopped;
+       cd->set_state_oneshot_stopped = constant_set_state_shutdown;
        cd->set_state_periodic = constant_set_state_periodic;
        cd->set_state_shutdown = constant_set_state_shutdown;
        cd->set_next_event = constant_timer_next_event;
index 71d0539e2d0b0207f901e3ef75679bc4e6bc2fa3..2aae72e638713a658475e6fb82fc73eae0fc3469 100644 (file)
@@ -13,13 +13,13 @@ struct page *dmw_virt_to_page(unsigned long kaddr)
 {
        return pfn_to_page(virt_to_pfn(kaddr));
 }
-EXPORT_SYMBOL_GPL(dmw_virt_to_page);
+EXPORT_SYMBOL(dmw_virt_to_page);
 
 struct page *tlb_virt_to_page(unsigned long kaddr)
 {
        return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
 }
-EXPORT_SYMBOL_GPL(tlb_virt_to_page);
+EXPORT_SYMBOL(tlb_virt_to_page);
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
index fd69dfa0cdabbe7aa6bd31241e8c11fad26a9592..d14ccc948a29b920854b6c750febffac625619fd 100644 (file)
@@ -115,9 +115,12 @@ config ARCH_HAS_ILOG2_U64
        default n
 
 config GENERIC_BUG
-       bool
-       default y
+       def_bool y
        depends on BUG
+       select GENERIC_BUG_RELATIVE_POINTERS if 64BIT
+
+config GENERIC_BUG_RELATIVE_POINTERS
+       bool
 
 config GENERIC_HWEIGHT
        bool
@@ -140,11 +143,11 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN
        default 8
 
 config ARCH_MMAP_RND_BITS_MAX
-       default 24 if 64BIT
-       default 17
+       default 18 if 64BIT
+       default 13
 
 config ARCH_MMAP_RND_COMPAT_BITS_MAX
-       default 17
+       default 13
 
 # unless you want to implement ACPI on PA-RISC ... ;-)
 config PM
index 1ed45fd085d3b80ece36baf4c815fa91e709df63..1eb488f25b838074da6934e5d406e170ffdc6ed2 100644 (file)
@@ -34,7 +34,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
 
 /* Alternative SMP implementation. */
 #define ALTERNATIVE(cond, replacement)         "!0:"   \
-       ".section .altinstructions, \"aw\"      !"      \
+       ".section .altinstructions, \"a\"       !"      \
+       ".align 4                               !"      \
        ".word (0b-4-.)                         !"      \
        ".hword 1, " __stringify(cond) "        !"      \
        ".word " __stringify(replacement) "     !"      \
@@ -44,7 +45,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
 
 /* to replace one single instructions by a new instruction */
 #define ALTERNATIVE(from, to, cond, replacement)\
-       .section .altinstructions, "aw" !       \
+       .section .altinstructions, "a"  !       \
+       .align 4                        !       \
        .word (from - .)                !       \
        .hword (to - from)/4, cond      !       \
        .word replacement               !       \
@@ -52,7 +54,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
 
 /* to replace multiple instructions by new code */
 #define ALTERNATIVE_CODE(from, num_instructions, cond, new_instr_ptr)\
-       .section .altinstructions, "aw" !       \
+       .section .altinstructions, "a"  !       \
+       .align 4                        !       \
        .word (from - .)                !       \
        .hword -num_instructions, cond  !       \
        .word (new_instr_ptr - .)       !       \
index 75677b526b2bb79f7bb531a1281e24296d3c3c7c..74d17d7e759da9dfa89aa1a504b94de4554db16d 100644 (file)
         */
 #define ASM_EXCEPTIONTABLE_ENTRY(fault_addr, except_addr)      \
        .section __ex_table,"aw"                        !       \
+       .align 4                                        !       \
        .word (fault_addr - .), (except_addr - .)       !       \
        .previous
 
index 4b6d60b941247e7f2269b67d8eda526f8eab617c..1641ff9a8b83e0bab486f45d398a026d2bc83acb 100644 (file)
 #define        PARISC_BUG_BREAK_ASM    "break 0x1f, 0x1fff"
 #define        PARISC_BUG_BREAK_INSN   0x03ffe01f  /* PARISC_BUG_BREAK_ASM */
 
-#if defined(CONFIG_64BIT)
-#define ASM_WORD_INSN          ".dword\t"
+#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
+# define __BUG_REL(val) ".word " __stringify(val) " - ."
 #else
-#define ASM_WORD_INSN          ".word\t"
+# define __BUG_REL(val) ".word " __stringify(val)
 #endif
 
+
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 #define BUG()                                                          \
        do {                                                            \
                asm volatile("\n"                                       \
                             "1:\t" PARISC_BUG_BREAK_ASM "\n"           \
-                            "\t.pushsection __bug_table,\"aw\"\n"      \
-                            "2:\t" ASM_WORD_INSN "1b, %c0\n"           \
-                            "\t.short %c1, %c2\n"                      \
-                            "\t.org 2b+%c3\n"                          \
+                            "\t.pushsection __bug_table,\"a\"\n"       \
+                            "\t.align 4\n"                             \
+                            "2:\t" __BUG_REL(1b) "\n"                  \
+                            "\t" __BUG_REL(%c0)  "\n"                  \
+                            "\t.short %1, %2\n"                        \
+                            "\t.blockz %3-2*4-2*2\n"                   \
                             "\t.popsection"                            \
                             : : "i" (__FILE__), "i" (__LINE__),        \
-                            "i" (0), "i" (sizeof(struct bug_entry)) ); \
+                            "i" (0), "i" (sizeof(struct bug_entry)) ); \
                unreachable();                                          \
        } while(0)
 
        do {                                                            \
                asm volatile("\n"                                       \
                             "1:\t" PARISC_BUG_BREAK_ASM "\n"           \
-                            "\t.pushsection __bug_table,\"aw\"\n"      \
-                            "2:\t" ASM_WORD_INSN "1b, %c0\n"           \
-                            "\t.short %c1, %c2\n"                      \
-                            "\t.org 2b+%c3\n"                          \
+                            "\t.pushsection __bug_table,\"a\"\n"       \
+                            "\t.align 4\n"                             \
+                            "2:\t" __BUG_REL(1b) "\n"                  \
+                            "\t" __BUG_REL(%c0)  "\n"                  \
+                            "\t.short %1, %2\n"                        \
+                            "\t.blockz %3-2*4-2*2\n"                   \
                             "\t.popsection"                            \
                             : : "i" (__FILE__), "i" (__LINE__),        \
                             "i" (BUGFLAG_WARNING|(flags)),             \
        do {                                                            \
                asm volatile("\n"                                       \
                             "1:\t" PARISC_BUG_BREAK_ASM "\n"           \
-                            "\t.pushsection __bug_table,\"aw\"\n"      \
-                            "2:\t" ASM_WORD_INSN "1b\n"                \
-                            "\t.short %c0\n"                           \
-                            "\t.org 2b+%c1\n"                          \
+                            "\t.pushsection __bug_table,\"a\"\n"       \
+                            "\t.align %2\n"                            \
+                            "2:\t" __BUG_REL(1b) "\n"                  \
+                            "\t.short %0\n"                            \
+                            "\t.blockz %1-4-2\n"                       \
                             "\t.popsection"                            \
                             : : "i" (BUGFLAG_WARNING|(flags)),         \
                             "i" (sizeof(struct bug_entry)) );          \
index 140eaa97bf215dcde034591222b11098f3160d0a..2d73d3c3cd37f8a61b1a9019b8d7821065010db1 100644 (file)
@@ -349,15 +349,7 @@ struct pt_regs;    /* forward declaration... */
 
 #define ELF_HWCAP      0
 
-/* Masks for stack and mmap randomization */
-#define BRK_RND_MASK   (is_32bit_task() ? 0x07ffUL : 0x3ffffUL)
-#define MMAP_RND_MASK  (is_32bit_task() ? 0x1fffUL : 0x3ffffUL)
-#define STACK_RND_MASK MMAP_RND_MASK
-
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *);
-#define arch_randomize_brk arch_randomize_brk
-
+#define STACK_RND_MASK 0x7ff   /* 8MB of VA */
 
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 struct linux_binprm;
index af2a598bc0f819cc912129cbdecd56b642f663ab..94428798b6aa63e8d4b0878cc7555826cf080e47 100644 (file)
@@ -15,10 +15,12 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran
        asm_volatile_goto("1:\n\t"
                 "nop\n\t"
                 ".pushsection __jump_table,  \"aw\"\n\t"
+                ".align %1\n\t"
                 ".word 1b - ., %l[l_yes] - .\n\t"
                 __stringify(ASM_ULONG_INSN) " %c0 - .\n\t"
                 ".popsection\n\t"
-                : :  "i" (&((char *)key)[branch]) :  : l_yes);
+                : : "i" (&((char *)key)[branch]), "i" (sizeof(long))
+                : : l_yes);
 
        return false;
 l_yes:
@@ -30,10 +32,12 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
        asm_volatile_goto("1:\n\t"
                 "b,n %l[l_yes]\n\t"
                 ".pushsection __jump_table,  \"aw\"\n\t"
+                ".align %1\n\t"
                 ".word 1b - ., %l[l_yes] - .\n\t"
                 __stringify(ASM_ULONG_INSN) " %c0 - .\n\t"
                 ".popsection\n\t"
-                : :  "i" (&((char *)key)[branch]) :  : l_yes);
+                : : "i" (&((char *)key)[branch]), "i" (sizeof(long))
+                : : l_yes);
 
        return false;
 l_yes:
index ee9e071859b2f44c2f4138436204a44a909ff7c0..47ebc4c91eaff319df410b82906585994e602905 100644 (file)
@@ -55,7 +55,7 @@
 })
 
 #ifdef CONFIG_SMP
-# define __lock_aligned __section(".data..lock_aligned")
+# define __lock_aligned __section(".data..lock_aligned") __aligned(16)
 #endif
 
 #endif /* __PARISC_LDCW_H */
index c05d121cf5d0f25cc581bd7027acc6d618ca5a9f..982aca20f56f5356927363da73600064657c5e39 100644 (file)
@@ -47,6 +47,8 @@
 
 #ifndef __ASSEMBLY__
 
+struct rlimit;
+unsigned long mmap_upper_limit(struct rlimit *rlim_stack);
 unsigned long calc_max_stack_size(unsigned long stack_max);
 
 /*
index 2bf660eabe421e4490a45e0cb8d2a7bfc0065248..4165079898d9e7af239a31a1bc77821e6081706a 100644 (file)
@@ -41,6 +41,7 @@ struct exception_table_entry {
 
 #define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\
        ".section __ex_table,\"aw\"\n"                     \
+       ".align 4\n"                                       \
        ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \
        ".previous\n"
 
index 87245c584784ec1f0f877fbe0be54ee136df5456..8d94739d75c67c80fbea76fe3b5c08f1f8b1ea83 100644 (file)
@@ -75,7 +75,6 @@
 
 /* We now return you to your regularly scheduled HPUX. */
 
-#define ENOSYM         215     /* symbol does not exist in executable */
 #define        ENOTSOCK        216     /* Socket operation on non-socket */
 #define        EDESTADDRREQ    217     /* Destination address required */
 #define        EMSGSIZE        218     /* Message too long */
 #define        ETIMEDOUT       238     /* Connection timed out */
 #define        ECONNREFUSED    239     /* Connection refused */
 #define        EREFUSED        ECONNREFUSED    /* for HP's NFS apparently */
-#define        EREMOTERELEASE  240     /* Remote peer released connection */
 #define        EHOSTDOWN       241     /* Host is down */
 #define        EHOSTUNREACH    242     /* No route to host */
 
index 29e2750f86a410f58da62d30ccc4e25f2e901908..e95a977ba5f376eb813d4c7806d205a92f539880 100644 (file)
@@ -383,7 +383,7 @@ show_cpuinfo (struct seq_file *m, void *v)
        char cpu_name[60], *p;
 
        /* strip PA path from CPU name to not confuse lscpu */
-       strlcpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name));
+       strscpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name));
        p = strrchr(cpu_name, '[');
        if (p)
                *(--p) = 0;
index ab896eff7a1de9515407031c76d4afb1552fe355..98af719d5f85b2b24a4c7af70415e97e90c73803 100644 (file)
@@ -77,7 +77,7 @@ unsigned long calc_max_stack_size(unsigned long stack_max)
  * indicating that "current" should be used instead of a passed-in
  * value from the exec bprm as done with arch_pick_mmap_layout().
  */
-static unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
+unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
 {
        unsigned long stack_base;
 
index 58694d1989c23351772602e369028402bced2030..548051b0b4aff692741847a04b09208d1e68d279 100644 (file)
@@ -130,6 +130,7 @@ SECTIONS
        RO_DATA(8)
 
        /* unwind info */
+       . = ALIGN(4);
        .PARISC.unwind : {
                __start___unwind = .;
                *(.PARISC.unwind)
index dc17896a001a9235d8cf21374f323917e46d1e97..c15eadbb998343cb74aad00c48145e6d602302ee 100644 (file)
@@ -228,7 +228,6 @@ typedef struct thread_struct thread_struct;
        execve_tail();                                                  \
 } while (0)
 
-/* Forward declaration, a strange C thing */
 struct task_struct;
 struct mm_struct;
 struct seq_file;
index cc364fce6aa96bb7deea8eed54694bb93419ecc2..ba75f6bee77423be0f9a54e0afdaa88cbb342728 100644 (file)
@@ -666,6 +666,7 @@ static int __init ipl_init(void)
                                                &ipl_ccw_attr_group_lpar);
                break;
        case IPL_TYPE_ECKD:
+       case IPL_TYPE_ECKD_DUMP:
                rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group);
                break;
        case IPL_TYPE_FCP:
index 77fd24e6cbb643bccb79777da90481c748f4feaf..39a91b00438a7f6ba48fb541d8f24b51070391d2 100644 (file)
@@ -279,12 +279,6 @@ static int paicrypt_event_init(struct perf_event *event)
        if (IS_ERR(cpump))
                return PTR_ERR(cpump);
 
-       /* Event initialization sets last_tag to 0. When later on the events
-        * are deleted and re-added, do not reset the event count value to zero.
-        * Events are added, deleted and re-added when 2 or more events
-        * are active at the same time.
-        */
-       event->hw.last_tag = 0;
        event->destroy = paicrypt_event_destroy;
 
        if (a->sample_period) {
@@ -318,6 +312,11 @@ static void paicrypt_start(struct perf_event *event, int flags)
 {
        u64 sum;
 
+       /* Event initialization sets last_tag to 0. When later on the events
+        * are deleted and re-added, do not reset the event count value to zero.
+        * Events are added, deleted and re-added when 2 or more events
+        * are active at the same time.
+        */
        if (!event->hw.last_tag) {
                event->hw.last_tag = 1;
                sum = paicrypt_getall(event);           /* Get current value */
index 8ba0f1a3a39dc0b7b4a2a2e300604a1b7968208c..e7013a2e8960508566083cfa9344e6daa60638fb 100644 (file)
@@ -260,7 +260,6 @@ static int paiext_event_init(struct perf_event *event)
        rc = paiext_alloc(a, event);
        if (rc)
                return rc;
-       event->hw.last_tag = 0;
        event->destroy = paiext_event_destroy;
 
        if (a->sample_period) {
index a08f794a0e79ac4af4ce49bd1dbb01397d0da78c..ce1c777227b4ef0f1cceb8ba238c1784aa39b674 100644 (file)
@@ -4660,7 +4660,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
        if (pmu->intel_cap.pebs_output_pt_available)
                pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
        else
-               pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT;
+               pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT;
 
        intel_pmu_check_event_constraints(pmu->event_constraints,
                                          pmu->num_counters,
index 21556ad87f4ba86a076c828b2a88daf76a02c038..8f3a4d16bb791f37ac924858668d37eb49446f91 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/io.h>
 #include <asm/apic.h>
 #include <asm/desc.h>
+#include <asm/e820/api.h>
 #include <asm/sev.h>
 #include <asm/ibt.h>
 #include <asm/hypervisor.h>
@@ -286,15 +287,31 @@ static int hv_cpu_die(unsigned int cpu)
 
 static int __init hv_pci_init(void)
 {
-       int gen2vm = efi_enabled(EFI_BOOT);
+       bool gen2vm = efi_enabled(EFI_BOOT);
 
        /*
-        * For Generation-2 VM, we exit from pci_arch_init() by returning 0.
-        * The purpose is to suppress the harmless warning:
+        * A Generation-2 VM doesn't support legacy PCI/PCIe, so both
+        * raw_pci_ops and raw_pci_ext_ops are NULL, and pci_subsys_init() ->
+        * pcibios_init() doesn't call pcibios_resource_survey() ->
+        * e820__reserve_resources_late(); as a result, any emulated persistent
+        * memory of E820_TYPE_PRAM (12) via the kernel parameter
+        * memmap=nn[KMG]!ss is not added into iomem_resource and hence can't be
+        * detected by register_e820_pmem(). Fix this by directly calling
+        * e820__reserve_resources_late() here: e820__reserve_resources_late()
+        * depends on e820__reserve_resources(), which has been called earlier
+        * from setup_arch(). Note: e820__reserve_resources_late() also adds
+        * any memory of E820_TYPE_PMEM (7) into iomem_resource, and
+        * acpi_nfit_register_region() -> acpi_nfit_insert_resource() ->
+        * region_intersects() returns REGION_INTERSECTS, so the memory of
+        * E820_TYPE_PMEM won't get added twice.
+        *
+        * We return 0 here so that pci_arch_init() won't print the warning:
         * "PCI: Fatal: No config space access function found"
         */
-       if (gen2vm)
+       if (gen2vm) {
+               e820__reserve_resources_late();
                return 0;
+       }
 
        /* For Generation-1 VM, we'll proceed in pci_arch_init().  */
        return 1;
index c8a7fc23f63c672456721888db9e293fbf70dafb..f896eed4516c7e3c131751ab5865aa70cfa2d645 100644 (file)
@@ -16,6 +16,9 @@
 #include <asm/x86_init.h>
 #include <asm/cpufeature.h>
 #include <asm/irq_vectors.h>
+#include <asm/xen/hypervisor.h>
+
+#include <xen/xen.h>
 
 #ifdef CONFIG_ACPI_APEI
 # include <asm/pgtable_types.h>
@@ -127,6 +130,17 @@ static inline void arch_acpi_set_proc_cap_bits(u32 *cap)
        if (!cpu_has(c, X86_FEATURE_MWAIT) ||
            boot_option_idle_override == IDLE_NOMWAIT)
                *cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH);
+
+       if (xen_initial_domain()) {
+               /*
+                * When Linux is running as Xen dom0, the hypervisor is the
+                * entity in charge of the processor power management, and so
+                * Xen needs to check the OS capabilities reported in the
+                * processor capabilities buffer matches what the hypervisor
+                * driver supports.
+                */
+               xen_sanitize_proc_cap_bits(cap);
+       }
 }
 
 static inline bool acpi_has_cpu_in_madt(void)
index 7048dfacc04b2413acba1054fb4412c7048b8249..a9088250770f2a3d6320040f815f1b9e6cba653b 100644 (file)
@@ -100,4 +100,13 @@ static inline void leave_lazy(enum xen_lazy_mode mode)
 
 enum xen_lazy_mode xen_get_lazy_mode(void);
 
+#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI)
+void xen_sanitize_proc_cap_bits(uint32_t *buf);
+#else
+static inline void xen_sanitize_proc_cap_bits(uint32_t *buf)
+{
+       BUG();
+}
+#endif
+
 #endif /* _ASM_X86_XEN_HYPERVISOR_H */
index d0918a75cb00a02bbc01804c16970fd7b91ab3d2..1a0dd80d81ac301a0fbc9e0d86e14866004ad18a 100644 (file)
@@ -63,6 +63,7 @@ int acpi_fix_pin2_polarity __initdata;
 
 #ifdef CONFIG_X86_LOCAL_APIC
 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+static bool has_lapic_cpus __initdata;
 static bool acpi_support_online_capable;
 #endif
 
@@ -232,6 +233,14 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
        if (!acpi_is_processor_usable(processor->lapic_flags))
                return 0;
 
+       /*
+        * According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#processor-local-x2apic-structure
+        * when MADT provides both valid LAPIC and x2APIC entries, the APIC ID
+        * in x2APIC must be equal or greater than 0xff.
+        */
+       if (has_lapic_cpus && apic_id < 0xff)
+               return 0;
+
        /*
         * We need to register disabled CPU as well to permit
         * counting disabled CPUs. This allows us to size
@@ -1114,10 +1123,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
 
 static int __init acpi_parse_madt_lapic_entries(void)
 {
-       int count;
-       int x2count = 0;
-       int ret;
-       struct acpi_subtable_proc madt_proc[2];
+       int count, x2count = 0;
 
        if (!boot_cpu_has(X86_FEATURE_APIC))
                return -ENODEV;
@@ -1126,21 +1132,11 @@ static int __init acpi_parse_madt_lapic_entries(void)
                                      acpi_parse_sapic, MAX_LOCAL_APIC);
 
        if (!count) {
-               memset(madt_proc, 0, sizeof(madt_proc));
-               madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
-               madt_proc[0].handler = acpi_parse_lapic;
-               madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
-               madt_proc[1].handler = acpi_parse_x2apic;
-               ret = acpi_table_parse_entries_array(ACPI_SIG_MADT,
-                               sizeof(struct acpi_table_madt),
-                               madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
-               if (ret < 0) {
-                       pr_err("Error parsing LAPIC/X2APIC entries\n");
-                       return ret;
-               }
-
-               count = madt_proc[0].count;
-               x2count = madt_proc[1].count;
+               count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
+                                       acpi_parse_lapic, MAX_LOCAL_APIC);
+               has_lapic_cpus = count > 0;
+               x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
+                                       acpi_parse_x2apic, MAX_LOCAL_APIC);
        }
        if (!count && !x2count) {
                pr_err("No LAPIC entries present\n");
index 9373ec01c5ae1730784cb4d0c7dfa9f24f39d58b..13b45b9c806daef10e93428294d0705fe86b9dea 100644 (file)
@@ -104,8 +104,6 @@ struct cont_desc {
        size_t               size;
 };
 
-static u32 ucode_new_rev;
-
 /*
  * Microcode patch container file is prepended to the initrd in cpio
  * format. See Documentation/arch/x86/microcode.rst
@@ -442,12 +440,11 @@ static int __apply_microcode_amd(struct microcode_amd *mc)
  *
  * Returns true if container found (sets @desc), false otherwise.
  */
-static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size)
+static bool early_apply_microcode(u32 cpuid_1_eax, u32 old_rev, void *ucode, size_t size)
 {
        struct cont_desc desc = { 0 };
        struct microcode_amd *mc;
        bool ret = false;
-       u32 rev, dummy;
 
        desc.cpuid_1_eax = cpuid_1_eax;
 
@@ -457,22 +454,15 @@ static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size)
        if (!mc)
                return ret;
 
-       native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-
        /*
         * Allow application of the same revision to pick up SMT-specific
         * changes even if the revision of the other SMT thread is already
         * up-to-date.
         */
-       if (rev > mc->hdr.patch_id)
+       if (old_rev > mc->hdr.patch_id)
                return ret;
 
-       if (!__apply_microcode_amd(mc)) {
-               ucode_new_rev = mc->hdr.patch_id;
-               ret = true;
-       }
-
-       return ret;
+       return !__apply_microcode_amd(mc);
 }
 
 static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
@@ -506,9 +496,12 @@ static void __init find_blobs_in_containers(unsigned int cpuid_1_eax, struct cpi
        *ret = cp;
 }
 
-void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax)
+void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_eax)
 {
        struct cpio_data cp = { };
+       u32 dummy;
+
+       native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->old_rev, dummy);
 
        /* Needed in load_microcode_amd() */
        ucode_cpu_info[0].cpu_sig.sig = cpuid_1_eax;
@@ -517,7 +510,8 @@ void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax)
        if (!(cp.data && cp.size))
                return;
 
-       early_apply_microcode(cpuid_1_eax, cp.data, cp.size);
+       if (early_apply_microcode(cpuid_1_eax, ed->old_rev, cp.data, cp.size))
+               native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy);
 }
 
 static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
@@ -625,10 +619,8 @@ void reload_ucode_amd(unsigned int cpu)
        rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 
        if (rev < mc->hdr.patch_id) {
-               if (!__apply_microcode_amd(mc)) {
-                       ucode_new_rev = mc->hdr.patch_id;
-                       pr_info("reload patch_level=0x%08x\n", ucode_new_rev);
-               }
+               if (!__apply_microcode_amd(mc))
+                       pr_info_once("reload revision: 0x%08x\n", mc->hdr.patch_id);
        }
 }
 
@@ -649,8 +641,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
        if (p && (p->patch_id == csig->rev))
                uci->mc = p->data;
 
-       pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
-
        return 0;
 }
 
@@ -691,8 +681,6 @@ static enum ucode_state apply_microcode_amd(int cpu)
        rev = mc_amd->hdr.patch_id;
        ret = UCODE_UPDATED;
 
-       pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
-
 out:
        uci->cpu_sig.rev = rev;
        c->microcode     = rev;
@@ -935,11 +923,6 @@ struct microcode_ops * __init init_amd_microcode(void)
                pr_warn("AMD CPU family 0x%x not supported\n", c->x86);
                return NULL;
        }
-
-       if (ucode_new_rev)
-               pr_info_once("microcode updated early to new patch_level=0x%08x\n",
-                            ucode_new_rev);
-
        return &microcode_amd_ops;
 }
 
index 666d25bbc5ad200ef9c8fe4b6f69c1accc5b2fb9..232026a239a68131a2304fa0431faf26d202cb88 100644 (file)
@@ -41,8 +41,6 @@
 
 #include "internal.h"
 
-#define DRIVER_VERSION "2.2"
-
 static struct microcode_ops    *microcode_ops;
 bool dis_ucode_ldr = true;
 
@@ -77,6 +75,8 @@ static u32 final_levels[] = {
        0, /* T-101 terminator */
 };
 
+struct early_load_data early_data;
+
 /*
  * Check the current patch level on this CPU.
  *
@@ -155,9 +155,9 @@ void __init load_ucode_bsp(void)
                return;
 
        if (intel)
-               load_ucode_intel_bsp();
+               load_ucode_intel_bsp(&early_data);
        else
-               load_ucode_amd_bsp(cpuid_1_eax);
+               load_ucode_amd_bsp(&early_data, cpuid_1_eax);
 }
 
 void load_ucode_ap(void)
@@ -828,6 +828,11 @@ static int __init microcode_init(void)
        if (!microcode_ops)
                return -ENODEV;
 
+       pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev));
+
+       if (early_data.new_rev)
+               pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev);
+
        microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0);
        if (IS_ERR(microcode_pdev))
                return PTR_ERR(microcode_pdev);
@@ -846,8 +851,6 @@ static int __init microcode_init(void)
        cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
                          mc_cpu_online, mc_cpu_down_prep);
 
-       pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
-
        return 0;
 
  out_pdev:
index 6024feb98d29dbba1ea45c35660fdf1b577582cf..070426b9895feddb95cadb81aa92a5991cdad428 100644 (file)
@@ -339,16 +339,9 @@ static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci,
 static enum ucode_state apply_microcode_early(struct ucode_cpu_info *uci)
 {
        struct microcode_intel *mc = uci->mc;
-       enum ucode_state ret;
-       u32 cur_rev, date;
+       u32 cur_rev;
 
-       ret = __apply_microcode(uci, mc, &cur_rev);
-       if (ret == UCODE_UPDATED) {
-               date = mc->hdr.date;
-               pr_info_once("updated early: 0x%x -> 0x%x, date = %04x-%02x-%02x\n",
-                            cur_rev, mc->hdr.rev, date & 0xffff, date >> 24, (date >> 16) & 0xff);
-       }
-       return ret;
+       return __apply_microcode(uci, mc, &cur_rev);
 }
 
 static __init bool load_builtin_intel_microcode(struct cpio_data *cp)
@@ -413,13 +406,17 @@ static int __init save_builtin_microcode(void)
 early_initcall(save_builtin_microcode);
 
 /* Load microcode on BSP from initrd or builtin blobs */
-void __init load_ucode_intel_bsp(void)
+void __init load_ucode_intel_bsp(struct early_load_data *ed)
 {
        struct ucode_cpu_info uci;
 
+       ed->old_rev = intel_get_microcode_revision();
+
        uci.mc = get_microcode_blob(&uci, false);
        if (uci.mc && apply_microcode_early(&uci) == UCODE_UPDATED)
                ucode_patch_va = UCODE_BSP_LOADED;
+
+       ed->new_rev = uci.cpu_sig.rev;
 }
 
 void load_ucode_intel_ap(void)
index f8047b12329a9749bd069d230dcbce6752acd525..21776c529fa97a127836bf86a467a4e0bac47603 100644 (file)
@@ -37,6 +37,12 @@ struct microcode_ops {
                                use_nmi         : 1;
 };
 
+struct early_load_data {
+       u32 old_rev;
+       u32 new_rev;
+};
+
+extern struct early_load_data early_data;
 extern struct ucode_cpu_info ucode_cpu_info[];
 struct cpio_data find_microcode_in_initrd(const char *path);
 
@@ -92,14 +98,14 @@ extern bool dis_ucode_ldr;
 extern bool force_minrev;
 
 #ifdef CONFIG_CPU_SUP_AMD
-void load_ucode_amd_bsp(unsigned int family);
+void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family);
 void load_ucode_amd_ap(unsigned int family);
 int save_microcode_in_initrd_amd(unsigned int family);
 void reload_ucode_amd(unsigned int cpu);
 struct microcode_ops *init_amd_microcode(void);
 void exit_amd_microcode(void);
 #else /* CONFIG_CPU_SUP_AMD */
-static inline void load_ucode_amd_bsp(unsigned int family) { }
+static inline void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family) { }
 static inline void load_ucode_amd_ap(unsigned int family) { }
 static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
 static inline void reload_ucode_amd(unsigned int cpu) { }
@@ -108,12 +114,12 @@ static inline void exit_amd_microcode(void) { }
 #endif /* !CONFIG_CPU_SUP_AMD */
 
 #ifdef CONFIG_CPU_SUP_INTEL
-void load_ucode_intel_bsp(void);
+void load_ucode_intel_bsp(struct early_load_data *ed);
 void load_ucode_intel_ap(void);
 void reload_ucode_intel(void);
 struct microcode_ops *init_intel_microcode(void);
 #else /* CONFIG_CPU_SUP_INTEL */
-static inline void load_ucode_intel_bsp(void) { }
+static inline void load_ucode_intel_bsp(struct early_load_data *ed) { }
 static inline void load_ucode_intel_ap(void) { }
 static inline void reload_ucode_intel(void) { }
 static inline struct microcode_ops *init_intel_microcode(void) { return NULL; }
index e6bba12c759cb793a2a3eb0ff367bb51a6b502f0..01fa06dd06b66c9324c670e4847c0d503dbd5691 100644 (file)
@@ -262,11 +262,14 @@ static uint32_t  __init ms_hyperv_platform(void)
 static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
 {
        static atomic_t nmi_cpu = ATOMIC_INIT(-1);
+       unsigned int old_cpu, this_cpu;
 
        if (!unknown_nmi_panic)
                return NMI_DONE;
 
-       if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1)
+       old_cpu = -1;
+       this_cpu = raw_smp_processor_id();
+       if (!atomic_try_cmpxchg(&nmi_cpu, &old_cpu, this_cpu))
                return NMI_HANDLED;
 
        return NMI_DONE;
index cacf2ede62175d40dabf24d5606fcef9d363cc1e..23d8aaf8d9fd1950fcd6c3a43f5af4a013d57a31 100644 (file)
@@ -175,9 +175,6 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
        frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp);
        uc_flags = frame_uc_flags(regs);
 
-       if (setup_signal_shadow_stack(ksig))
-               return -EFAULT;
-
        if (!user_access_begin(frame, sizeof(*frame)))
                return -EFAULT;
 
@@ -198,6 +195,9 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
                        return -EFAULT;
        }
 
+       if (setup_signal_shadow_stack(ksig))
+               return -EFAULT;
+
        /* Set up registers for signal handler */
        regs->di = ksig->sig;
        /* In case the signal handler was declared without prototypes */
index e4cfb7adb64581d0630d61dc0f5a287245da7d38..750aec178b6abfb08472e68bf8558c786d8ab64b 100644 (file)
@@ -425,6 +425,8 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
 
 void bdev_add(struct block_device *bdev, dev_t dev)
 {
+       if (bdev_stable_writes(bdev))
+               mapping_set_stable_writes(bdev->bd_inode->i_mapping);
        bdev->bd_dev = dev;
        bdev->bd_inode->i_rdev = dev;
        bdev->bd_inode->i_ino = dev;
index 5c0246b9e52287ff9aae57efe93480d774d49b49..4ccf1994b97adbf8e8f9add7e3ff67d3bb5b3567 100644 (file)
@@ -502,6 +502,16 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev)
        return ret;
 }
 
+static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev)
+{
+       ivpu_boot_dpu_active_drive(vdev, false);
+       ivpu_boot_pwr_island_isolation_drive(vdev, true);
+       ivpu_boot_pwr_island_trickle_drive(vdev, false);
+       ivpu_boot_pwr_island_drive(vdev, false);
+
+       return ivpu_boot_wait_for_pwr_island_status(vdev, 0x0);
+}
+
 static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev)
 {
        u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES);
@@ -600,25 +610,17 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev)
 
 static int ivpu_hw_37xx_reset(struct ivpu_device *vdev)
 {
-       int ret;
-       u32 val;
-
-       if (IVPU_WA(punit_disabled))
-               return 0;
+       int ret = 0;
 
-       ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
-       if (ret) {
-               ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n");
-               return ret;
+       if (ivpu_boot_pwr_domain_disable(vdev)) {
+               ivpu_err(vdev, "Failed to disable power domain\n");
+               ret = -EIO;
        }
 
-       val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_IP_RESET);
-       val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, val);
-       REGB_WR32(VPU_37XX_BUTTRESS_VPU_IP_RESET, val);
-
-       ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
-       if (ret)
-               ivpu_err(vdev, "Timed out waiting for RESET completion\n");
+       if (ivpu_pll_disable(vdev)) {
+               ivpu_err(vdev, "Failed to disable PLL\n");
+               ret = -EIO;
+       }
 
        return ret;
 }
@@ -651,10 +653,6 @@ static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev)
 {
        int ret;
 
-       ret = ivpu_hw_37xx_reset(vdev);
-       if (ret)
-               ivpu_warn(vdev, "Failed to reset HW: %d\n", ret);
-
        ret = ivpu_hw_37xx_d0i3_disable(vdev);
        if (ret)
                ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
@@ -722,11 +720,11 @@ static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev)
 {
        int ret = 0;
 
-       if (!ivpu_hw_37xx_is_idle(vdev) && ivpu_hw_37xx_reset(vdev))
-               ivpu_err(vdev, "Failed to reset the VPU\n");
+       if (!ivpu_hw_37xx_is_idle(vdev))
+               ivpu_warn(vdev, "VPU not idle during power down\n");
 
-       if (ivpu_pll_disable(vdev)) {
-               ivpu_err(vdev, "Failed to disable PLL\n");
+       if (ivpu_hw_37xx_reset(vdev)) {
+               ivpu_err(vdev, "Failed to reset VPU\n");
                ret = -EIO;
        }
 
index 0ace218783c813142f09ea2f90b3c06d324b16bd..e9b16cbc26f49eb79bb1f336c3903c8f5f86b2c4 100644 (file)
@@ -250,9 +250,6 @@ int ivpu_rpm_get_if_active(struct ivpu_device *vdev)
 {
        int ret;
 
-       ivpu_dbg(vdev, RPM, "rpm_get_if_active count %d\n",
-                atomic_read(&vdev->drm.dev->power.usage_count));
-
        ret = pm_runtime_get_if_active(vdev->drm.dev, false);
        drm_WARN_ON(&vdev->drm, ret < 0);
 
index 0b7a01f38b65cc15bd2f60535286f362564ba4fd..d321ca7160d978c68ce8ea337b838ccf2b281b76 100644 (file)
@@ -2031,7 +2031,7 @@ static int acpi_video_bus_add(struct acpi_device *device)
         * HP ZBook Fury 16 G10 requires ACPI video's child devices have _PS0
         * evaluated to have functional panel brightness control.
         */
-       acpi_device_fix_up_power_extended(device);
+       acpi_device_fix_up_power_children(device);
 
        pr_info("%s [%s] (multi-head: %s  rom: %s  post: %s)\n",
               ACPI_VIDEO_DEVICE_NAME, acpi_device_bid(device),
index f007116a8427628526ea001f5ee5fd4ec73e09fb..3b4d048c49417303bf5c4451e94e6d9f3dd77e56 100644 (file)
@@ -397,6 +397,19 @@ void acpi_device_fix_up_power_extended(struct acpi_device *adev)
 }
 EXPORT_SYMBOL_GPL(acpi_device_fix_up_power_extended);
 
+/**
+ * acpi_device_fix_up_power_children - Force a device's children into D0.
+ * @adev: Parent device object whose children's power state is to be fixed up.
+ *
+ * Call acpi_device_fix_up_power() for @adev's children so long as they
+ * are reported as present and enabled.
+ */
+void acpi_device_fix_up_power_children(struct acpi_device *adev)
+{
+       acpi_dev_for_each_child(adev, fix_up_power_if_applicable, NULL);
+}
+EXPORT_SYMBOL_GPL(acpi_device_fix_up_power_children);
+
 int acpi_device_update_power(struct acpi_device *device, int *state_p)
 {
        int state;
index 3a34a8c425fe4a673119d5ac7d9fef87ed145f38..55437f5e0c3aee4bf8e404b2e3b0977238401eb0 100644 (file)
@@ -592,7 +592,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
        while (1) {
 
                if (cx->entry_method == ACPI_CSTATE_HALT)
-                       safe_halt();
+                       raw_safe_halt();
                else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
                        io_idle(cx->address);
                } else
index 15a3bdbd0755d45045d4224fcbbe9ab15dae2fa0..9bd9f79cd40990fee75134397500d123816062bd 100644 (file)
@@ -447,6 +447,13 @@ static const struct dmi_system_id irq1_level_low_skip_override[] = {
                        DMI_MATCH(DMI_BOARD_NAME, "B1402CBA"),
                },
        },
+       {
+               /* Asus ExpertBook B1402CVA */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_BOARD_NAME, "B1402CVA"),
+               },
+       },
        {
                /* Asus ExpertBook B1502CBA */
                .matches = {
index c10ff8985203ab13da2997372bfa2fd8b48fd660..0a0f483124c3a56af47abb498e67193236416b18 100644 (file)
@@ -1055,9 +1055,14 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
                 * Ask the sd driver to issue START STOP UNIT on runtime suspend
                 * and resume and shutdown only. For system level suspend/resume,
                 * devices power state is handled directly by libata EH.
+                * Given that disks are always spun up on system resume, also
+                * make sure that the sd driver forces runtime suspended disks
+                * to be resumed to correctly reflect the power state of the
+                * device.
                 */
-               sdev->manage_runtime_start_stop = true;
-               sdev->manage_shutdown = true;
+               sdev->manage_runtime_start_stop = 1;
+               sdev->manage_shutdown = 1;
+               sdev->force_runtime_start_on_system_start = 1;
        }
 
        /*
index 25a63d043c8e1f442001304b5aef2606f0c7ac17..0f77e042406619577335ffbfbf6d7075988234ea 100644 (file)
@@ -82,6 +82,9 @@ static int isapnp_init_one(struct pnp_dev *idev, const struct pnp_device_id *dev
        if (pnp_port_valid(idev, 1)) {
                ctl_addr = devm_ioport_map(&idev->dev,
                                           pnp_port_start(idev, 1), 1);
+               if (!ctl_addr)
+                       return -ENOMEM;
+
                ap->ioaddr.altstatus_addr = ctl_addr;
                ap->ioaddr.ctl_addr = ctl_addr;
                ap->ops = &isapnp_port_ops;
index a6dc3997bf5c50cfbe4dd23d0836ed0f29b00fbd..442a0ebeb953e983d053d4c70d919e3a8aee213e 100644 (file)
@@ -1093,9 +1093,10 @@ int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info)
                return -ENOMEM;
        hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
                                DPLL_CMD_PIN_ID_GET);
-       if (!hdr)
+       if (!hdr) {
+               nlmsg_free(msg);
                return -EMSGSIZE;
-
+       }
        pin = dpll_pin_find_from_nlattr(info);
        if (!IS_ERR(pin)) {
                ret = dpll_msg_add_pin_handle(msg, pin);
@@ -1123,8 +1124,10 @@ int dpll_nl_pin_get_doit(struct sk_buff *skb, struct genl_info *info)
                return -ENOMEM;
        hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
                                DPLL_CMD_PIN_GET);
-       if (!hdr)
+       if (!hdr) {
+               nlmsg_free(msg);
                return -EMSGSIZE;
+       }
        ret = dpll_cmd_pin_get_one(msg, pin, info->extack);
        if (ret) {
                nlmsg_free(msg);
@@ -1256,8 +1259,10 @@ int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info)
                return -ENOMEM;
        hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
                                DPLL_CMD_DEVICE_ID_GET);
-       if (!hdr)
+       if (!hdr) {
+               nlmsg_free(msg);
                return -EMSGSIZE;
+       }
 
        dpll = dpll_device_find_from_nlattr(info);
        if (!IS_ERR(dpll)) {
@@ -1284,8 +1289,10 @@ int dpll_nl_device_get_doit(struct sk_buff *skb, struct genl_info *info)
                return -ENOMEM;
        hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
                                DPLL_CMD_DEVICE_GET);
-       if (!hdr)
+       if (!hdr) {
+               nlmsg_free(msg);
                return -EMSGSIZE;
+       }
 
        ret = dpll_device_get_one(dpll, msg, info->extack);
        if (ret) {
index 7edf2c95282fa2bae047cffac250b55337fdba02..e779d866022b9fe5dd1f95fed78a005709450860 100644 (file)
@@ -1519,9 +1519,9 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev)
        sdev->use_10_for_rw = 1;
 
        if (sbp2_param_exclusive_login) {
-               sdev->manage_system_start_stop = true;
-               sdev->manage_runtime_start_stop = true;
-               sdev->manage_shutdown = true;
+               sdev->manage_system_start_stop = 1;
+               sdev->manage_runtime_start_stop = 1;
+               sdev->manage_shutdown = 1;
        }
 
        if (sdev->type == TYPE_ROM)
index 74d00b0c83fea7c1727b8b94f9a621de40b16022..4a98a859d44d34c9b27b81de7a90cdc779104e07 100644 (file)
@@ -131,7 +131,7 @@ config RASPBERRYPI_FIRMWARE
 
 config FW_CFG_SYSFS
        tristate "QEMU fw_cfg device support in sysfs"
-       depends on SYSFS && (ARM || ARM64 || PARISC || PPC_PMAC || SPARC || X86)
+       depends on SYSFS && (ARM || ARM64 || PARISC || PPC_PMAC || RISCV || SPARC || X86)
        depends on HAS_IOPORT_MAP
        default n
        help
index 3f2f7bf6e33526edeaa3a74288d0b14b79013aaf..5b439d04079c841e1bd698f63d96d1b428f6b2ed 100644 (file)
@@ -101,7 +101,7 @@ retry:
         * overlap on physical address level.
         */
        list_for_each_entry(entry, &accepting_list, list) {
-               if (entry->end < range.start)
+               if (entry->end <= range.start)
                        continue;
                if (entry->start >= range.end)
                        continue;
index a69399a6b7c0052fc5f66948e7928bbc9969ef3f..1448f61173b357f90802c905f6c5076895491626 100644 (file)
@@ -211,7 +211,7 @@ static void fw_cfg_io_cleanup(void)
 
 /* arch-specific ctrl & data register offsets are not available in ACPI, DT */
 #if !(defined(FW_CFG_CTRL_OFF) && defined(FW_CFG_DATA_OFF))
-# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
+# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV))
 #  define FW_CFG_CTRL_OFF 0x08
 #  define FW_CFG_DATA_OFF 0x00
 #  define FW_CFG_DMA_OFF 0x10
index afec09930efa953c88f34f94dc6818c3c6f2c448..9d92ca1576771bc236f73f507df7c1de473cef8c 100644 (file)
@@ -248,6 +248,7 @@ extern int amdgpu_umsch_mm;
 extern int amdgpu_seamless;
 
 extern int amdgpu_user_partt_mode;
+extern int amdgpu_agp;
 
 #define AMDGPU_VM_MAX_NUM_CTX                  4096
 #define AMDGPU_SG_THRESHOLD                    (256*1024*1024)
index df3ecfa9e13f5d87d3e67397e22d0d339c62a809..e50be65000303ac7ff71130fc0363dc7f1c03d29 100644 (file)
@@ -207,7 +207,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
        }
 
        for (i = 0; i < p->nchunks; i++) {
-               struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
+               struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
                struct drm_amdgpu_cs_chunk user_chunk;
                uint32_t __user *cdata;
 
index 3095a3a864af713c57ebcee2b192dbc99866e7fe..8f24cabe21554688126ad061dc7d6f288c1e4fad 100644 (file)
@@ -207,6 +207,7 @@ int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
 int amdgpu_umsch_mm;
 int amdgpu_seamless = -1; /* auto */
 uint amdgpu_debug_mask;
+int amdgpu_agp = -1; /* auto */
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -961,6 +962,15 @@ module_param_named(seamless, amdgpu_seamless, int, 0444);
 MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
 module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
 
+/**
+ * DOC: agp (int)
+ * Enable the AGP aperture.  This provides an aperture in the GPU's internal
+ * address space for direct access to system memory.  Note that these accesses
+ * are non-snooped, so they are only used for access to uncached memory.
+ */
+MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(agp, amdgpu_agp, int, 0444);
+
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
index 32b701cc0376d3451f480e50dd0405e142ebedc9..a21045d018f2b6efe6a281c1cec74fd25357c064 100644 (file)
@@ -1473,6 +1473,11 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
                                topology->nodes[i].num_links = (requires_reflection && topology->nodes[i].num_links) ?
                                                                topology->nodes[i].num_links : node_num_links;
                        }
+                       /* popluate the connected port num info if supported and available */
+                       if (ta_port_num_support && topology->nodes[i].num_links) {
+                               memcpy(topology->nodes[i].port_num, link_extend_info_output->nodes[i].port_num,
+                                      sizeof(struct xgmi_connected_port_num) * TA_XGMI__MAX_PORT_NUM);
+                       }
 
                        /* reflect the topology information for bi-directionality */
                        if (requires_reflection && topology->nodes[i].num_hops)
index 5d36ad3f48c74ac298ddec9c27cef9494c19979a..c4d9cbde55b9bc58799aa1acc9fe4eea29d1a98a 100644 (file)
@@ -150,6 +150,7 @@ struct psp_xgmi_node_info {
        uint8_t                                 is_sharing_enabled;
        enum ta_xgmi_assigned_sdma_engine       sdma_engine;
        uint8_t                                 num_links;
+       struct xgmi_connected_port_num          port_num[TA_XGMI__MAX_PORT_NUM];
 };
 
 struct psp_xgmi_topology_info {
index 84e5987b14e05ecd2c52c9d93635f4d182a34a5f..a3dc68e989108e52c71a24bd97fff44f6183f8db 100644 (file)
@@ -1188,7 +1188,7 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
                        }
 
                        if (block_obj->hw_ops->query_ras_error_count)
-                               block_obj->hw_ops->query_ras_error_count(adev, &err_data);
+                               block_obj->hw_ops->query_ras_error_count(adev, err_data);
 
                        if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
                            (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
index 65949cc7abb93243aea94860cf87eef2327e849a..07d930339b0781bd28f2e8d0b308f09617c0eeb6 100644 (file)
@@ -398,6 +398,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
  * amdgpu_uvd_entity_init - init entity
  *
  * @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
  *
  * Initialize the entity used for handle management in the kernel driver.
  */
index 0954447f689d9e5477c67a1bf71fb9f53cde063b..59acf424a078f0eb5e0182ed5dc082d86d4421a1 100644 (file)
@@ -230,6 +230,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
  * amdgpu_vce_entity_init - init entity
  *
  * @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
  *
  * Initialize the entity used for handle management in the kernel driver.
  */
index 0ec7b061d7c2035ac21a1a8b9c858de58d126396..a5a05c16c10d7be2ea1b86fbdcf76699551a8fd8 100644 (file)
@@ -675,7 +675,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
        amdgpu_gmc_set_agp_default(adev, mc);
        amdgpu_gmc_vram_location(adev, &adev->gmc, base);
        amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
-       if (!amdgpu_sriov_vf(adev))
+       if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
                amdgpu_gmc_agp_location(adev, mc);
 
        /* base offset of vram pages */
index 6dce9b29f675631c2049d1f2ef50b5ea64bff7fc..23d7b548d13f446766c0adc6f051f9b492111efb 100644 (file)
@@ -640,8 +640,9 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev,
        amdgpu_gmc_set_agp_default(adev, mc);
        amdgpu_gmc_vram_location(adev, &adev->gmc, base);
        amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH);
-       if (!amdgpu_sriov_vf(adev) ||
-           (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)))
+       if (!amdgpu_sriov_vf(adev) &&
+           (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)) &&
+           (amdgpu_agp == 1))
                amdgpu_gmc_agp_location(adev, mc);
 
        /* base offset of vram pages */
index bde25eb4ed8e2cb1e3f0adf62897cfab076db7db..2ac5820e9c9241431b1fda853dddc9242c04790e 100644 (file)
@@ -1630,7 +1630,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
        } else {
                amdgpu_gmc_vram_location(adev, mc, base);
                amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
-               if (!amdgpu_sriov_vf(adev))
+               if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
                        amdgpu_gmc_agp_location(adev, mc);
        }
        /* base offset of vram pages */
@@ -2170,8 +2170,6 @@ static int gmc_v9_0_sw_fini(void *handle)
 
        if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
                amdgpu_gmc_sysfs_fini(adev);
-       adev->gmc.num_mem_partitions = 0;
-       kfree(adev->gmc.mem_partitions);
 
        amdgpu_gmc_ras_fini(adev);
        amdgpu_gem_force_release(adev);
@@ -2185,6 +2183,9 @@ static int gmc_v9_0_sw_fini(void *handle)
        amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
        amdgpu_bo_fini(adev);
 
+       adev->gmc.num_mem_partitions = 0;
+       kfree(adev->gmc.mem_partitions);
+
        return 0;
 }
 
index ea142611be1c06ac9f396d40b4252a82f49fb370..9b0146732e13ced30b38336fc76e0d46922ff77e 100644 (file)
@@ -130,6 +130,9 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev)
        uint64_t value;
        int i;
 
+       if (amdgpu_sriov_vf(adev))
+               return;
+
        inst_mask = adev->aid_mask;
        for_each_inst(i, inst_mask) {
                /* Program the AGP BAR */
@@ -139,9 +142,6 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev)
                WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP,
                             adev->gmc.agp_end >> 24);
 
-               if (amdgpu_sriov_vf(adev))
-                       return;
-
                /* Program the system aperture low logical page number. */
                WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
                        min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
index 6f99f6754c119efddf9cc97e2db21f57bd9ad8bb..ee97814ebd994721f4cb161b0e3df40693086265 100644 (file)
@@ -2079,7 +2079,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
        struct dmub_srv_create_params create_params;
        struct dmub_srv_region_params region_params;
        struct dmub_srv_region_info region_info;
-       struct dmub_srv_fb_params fb_params;
+       struct dmub_srv_memory_params memory_params;
        struct dmub_srv_fb_info *fb_info;
        struct dmub_srv *dmub_srv;
        const struct dmcub_firmware_header_v1_0 *hdr;
@@ -2182,6 +2182,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
                adev->dm.dmub_fw->data +
                le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
                PSP_HEADER_BYTES;
+       region_params.is_mailbox_in_inbox = false;
 
        status = dmub_srv_calc_region_info(dmub_srv, &region_params,
                                           &region_info);
@@ -2205,10 +2206,10 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
                return r;
 
        /* Rebase the regions on the framebuffer address. */
-       memset(&fb_params, 0, sizeof(fb_params));
-       fb_params.cpu_addr = adev->dm.dmub_bo_cpu_addr;
-       fb_params.gpu_addr = adev->dm.dmub_bo_gpu_addr;
-       fb_params.region_info = &region_info;
+       memset(&memory_params, 0, sizeof(memory_params));
+       memory_params.cpu_fb_addr = adev->dm.dmub_bo_cpu_addr;
+       memory_params.gpu_fb_addr = adev->dm.dmub_bo_gpu_addr;
+       memory_params.region_info = &region_info;
 
        adev->dm.dmub_fb_info =
                kzalloc(sizeof(*adev->dm.dmub_fb_info), GFP_KERNEL);
@@ -2220,7 +2221,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
                return -ENOMEM;
        }
 
-       status = dmub_srv_calc_fb_info(dmub_srv, &fb_params, fb_info);
+       status = dmub_srv_calc_mem_info(dmub_srv, &memory_params, fb_info);
        if (status != DMUB_STATUS_OK) {
                DRM_ERROR("Error calculating DMUB FB info: %d\n", status);
                return -EINVAL;
@@ -7481,6 +7482,9 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap,
        int i;
        int result = -EIO;
 
+       if (!ddc_service->ddc_pin || !ddc_service->ddc_pin->hw_info.hw_supported)
+               return result;
+
        cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL);
 
        if (!cmd.payloads)
@@ -9603,14 +9607,14 @@ static bool should_reset_plane(struct drm_atomic_state *state,
        struct drm_plane *other;
        struct drm_plane_state *old_other_state, *new_other_state;
        struct drm_crtc_state *new_crtc_state;
+       struct amdgpu_device *adev = drm_to_adev(plane->dev);
        int i;
 
        /*
-        * TODO: Remove this hack once the checks below are sufficient
-        * enough to determine when we need to reset all the planes on
-        * the stream.
+        * TODO: Remove this hack for all asics once it proves that the
+        * fast updates works fine on DCN3.2+.
         */
-       if (state->allow_modeset)
+       if (adev->ip_versions[DCE_HWIP][0] < IP_VERSION(3, 2, 0) && state->allow_modeset)
                return true;
 
        /* Exit early if we know that we're adding or removing the plane. */
index ed784cf27d396f10fe507a1a4a41a009f01e15a5..c7a29bb737e24d0394e770529f1d3f43d0333aae 100644 (file)
@@ -536,11 +536,8 @@ bool dm_helpers_dp_read_dpcd(
 
        struct amdgpu_dm_connector *aconnector = link->priv;
 
-       if (!aconnector) {
-               drm_dbg_dp(aconnector->base.dev,
-                          "Failed to find connector for link!\n");
+       if (!aconnector)
                return false;
-       }
 
        return drm_dp_dpcd_read(&aconnector->dm_dp_aux.aux, address, data,
                                size) == size;
index d3b13d362edacc676c8b70a8a10bfa028728f7f3..11da0eebee6c4b0afd34ee5b7a499e046c4be413 100644 (file)
@@ -1604,31 +1604,31 @@ enum dc_status dm_dp_mst_is_port_support_mode(
        unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0;
        unsigned int max_compressed_bw_in_kbps = 0;
        struct dc_dsc_bw_range bw_range = {0};
-       struct drm_dp_mst_topology_mgr *mst_mgr;
+       uint16_t full_pbn = aconnector->mst_output_port->full_pbn;
 
        /*
-        * check if the mode could be supported if DSC pass-through is supported
-        * AND check if there enough bandwidth available to support the mode
-        * with DSC enabled.
+        * Consider the case with the depth of the mst topology tree is equal or less than 2
+        * A. When dsc bitstream can be transmitted along the entire path
+        *    1. dsc is possible between source and branch/leaf device (common dsc params is possible), AND
+        *    2. dsc passthrough supported at MST branch, or
+        *    3. dsc decoding supported at leaf MST device
+        *    Use maximum dsc compression as bw constraint
+        * B. When dsc bitstream cannot be transmitted along the entire path
+        *    Use native bw as bw constraint
         */
        if (is_dsc_common_config_possible(stream, &bw_range) &&
-           aconnector->mst_output_port->passthrough_aux) {
-               mst_mgr = aconnector->mst_output_port->mgr;
-               mutex_lock(&mst_mgr->lock);
-
+          (aconnector->mst_output_port->passthrough_aux ||
+           aconnector->dsc_aux == &aconnector->mst_output_port->aux)) {
                cur_link_settings = stream->link->verified_link_cap;
 
                upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,
-                                                              &cur_link_settings
-                                                              );
-               down_link_bw_in_kbps = kbps_from_pbn(aconnector->mst_output_port->full_pbn);
+                                                              &cur_link_settings);
+               down_link_bw_in_kbps = kbps_from_pbn(full_pbn);
 
                /* pick the bottleneck */
                end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps,
                                            down_link_bw_in_kbps);
 
-               mutex_unlock(&mst_mgr->lock);
-
                /*
                 * use the maximum dsc compression bandwidth as the required
                 * bandwidth for the mode
@@ -1643,8 +1643,7 @@ enum dc_status dm_dp_mst_is_port_support_mode(
                /* check if mode could be supported within full_pbn */
                bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3;
                pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false);
-
-               if (pbn > aconnector->mst_output_port->full_pbn)
+               if (pbn > full_pbn)
                        return DC_FAIL_BANDWIDTH_VALIDATE;
        }
 
index 0fa4fcd00de2c982ebc2634c7a74f19585c3b721..507a7cf56711f3d83d62e64e4c7f417d8b8ab398 100644 (file)
@@ -820,22 +820,22 @@ static void dcn35_set_idle_state(struct clk_mgr *clk_mgr_base, bool allow_idle)
 
        if (dc->config.disable_ips == DMUB_IPS_ENABLE ||
                dc->config.disable_ips == DMUB_IPS_DISABLE_DYNAMIC) {
-               val |= DMUB_IPS1_ALLOW_MASK;
-               val |= DMUB_IPS2_ALLOW_MASK;
-       } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) {
                val = val & ~DMUB_IPS1_ALLOW_MASK;
                val = val & ~DMUB_IPS2_ALLOW_MASK;
-       } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) {
-               val |= DMUB_IPS1_ALLOW_MASK;
-               val = val & ~DMUB_IPS2_ALLOW_MASK;
-       } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) {
+       } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) {
                val |= DMUB_IPS1_ALLOW_MASK;
                val |= DMUB_IPS2_ALLOW_MASK;
+       } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) {
+               val = val & ~DMUB_IPS1_ALLOW_MASK;
+               val |= DMUB_IPS2_ALLOW_MASK;
+       } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) {
+               val = val & ~DMUB_IPS1_ALLOW_MASK;
+               val = val & ~DMUB_IPS2_ALLOW_MASK;
        }
 
        if (!allow_idle) {
-               val = val & ~DMUB_IPS1_ALLOW_MASK;
-               val = val & ~DMUB_IPS2_ALLOW_MASK;
+               val |= DMUB_IPS1_ALLOW_MASK;
+               val |= DMUB_IPS2_ALLOW_MASK;
        }
 
        dcn35_smu_write_ips_scratch(clk_mgr, val);
index 7b9bf5cb45299974757bd16d608e41d444ef3b0f..76b47f17812797b95f0bf3fe5ef5ed966cbcacc3 100644 (file)
@@ -3178,7 +3178,7 @@ static bool update_planes_and_stream_state(struct dc *dc,
                        struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(&context->res_ctx,
                                        context->streams[i]);
 
-                       if (otg_master->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE)
+                       if (otg_master && otg_master->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE)
                                resource_build_test_pattern_params(&context->res_ctx, otg_master);
                }
        }
@@ -4934,8 +4934,8 @@ bool dc_dmub_is_ips_idle_state(struct dc *dc)
        if (dc->hwss.get_idle_state)
                idle_state = dc->hwss.get_idle_state(dc);
 
-       if ((idle_state & DMUB_IPS1_ALLOW_MASK) ||
-               (idle_state & DMUB_IPS2_ALLOW_MASK))
+       if (!(idle_state & DMUB_IPS1_ALLOW_MASK) ||
+               !(idle_state & DMUB_IPS2_ALLOW_MASK))
                return true;
 
        return false;
index 1d48278cba96c587be346e32c4ce46bfe6aaf85e..a1f1d100399275aba741c7dcd852c30fbcac27cf 100644 (file)
@@ -5190,6 +5190,9 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy(
        sec_next = sec_pipe->next_odm_pipe;
        sec_prev = sec_pipe->prev_odm_pipe;
 
+       if (pri_pipe == NULL)
+               return false;
+
        *sec_pipe = *pri_pipe;
 
        sec_pipe->top_pipe = sec_top;
index e4c007203318bf0b9ad3f3f9b1d2fd73973ca4f9..0e07699c1e83529acfbf48ba2f36bea06a0cd884 100644 (file)
@@ -1202,11 +1202,11 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
                allow_state = dc->hwss.get_idle_state(dc);
                dc->hwss.set_idle_state(dc, false);
 
-               if (allow_state & DMUB_IPS2_ALLOW_MASK) {
+               if (!(allow_state & DMUB_IPS2_ALLOW_MASK)) {
                        // Wait for evaluation time
                        udelay(dc->debug.ips2_eval_delay_us);
                        commit_state = dc->hwss.get_idle_state(dc);
-                       if (commit_state & DMUB_IPS2_COMMIT_MASK) {
+                       if (!(commit_state & DMUB_IPS2_COMMIT_MASK)) {
                                // Tell PMFW to exit low power state
                                dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
 
@@ -1216,7 +1216,7 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 
                                for (i = 0; i < max_num_polls; ++i) {
                                        commit_state = dc->hwss.get_idle_state(dc);
-                                       if (!(commit_state & DMUB_IPS2_COMMIT_MASK))
+                                       if (commit_state & DMUB_IPS2_COMMIT_MASK)
                                                break;
 
                                        udelay(1);
@@ -1235,10 +1235,10 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
                }
 
                dc_dmub_srv_notify_idle(dc, false);
-               if (allow_state & DMUB_IPS1_ALLOW_MASK) {
+               if (!(allow_state & DMUB_IPS1_ALLOW_MASK)) {
                        for (i = 0; i < max_num_polls; ++i) {
                                commit_state = dc->hwss.get_idle_state(dc);
-                               if (!(commit_state & DMUB_IPS1_COMMIT_MASK))
+                               if (commit_state & DMUB_IPS1_COMMIT_MASK)
                                        break;
 
                                udelay(1);
index cea666ea66c6144cad038aa9a8d833b2d36b0a78..fcb825e4f1bb8f8dfba4107bc3bb9c642aa726f1 100644 (file)
@@ -177,6 +177,7 @@ struct dc_panel_patch {
        unsigned int disable_fams;
        unsigned int skip_avmute;
        unsigned int mst_start_top_delay;
+       unsigned int remove_sink_ext_caps;
 };
 
 struct dc_edid_caps {
index 001f9eb66920751dbbe992d450579b9b4790b370..62a8f0b56006201d6b8db01a0641d5a355887cbb 100644 (file)
@@ -261,12 +261,6 @@ static void enc35_stream_encoder_enable(
                        /* invalid mode ! */
                        ASSERT_CRITICAL(false);
                }
-
-               REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1);
-               REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1);
-       } else {
-               REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0);
-               REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0);
        }
 }
 
@@ -436,6 +430,8 @@ static void enc35_disable_fifo(struct stream_encoder *enc)
        struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
 
        REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0);
+       REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0);
+       REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0);
 }
 
 static void enc35_enable_fifo(struct stream_encoder *enc)
@@ -443,6 +439,8 @@ static void enc35_enable_fifo(struct stream_encoder *enc)
        struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
 
        REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
+       REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1);
+       REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1);
 
        enc35_reset_fifo(enc, true);
        enc35_reset_fifo(enc, false);
index d6f0f857c05af8d6d9298225b58f10291cd5c92a..f2fe523f914f1179106426684e010cfea1b170ef 100644 (file)
@@ -1088,6 +1088,9 @@ static bool detect_link_and_local_sink(struct dc_link *link,
                if (sink->edid_caps.panel_patch.skip_scdc_overwrite)
                        link->ctx->dc->debug.hdmi20_disable = true;
 
+               if (sink->edid_caps.panel_patch.remove_sink_ext_caps)
+                       link->dpcd_sink_ext_caps.raw = 0;
+
                if (dc_is_hdmi_signal(link->connector_signal))
                        read_scdc_caps(link->ddc, link->local_sink);
 
index 9665ada0f894b253619dc57e1919924e8afa7b52..df63aa8f01e98d1e8efe45ba295bf5232f326f5d 100644 (file)
@@ -195,6 +195,7 @@ struct dmub_srv_region_params {
        uint32_t vbios_size;
        const uint8_t *fw_inst_const;
        const uint8_t *fw_bss_data;
+       bool is_mailbox_in_inbox;
 };
 
 /**
@@ -214,20 +215,25 @@ struct dmub_srv_region_params {
  */
 struct dmub_srv_region_info {
        uint32_t fb_size;
+       uint32_t inbox_size;
        uint8_t num_regions;
        struct dmub_region regions[DMUB_WINDOW_TOTAL];
 };
 
 /**
- * struct dmub_srv_fb_params - parameters used for driver fb setup
+ * struct dmub_srv_memory_params - parameters used for driver fb setup
  * @region_info: region info calculated by dmub service
- * @cpu_addr: base cpu address for the framebuffer
- * @gpu_addr: base gpu virtual address for the framebuffer
+ * @cpu_fb_addr: base cpu address for the framebuffer
+ * @cpu_inbox_addr: base cpu address for the gart
+ * @gpu_fb_addr: base gpu virtual address for the framebuffer
+ * @gpu_inbox_addr: base gpu virtual address for the gart
  */
-struct dmub_srv_fb_params {
+struct dmub_srv_memory_params {
        const struct dmub_srv_region_info *region_info;
-       void *cpu_addr;
-       uint64_t gpu_addr;
+       void *cpu_fb_addr;
+       void *cpu_inbox_addr;
+       uint64_t gpu_fb_addr;
+       uint64_t gpu_inbox_addr;
 };
 
 /**
@@ -563,8 +569,8 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
  *   DMUB_STATUS_OK - success
  *   DMUB_STATUS_INVALID - unspecified error
  */
-enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
-                                      const struct dmub_srv_fb_params *params,
+enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub,
+                                      const struct dmub_srv_memory_params *params,
                                       struct dmub_srv_fb_info *out);
 
 /**
index e43e8d4bfe375e93f9e75d285e57bb41cffc44de..22fc4ba96defd1c0abd6e031d8a4f8cc07f7d4a9 100644 (file)
@@ -434,7 +434,7 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
        uint32_t fw_state_size = DMUB_FW_STATE_SIZE;
        uint32_t trace_buffer_size = DMUB_TRACE_BUFFER_SIZE;
        uint32_t scratch_mem_size = DMUB_SCRATCH_MEM_SIZE;
-
+       uint32_t previous_top = 0;
        if (!dmub->sw_init)
                return DMUB_STATUS_INVALID;
 
@@ -459,8 +459,15 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
        bios->base = dmub_align(stack->top, 256);
        bios->top = bios->base + params->vbios_size;
 
-       mail->base = dmub_align(bios->top, 256);
-       mail->top = mail->base + DMUB_MAILBOX_SIZE;
+       if (params->is_mailbox_in_inbox) {
+               mail->base = 0;
+               mail->top = mail->base + DMUB_MAILBOX_SIZE;
+               previous_top = bios->top;
+       } else {
+               mail->base = dmub_align(bios->top, 256);
+               mail->top = mail->base + DMUB_MAILBOX_SIZE;
+               previous_top = mail->top;
+       }
 
        fw_info = dmub_get_fw_meta_info(params);
 
@@ -479,7 +486,7 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
                        dmub->fw_version = fw_info->fw_version;
        }
 
-       trace_buff->base = dmub_align(mail->top, 256);
+       trace_buff->base = dmub_align(previous_top, 256);
        trace_buff->top = trace_buff->base + dmub_align(trace_buffer_size, 64);
 
        fw_state->base = dmub_align(trace_buff->top, 256);
@@ -490,11 +497,14 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
 
        out->fb_size = dmub_align(scratch_mem->top, 4096);
 
+       if (params->is_mailbox_in_inbox)
+               out->inbox_size = dmub_align(mail->top, 4096);
+
        return DMUB_STATUS_OK;
 }
 
-enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
-                                      const struct dmub_srv_fb_params *params,
+enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub,
+                                      const struct dmub_srv_memory_params *params,
                                       struct dmub_srv_fb_info *out)
 {
        uint8_t *cpu_base;
@@ -509,8 +519,8 @@ enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
        if (params->region_info->num_regions != DMUB_NUM_WINDOWS)
                return DMUB_STATUS_INVALID;
 
-       cpu_base = (uint8_t *)params->cpu_addr;
-       gpu_base = params->gpu_addr;
+       cpu_base = (uint8_t *)params->cpu_fb_addr;
+       gpu_base = params->gpu_fb_addr;
 
        for (i = 0; i < DMUB_NUM_WINDOWS; ++i) {
                const struct dmub_region *reg =
@@ -518,6 +528,12 @@ enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
 
                out->fb[i].cpu_addr = cpu_base + reg->base;
                out->fb[i].gpu_addr = gpu_base + reg->base;
+
+               if (i == DMUB_WINDOW_4_MAILBOX && params->cpu_inbox_addr != 0) {
+                       out->fb[i].cpu_addr = (uint8_t *)params->cpu_inbox_addr + reg->base;
+                       out->fb[i].gpu_addr = params->gpu_inbox_addr + reg->base;
+               }
+
                out->fb[i].size = reg->top - reg->base;
        }
 
@@ -707,9 +723,16 @@ enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub)
                return DMUB_STATUS_INVALID;
 
        if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) {
-               dmub->inbox1_rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
-               dmub->inbox1_rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub);
-               dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt;
+               uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
+               uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub);
+
+               if (rptr > dmub->inbox1_rb.capacity || wptr > dmub->inbox1_rb.capacity) {
+                       return DMUB_STATUS_HW_FAILURE;
+               } else {
+                       dmub->inbox1_rb.rptr = rptr;
+                       dmub->inbox1_rb.wrpt = wptr;
+                       dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt;
+               }
        }
 
        return DMUB_STATUS_OK;
@@ -743,6 +766,11 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
        if (!dmub->hw_init)
                return DMUB_STATUS_INVALID;
 
+       if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity ||
+           dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) {
+               return DMUB_STATUS_HW_FAILURE;
+       }
+
        if (dmub_rb_push_front(&dmub->inbox1_rb, cmd))
                return DMUB_STATUS_OK;
 
index dab35d878a905ce50ae013d6f4e9a750599183ce..fef2d290f3f2526b2a196649e7e74a0df154b6ec 100644 (file)
@@ -123,7 +123,7 @@ typedef enum {
   VOLTAGE_GUARDBAND_COUNT
 } GFX_GUARDBAND_e;
 
-#define SMU_METRICS_TABLE_VERSION 0x8
+#define SMU_METRICS_TABLE_VERSION 0x9
 
 typedef struct __attribute__((packed, aligned(4))) {
   uint32_t AccumulationCounter;
@@ -211,6 +211,14 @@ typedef struct __attribute__((packed, aligned(4))) {
   //XGMI Data tranfser size
   uint64_t XgmiReadDataSizeAcc[8];//in KByte
   uint64_t XgmiWriteDataSizeAcc[8];//in KByte
+
+  //PCIE BW Data and error count
+  uint32_t PcieBandwidth[4];
+  uint32_t PCIeL0ToRecoveryCountAcc;      // The Pcie counter itself is accumulated
+  uint32_t PCIenReplayAAcc;               // The Pcie counter itself is accumulated
+  uint32_t PCIenReplayARolloverCountAcc;  // The Pcie counter itself is accumulated
+  uint32_t PCIeNAKSentCountAcc;           // The Pcie counter itself is accumulated
+  uint32_t PCIeNAKReceivedCountAcc;       // The Pcie counter itself is accumulated
 } MetricsTable_t;
 
 #define SMU_VF_METRICS_TABLE_VERSION 0x3
index 891605d4975f4e4460f83cb153f887a54cc16146..0e5a77c3c2e216362b2e5363f1ec25f62940ede3 100644 (file)
@@ -1454,7 +1454,7 @@ static int smu_v13_0_6_register_irq_handler(struct smu_context *smu)
 
 static int smu_v13_0_6_notify_unload(struct smu_context *smu)
 {
-       if (smu->smc_fw_version <= 0x553500)
+       if (amdgpu_in_reset(smu->adev))
                return 0;
 
        dev_dbg(smu->adev->dev, "Notify PMFW about driver unload");
@@ -2095,6 +2095,14 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
                        smu_v13_0_6_get_current_pcie_link_speed(smu);
                gpu_metrics->pcie_bandwidth_acc =
                                SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]);
+               gpu_metrics->pcie_bandwidth_inst =
+                               SMUQ10_ROUND(metrics->PcieBandwidth[0]);
+               gpu_metrics->pcie_l0_to_recov_count_acc =
+                               metrics->PCIeL0ToRecoveryCountAcc;
+               gpu_metrics->pcie_replay_count_acc =
+                               metrics->PCIenReplayAAcc;
+               gpu_metrics->pcie_replay_rover_count_acc =
+                               metrics->PCIenReplayARolloverCountAcc;
        }
 
        gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
index 2aee32344f4a2554fb8ba491f0734656ccfc187d..772f3b049c1694408d7acc89f4235a1c8c657ea9 100644 (file)
@@ -174,6 +174,17 @@ to_ast_sil164_connector(struct drm_connector *connector)
        return container_of(connector, struct ast_sil164_connector, base);
 }
 
+struct ast_bmc_connector {
+       struct drm_connector base;
+       struct drm_connector *physical_connector;
+};
+
+static inline struct ast_bmc_connector *
+to_ast_bmc_connector(struct drm_connector *connector)
+{
+       return container_of(connector, struct ast_bmc_connector, base);
+}
+
 /*
  * Device
  */
@@ -218,7 +229,7 @@ struct ast_device {
                } astdp;
                struct {
                        struct drm_encoder encoder;
-                       struct drm_connector connector;
+                       struct ast_bmc_connector bmc_connector;
                } bmc;
        } output;
 
index cb96149842851aa6fa66506e542a34f85c64d638..c20534d0ef7c8af580e4e6a3eb2c4c2bd49842eb 100644 (file)
@@ -1767,6 +1767,30 @@ static const struct drm_encoder_funcs ast_bmc_encoder_funcs = {
        .destroy = drm_encoder_cleanup,
 };
 
+static int ast_bmc_connector_helper_detect_ctx(struct drm_connector *connector,
+                                              struct drm_modeset_acquire_ctx *ctx,
+                                              bool force)
+{
+       struct ast_bmc_connector *bmc_connector = to_ast_bmc_connector(connector);
+       struct drm_connector *physical_connector = bmc_connector->physical_connector;
+
+       /*
+        * Most user-space compositors cannot handle more than one connected
+        * connector per CRTC. Hence, we only mark the BMC as connected if the
+        * physical connector is disconnected. If the physical connector's status
+        * is connected or unknown, the BMC remains disconnected. This has no
+        * effect on the output of the BMC.
+        *
+        * FIXME: Remove this logic once user-space compositors can handle more
+        *        than one connector per CRTC. The BMC should always be connected.
+        */
+
+       if (physical_connector && physical_connector->status == connector_status_disconnected)
+               return connector_status_connected;
+
+       return connector_status_disconnected;
+}
+
 static int ast_bmc_connector_helper_get_modes(struct drm_connector *connector)
 {
        return drm_add_modes_noedid(connector, 4096, 4096);
@@ -1774,6 +1798,7 @@ static int ast_bmc_connector_helper_get_modes(struct drm_connector *connector)
 
 static const struct drm_connector_helper_funcs ast_bmc_connector_helper_funcs = {
        .get_modes = ast_bmc_connector_helper_get_modes,
+       .detect_ctx = ast_bmc_connector_helper_detect_ctx,
 };
 
 static const struct drm_connector_funcs ast_bmc_connector_funcs = {
@@ -1784,12 +1809,33 @@ static const struct drm_connector_funcs ast_bmc_connector_funcs = {
        .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
-static int ast_bmc_output_init(struct ast_device *ast)
+static int ast_bmc_connector_init(struct drm_device *dev,
+                                 struct ast_bmc_connector *bmc_connector,
+                                 struct drm_connector *physical_connector)
+{
+       struct drm_connector *connector = &bmc_connector->base;
+       int ret;
+
+       ret = drm_connector_init(dev, connector, &ast_bmc_connector_funcs,
+                                DRM_MODE_CONNECTOR_VIRTUAL);
+       if (ret)
+               return ret;
+
+       drm_connector_helper_add(connector, &ast_bmc_connector_helper_funcs);
+
+       bmc_connector->physical_connector = physical_connector;
+
+       return 0;
+}
+
+static int ast_bmc_output_init(struct ast_device *ast,
+                              struct drm_connector *physical_connector)
 {
        struct drm_device *dev = &ast->base;
        struct drm_crtc *crtc = &ast->crtc;
        struct drm_encoder *encoder = &ast->output.bmc.encoder;
-       struct drm_connector *connector = &ast->output.bmc.connector;
+       struct ast_bmc_connector *bmc_connector = &ast->output.bmc.bmc_connector;
+       struct drm_connector *connector = &bmc_connector->base;
        int ret;
 
        ret = drm_encoder_init(dev, encoder,
@@ -1799,13 +1845,10 @@ static int ast_bmc_output_init(struct ast_device *ast)
                return ret;
        encoder->possible_crtcs = drm_crtc_mask(crtc);
 
-       ret = drm_connector_init(dev, connector, &ast_bmc_connector_funcs,
-                                DRM_MODE_CONNECTOR_VIRTUAL);
+       ret = ast_bmc_connector_init(dev, bmc_connector, physical_connector);
        if (ret)
                return ret;
 
-       drm_connector_helper_add(connector, &ast_bmc_connector_helper_funcs);
-
        ret = drm_connector_attach_encoder(connector, encoder);
        if (ret)
                return ret;
@@ -1864,6 +1907,7 @@ static const struct drm_mode_config_funcs ast_mode_config_funcs = {
 int ast_mode_config_init(struct ast_device *ast)
 {
        struct drm_device *dev = &ast->base;
+       struct drm_connector *physical_connector = NULL;
        int ret;
 
        ret = drmm_mode_config_init(dev);
@@ -1904,23 +1948,27 @@ int ast_mode_config_init(struct ast_device *ast)
                ret = ast_vga_output_init(ast);
                if (ret)
                        return ret;
+               physical_connector = &ast->output.vga.vga_connector.base;
        }
        if (ast->tx_chip_types & AST_TX_SIL164_BIT) {
                ret = ast_sil164_output_init(ast);
                if (ret)
                        return ret;
+               physical_connector = &ast->output.sil164.sil164_connector.base;
        }
        if (ast->tx_chip_types & AST_TX_DP501_BIT) {
                ret = ast_dp501_output_init(ast);
                if (ret)
                        return ret;
+               physical_connector = &ast->output.dp501.connector;
        }
        if (ast->tx_chip_types & AST_TX_ASTDP_BIT) {
                ret = ast_astdp_output_init(ast);
                if (ret)
                        return ret;
+               physical_connector = &ast->output.astdp.connector;
        }
-       ret = ast_bmc_output_init(ast);
+       ret = ast_bmc_output_init(ast, physical_connector);
        if (ret)
                return ret;
 
index d8856d1581fdb3507c0ff8ff3ea8a1ae5be92862..e9994c9db799bfb9a555edbaea2d84103db53e79 100644 (file)
@@ -5,7 +5,7 @@ termcolor==2.3.0
 certifi==2023.7.22
 charset-normalizer==3.2.0
 idna==3.4
-pip==23.2.1
+pip==23.3
 python-gitlab==3.15.0
 requests==2.31.0
 requests-toolbelt==1.0.0
@@ -13,5 +13,5 @@ ruamel.yaml==0.17.32
 ruamel.yaml.clib==0.2.7
 setuptools==68.0.0
 tenacity==8.2.3
-urllib3==2.0.4
-wheel==0.41.1
\ No newline at end of file
+urllib3==2.0.7
+wheel==0.41.1
index d5c15292ae93781335a8e8a441be94f9bb235f04..3d92f66e550c3885d1639c9450487228758afd76 100644 (file)
@@ -336,6 +336,12 @@ static const struct dmi_system_id orientation_data[] = {
                  DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Duet 3 10IGL5"),
                },
                .driver_data = (void *)&lcd1200x1920_rightside_up,
+       }, {    /* Lenovo Legion Go 8APU1 */
+               .matches = {
+                 DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                 DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Legion Go 8APU1"),
+               },
+               .driver_data = (void *)&lcd1600x2560_leftside_up,
        }, {    /* Lenovo Yoga Book X90F / X90L */
                .matches = {
                  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
index 7b4628f4f124040eae309b7367d2f26c59933e53..851b312bd84494cca9e074bb5f3fb2dcf49472bf 100644 (file)
@@ -1161,6 +1161,14 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
        intel_connector->port = port;
        drm_dp_mst_get_port_malloc(port);
 
+       /*
+        * TODO: set the AUX for the actual MST port decompressing the stream.
+        * At the moment the driver only supports enabling this globally in the
+        * first downstream MST branch, via intel_dp's (root port) AUX.
+        */
+       intel_connector->dp.dsc_decompression_aux = &intel_dp->aux;
+       intel_dp_mst_read_decompression_port_dsc_caps(intel_dp, intel_connector);
+
        connector = &intel_connector->base;
        ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs,
                                 DRM_MODE_CONNECTOR_DisplayPort);
@@ -1172,14 +1180,6 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
 
        drm_connector_helper_add(connector, &intel_dp_mst_connector_helper_funcs);
 
-       /*
-        * TODO: set the AUX for the actual MST port decompressing the stream.
-        * At the moment the driver only supports enabling this globally in the
-        * first downstream MST branch, via intel_dp's (root port) AUX.
-        */
-       intel_connector->dp.dsc_decompression_aux = &intel_dp->aux;
-       intel_dp_mst_read_decompression_port_dsc_caps(intel_dp, intel_connector);
-
        for_each_pipe(dev_priv, pipe) {
                struct drm_encoder *enc =
                        &intel_dp->mst_encoders[pipe]->base.base;
index ed32bf5b15464e63efceedc7752e851dea8ad212..ba1186fc524f84c9d5d0c8a1481b6d0bf301607c 100644 (file)
@@ -982,8 +982,6 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
 
 err:
        i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret);
-       intel_gt_release_all(i915);
-
        return ret;
 }
 
@@ -1002,15 +1000,6 @@ int intel_gt_tiles_init(struct drm_i915_private *i915)
        return 0;
 }
 
-void intel_gt_release_all(struct drm_i915_private *i915)
-{
-       struct intel_gt *gt;
-       unsigned int id;
-
-       for_each_gt(gt, i915, id)
-               i915->gt[id] = NULL;
-}
-
 void intel_gt_info_print(const struct intel_gt_info *info,
                         struct drm_printer *p)
 {
index 8a0e2c745e1f9e40051cf0313134f5aa58a78f07..802de2c6decb7b0e78f206ac4edb20c7e6c58941 100644 (file)
@@ -782,7 +782,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        ret = i915_driver_mmio_probe(i915);
        if (ret < 0)
-               goto out_tiles_cleanup;
+               goto out_runtime_pm_put;
 
        ret = i915_driver_hw_probe(i915);
        if (ret < 0)
@@ -842,8 +842,6 @@ out_cleanup_hw:
        i915_ggtt_driver_late_release(i915);
 out_cleanup_mmio:
        i915_driver_mmio_release(i915);
-out_tiles_cleanup:
-       intel_gt_release_all(i915);
 out_runtime_pm_put:
        enable_rpm_wakeref_asserts(&i915->runtime_pm);
        i915_driver_late_release(i915);
index 1ccd1edd693c54748a6c474fedbe064e8e33907a..4c0528794e7a7ef7eb05f69ebce8a94db64ce863 100644 (file)
@@ -406,6 +406,7 @@ static const struct dpu_perf_cfg sc8280xp_perf_data = {
        .min_llcc_ib = 0,
        .min_dram_ib = 800000,
        .danger_lut_tbl = {0xf, 0xffff, 0x0},
+       .safe_lut_tbl = {0xfe00, 0xfe00, 0xffff},
        .qos_lut_tbl = {
                {.nentry = ARRAY_SIZE(sc8180x_qos_linear),
                .entries = sc8180x_qos_linear
index 11d9fc2c6bf5e07845a43e40035d0b39f9810350..ec933d597e20cfc1eed66439f69a2297f46f1b9a 100644 (file)
@@ -844,8 +844,7 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev)
 
        return 0;
 fail:
-       if (mdp5_kms)
-               mdp5_destroy(mdp5_kms);
+       mdp5_destroy(mdp5_kms);
        return ret;
 }
 
index e329e03e068d527339fb31ca3cc8dbcd4e2bd83a..1b88fb52726f244b28a95c6c8133bb0a864cc3a9 100644 (file)
@@ -365,9 +365,11 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp,
        /* reset video pattern flag on disconnect */
        if (!hpd) {
                dp->panel->video_test = false;
-               drm_dp_set_subconnector_property(dp->dp_display.connector,
-                                                connector_status_disconnected,
-                                                dp->panel->dpcd, dp->panel->downstream_ports);
+               if (!dp->dp_display.is_edp)
+                       drm_dp_set_subconnector_property(dp->dp_display.connector,
+                                                        connector_status_disconnected,
+                                                        dp->panel->dpcd,
+                                                        dp->panel->downstream_ports);
        }
 
        dp->dp_display.is_connected = hpd;
@@ -396,8 +398,11 @@ static int dp_display_process_hpd_high(struct dp_display_private *dp)
 
        dp_link_process_request(dp->link);
 
-       drm_dp_set_subconnector_property(dp->dp_display.connector, connector_status_connected,
-                                        dp->panel->dpcd, dp->panel->downstream_ports);
+       if (!dp->dp_display.is_edp)
+               drm_dp_set_subconnector_property(dp->dp_display.connector,
+                                                connector_status_connected,
+                                                dp->panel->dpcd,
+                                                dp->panel->downstream_ports);
 
        edid = dp->panel->edid;
 
index 40e7344180e3e0e8d4f99804835d5ab51a12a842..e3bdd7dd4cdc7de80b1ca1e2cd1f60386c31594c 100644 (file)
@@ -345,6 +345,9 @@ struct drm_connector *dp_drm_connector_init(struct msm_dp *dp_display, struct dr
        if (IS_ERR(connector))
                return connector;
 
+       if (!dp_display->is_edp)
+               drm_connector_attach_dp_subconnector_property(connector);
+
        drm_connector_attach_encoder(connector, encoder);
 
        return connector;
index 3b1ed02f644d2821221f54f4a46f611692a4c413..89a6344bc8653d61a3dea4860cb3320f1bcf7a15 100644 (file)
@@ -918,7 +918,7 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy,
        if ((phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V5_2)) {
                if (phy->cphy_mode) {
                        vreg_ctrl_0 = 0x45;
-                       vreg_ctrl_1 = 0x45;
+                       vreg_ctrl_1 = 0x41;
                        glbl_rescode_top_ctrl = 0x00;
                        glbl_rescode_bot_ctrl = 0x00;
                } else {
index 2aae7d107f3356e08b55b6b05bf7cf96205318a0..3f217b57829347413a5e570ba3441fc758e915ae 100644 (file)
@@ -288,8 +288,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
        if (ret)
                goto err_msm_uninit;
 
-       drm_kms_helper_poll_init(ddev);
-
        if (priv->kms_init) {
                drm_kms_helper_poll_init(ddev);
                msm_fbdev_setup(ddev);
index 82b267c111470af7a09726d2dab5d9aa632d3b7c..460459af272d6ffa4fe8f9a8770d72220751a78c 100644 (file)
@@ -14,7 +14,7 @@ struct nvkm_event {
        int index_nr;
 
        spinlock_t refs_lock;
-       spinlock_t list_lock;
+       rwlock_t list_lock;
        int *refs;
 
        struct list_head ntfy;
@@ -38,7 +38,7 @@ nvkm_event_init(const struct nvkm_event_func *func, struct nvkm_subdev *subdev,
                int types_nr, int index_nr, struct nvkm_event *event)
 {
        spin_lock_init(&event->refs_lock);
-       spin_lock_init(&event->list_lock);
+       rwlock_init(&event->list_lock);
        return __nvkm_event_init(func, subdev, types_nr, index_nr, event);
 }
 
index d8c92521226d97c582e4b6f64c69fc90b469762a..f28f9a857458682f80f0a8687ed2edd92fa921d9 100644 (file)
@@ -726,6 +726,11 @@ nouveau_display_create(struct drm_device *dev)
 
        if (nouveau_modeset != 2) {
                ret = nvif_disp_ctor(&drm->client.device, "kmsDisp", 0, &disp->disp);
+               /* no display hw */
+               if (ret == -ENODEV) {
+                       ret = 0;
+                       goto disp_create_err;
+               }
 
                if (!ret && (disp->disp.outp_mask || drm->vbios.dcb.entries)) {
                        nouveau_display_create_properties(dev);
index a6c877135598f7cd7447bfc50465b51e2d0fd4fc..61fed7792e415cb72d9c75664d9a9e19af7539ef 100644 (file)
@@ -81,17 +81,17 @@ nvkm_event_ntfy_state(struct nvkm_event_ntfy *ntfy)
 static void
 nvkm_event_ntfy_remove(struct nvkm_event_ntfy *ntfy)
 {
-       spin_lock_irq(&ntfy->event->list_lock);
+       write_lock_irq(&ntfy->event->list_lock);
        list_del_init(&ntfy->head);
-       spin_unlock_irq(&ntfy->event->list_lock);
+       write_unlock_irq(&ntfy->event->list_lock);
 }
 
 static void
 nvkm_event_ntfy_insert(struct nvkm_event_ntfy *ntfy)
 {
-       spin_lock_irq(&ntfy->event->list_lock);
+       write_lock_irq(&ntfy->event->list_lock);
        list_add_tail(&ntfy->head, &ntfy->event->ntfy);
-       spin_unlock_irq(&ntfy->event->list_lock);
+       write_unlock_irq(&ntfy->event->list_lock);
 }
 
 static void
@@ -176,7 +176,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits)
                return;
 
        nvkm_trace(event->subdev, "event: ntfy %08x on %d\n", bits, id);
-       spin_lock_irqsave(&event->list_lock, flags);
+       read_lock_irqsave(&event->list_lock, flags);
 
        list_for_each_entry_safe(ntfy, ntmp, &event->ntfy, head) {
                if (ntfy->id == id && ntfy->bits & bits) {
@@ -185,7 +185,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits)
                }
        }
 
-       spin_unlock_irqrestore(&event->list_lock, flags);
+       read_unlock_irqrestore(&event->list_lock, flags);
 }
 
 void
index 3adbb05ff587ba93ca6c618679aba7af9261ae29..d088e636edc31c407582bdbe8ab0ee28653956bf 100644 (file)
@@ -539,7 +539,7 @@ r535_fifo_runl_ctor(struct nvkm_fifo *fifo)
        struct nvkm_runl *runl;
        struct nvkm_engn *engn;
        u32 cgids = 2048;
-       u32 chids = 2048 / CHID_PER_USERD;
+       u32 chids = 2048;
        int ret;
        NV2080_CTRL_FIFO_GET_DEVICE_INFO_TABLE_PARAMS *ctrl;
 
index e31f9641114b7127bdfe92e4e4df680ac16a9aee..dc44f5c7833f60c165d5c01a0b2afd03e3e45294 100644 (file)
@@ -689,8 +689,8 @@ r535_gsp_rpc_get(struct nvkm_gsp *gsp, u32 fn, u32 argc)
        struct nvfw_gsp_rpc *rpc;
 
        rpc = r535_gsp_cmdq_get(gsp, ALIGN(sizeof(*rpc) + argc, sizeof(u64)));
-       if (!rpc)
-               return NULL;
+       if (IS_ERR(rpc))
+               return ERR_CAST(rpc);
 
        rpc->header_version = 0x03000000;
        rpc->signature = ('C' << 24) | ('P' << 16) | ('R' << 8) | 'V';
@@ -1159,7 +1159,7 @@ static void
 r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode,
                                                 MUX_METHOD_DATA_ELEMENT *part)
 {
-       acpi_handle iter = NULL, handle_mux;
+       acpi_handle iter = NULL, handle_mux = NULL;
        acpi_status status;
        unsigned long long value;
 
index 9323e7b9e38493461e716561bfd73673ef2bd332..be8f48e3c1db87832de479e720efef2b6999360c 100644 (file)
@@ -1709,6 +1709,7 @@ static const struct panel_desc auo_b101uan08_3_desc = {
        .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
                      MIPI_DSI_MODE_LPM,
        .init_cmds = auo_b101uan08_3_init_cmd,
+       .lp11_before_reset = true,
 };
 
 static const struct drm_display_mode boe_tv105wum_nw0_default_mode = {
@@ -1766,11 +1767,11 @@ static const struct panel_desc starry_qfh032011_53g_desc = {
 };
 
 static const struct drm_display_mode starry_himax83102_j02_default_mode = {
-       .clock = 161600,
+       .clock = 162850,
        .hdisplay = 1200,
-       .hsync_start = 1200 + 40,
-       .hsync_end = 1200 + 40 + 20,
-       .htotal = 1200 + 40 + 20 + 40,
+       .hsync_start = 1200 + 50,
+       .hsync_end = 1200 + 50 + 20,
+       .htotal = 1200 + 50 + 20 + 50,
        .vdisplay = 1920,
        .vsync_start = 1920 + 116,
        .vsync_end = 1920 + 116 + 8,
index 6cd32b9090876585c7e7610813a37d2c3c1647c5..9367a4572dcf645037803cb629ae0e1c5ff692ab 100644 (file)
@@ -2379,13 +2379,13 @@ static const struct panel_desc innolux_g070y2_t02 = {
 static const struct display_timing innolux_g101ice_l01_timing = {
        .pixelclock = { 60400000, 71100000, 74700000 },
        .hactive = { 1280, 1280, 1280 },
-       .hfront_porch = { 41, 80, 100 },
-       .hback_porch = { 40, 79, 99 },
-       .hsync_len = { 1, 1, 1 },
+       .hfront_porch = { 30, 60, 70 },
+       .hback_porch = { 30, 60, 70 },
+       .hsync_len = { 22, 40, 60 },
        .vactive = { 800, 800, 800 },
-       .vfront_porch = { 5, 11, 14 },
-       .vback_porch = { 4, 11, 14 },
-       .vsync_len = { 1, 1, 1 },
+       .vfront_porch = { 3, 8, 14 },
+       .vback_porch = { 3, 8, 14 },
+       .vsync_len = { 4, 7, 12 },
        .flags = DISPLAY_FLAGS_DE_HIGH,
 };
 
@@ -2402,6 +2402,7 @@ static const struct panel_desc innolux_g101ice_l01 = {
                .disable = 200,
        },
        .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
+       .bus_flags = DRM_BUS_FLAG_DE_HIGH,
        .connector_type = DRM_MODE_CONNECTOR_LVDS,
 };
 
index 066299894d0480bc4ec7c6aac371c3d3e9bd1c9c..a13473b2d54c40a8a3ba62a547d39166613865df 100644 (file)
@@ -247,14 +247,22 @@ static inline void vop_cfg_done(struct vop *vop)
        VOP_REG_SET(vop, common, cfg_done, 1);
 }
 
-static bool has_rb_swapped(uint32_t format)
+static bool has_rb_swapped(uint32_t version, uint32_t format)
 {
        switch (format) {
        case DRM_FORMAT_XBGR8888:
        case DRM_FORMAT_ABGR8888:
-       case DRM_FORMAT_BGR888:
        case DRM_FORMAT_BGR565:
                return true;
+       /*
+        * full framework (IP version 3.x) only need rb swapped for RGB888 and
+        * little framework (IP version 2.x) only need rb swapped for BGR888,
+        * check for 3.x to also only rb swap BGR888 for unknown vop version
+        */
+       case DRM_FORMAT_RGB888:
+               return VOP_MAJOR(version) == 3;
+       case DRM_FORMAT_BGR888:
+               return VOP_MAJOR(version) != 3;
        default:
                return false;
        }
@@ -1030,7 +1038,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane,
        VOP_WIN_SET(vop, win, dsp_info, dsp_info);
        VOP_WIN_SET(vop, win, dsp_st, dsp_st);
 
-       rb_swap = has_rb_swapped(fb->format->format);
+       rb_swap = has_rb_swapped(vop->data->version, fb->format->format);
        VOP_WIN_SET(vop, win, rb_swap, rb_swap);
 
        /*
index 3ca45975c686eee3563dabb287caf4c1c21a10ac..d9e9829b22001aa468c1fd837fc692ce807d313f 100644 (file)
@@ -345,6 +345,8 @@ static const struct apple_non_apple_keyboard non_apple_keyboards[] = {
        { "AONE" },
        { "GANSS" },
        { "Hailuck" },
+       { "Jamesdonkey" },
+       { "A3R" },
 };
 
 static bool apple_is_non_apple_keyboard(struct hid_device *hdev)
index fd61dba882338e022685fca453e2d70a0e093106..78cdfb8b9a7aeb6f03b6dc58157cb0b925b4ac76 100644 (file)
@@ -381,7 +381,7 @@ static int asus_raw_event(struct hid_device *hdev,
        return 0;
 }
 
-static int asus_kbd_set_report(struct hid_device *hdev, u8 *buf, size_t buf_size)
+static int asus_kbd_set_report(struct hid_device *hdev, const u8 *buf, size_t buf_size)
 {
        unsigned char *dmabuf;
        int ret;
@@ -404,7 +404,7 @@ static int asus_kbd_set_report(struct hid_device *hdev, u8 *buf, size_t buf_size
 
 static int asus_kbd_init(struct hid_device *hdev)
 {
-       u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x41, 0x53, 0x55, 0x53, 0x20, 0x54,
+       const u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x41, 0x53, 0x55, 0x53, 0x20, 0x54,
                     0x65, 0x63, 0x68, 0x2e, 0x49, 0x6e, 0x63, 0x2e, 0x00 };
        int ret;
 
@@ -418,7 +418,7 @@ static int asus_kbd_init(struct hid_device *hdev)
 static int asus_kbd_get_functions(struct hid_device *hdev,
                                  unsigned char *kbd_func)
 {
-       u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x05, 0x20, 0x31, 0x00, 0x08 };
+       const u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x05, 0x20, 0x31, 0x00, 0x08 };
        u8 *readbuf;
        int ret;
 
@@ -449,7 +449,7 @@ static int asus_kbd_get_functions(struct hid_device *hdev,
 
 static int rog_nkey_led_init(struct hid_device *hdev)
 {
-       u8 buf_init_start[] = { FEATURE_KBD_LED_REPORT_ID1, 0xB9 };
+       const u8 buf_init_start[] = { FEATURE_KBD_LED_REPORT_ID1, 0xB9 };
        u8 buf_init2[] = { FEATURE_KBD_LED_REPORT_ID1, 0x41, 0x53, 0x55, 0x53, 0x20,
                                0x54, 0x65, 0x63, 0x68, 0x2e, 0x49, 0x6e, 0x63, 0x2e, 0x00 };
        u8 buf_init3[] = { FEATURE_KBD_LED_REPORT_ID1,
@@ -1000,6 +1000,24 @@ static int asus_start_multitouch(struct hid_device *hdev)
        return 0;
 }
 
+static int __maybe_unused asus_resume(struct hid_device *hdev) {
+       struct asus_drvdata *drvdata = hid_get_drvdata(hdev);
+       int ret = 0;
+
+       if (drvdata->kbd_backlight) {
+               const u8 buf[] = { FEATURE_KBD_REPORT_ID, 0xba, 0xc5, 0xc4,
+                               drvdata->kbd_backlight->cdev.brightness };
+               ret = asus_kbd_set_report(hdev, buf, sizeof(buf));
+               if (ret < 0) {
+                       hid_err(hdev, "Asus failed to set keyboard backlight: %d\n", ret);
+                       goto asus_resume_err;
+               }
+       }
+
+asus_resume_err:
+       return ret;
+}
+
 static int __maybe_unused asus_reset_resume(struct hid_device *hdev)
 {
        struct asus_drvdata *drvdata = hid_get_drvdata(hdev);
@@ -1294,6 +1312,7 @@ static struct hid_driver asus_driver = {
        .input_configured       = asus_input_configured,
 #ifdef CONFIG_PM
        .reset_resume           = asus_reset_resume,
+       .resume                                 = asus_resume,
 #endif
        .event                  = asus_event,
        .raw_event              = asus_raw_event
index 8992e3c1e7698eeaeeb7711876d66022ef5ec9d0..e0181218ad857862aba07f6ff0ffe30df42bfb16 100644 (file)
@@ -702,15 +702,22 @@ static void hid_close_report(struct hid_device *device)
  * Free a device structure, all reports, and all fields.
  */
 
-static void hid_device_release(struct device *dev)
+void hiddev_free(struct kref *ref)
 {
-       struct hid_device *hid = to_hid_device(dev);
+       struct hid_device *hid = container_of(ref, struct hid_device, ref);
 
        hid_close_report(hid);
        kfree(hid->dev_rdesc);
        kfree(hid);
 }
 
+static void hid_device_release(struct device *dev)
+{
+       struct hid_device *hid = to_hid_device(dev);
+
+       kref_put(&hid->ref, hiddev_free);
+}
+
 /*
  * Fetch a report description item from the data stream. We support long
  * items, though they are not used yet.
@@ -2846,6 +2853,7 @@ struct hid_device *hid_allocate_device(void)
        spin_lock_init(&hdev->debug_list_lock);
        sema_init(&hdev->driver_input_lock, 1);
        mutex_init(&hdev->ll_open_lock);
+       kref_init(&hdev->ref);
 
        hid_bpf_device_init(hdev);
 
index e7ef1ea107c9e618b035552ebf45154ff43a30df..7dd83ec74f8a9df75c6e966b3876f8481166a22f 100644 (file)
@@ -1135,6 +1135,7 @@ static int hid_debug_events_open(struct inode *inode, struct file *file)
                goto out;
        }
        list->hdev = (struct hid_device *) inode->i_private;
+       kref_get(&list->hdev->ref);
        file->private_data = list;
        mutex_init(&list->read_mutex);
 
@@ -1227,6 +1228,8 @@ static int hid_debug_events_release(struct inode *inode, struct file *file)
        list_del(&list->node);
        spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags);
        kfifo_free(&list->hid_debug_fifo);
+
+       kref_put(&list->hdev->ref, hiddev_free);
        kfree(list);
 
        return 0;
index 558eb08c19ef9da28b233e4418353a32b0172260..281b3a7187cec2ff355aae17a4cec2b59d2102ca 100644 (file)
@@ -21,6 +21,10 @@ MODULE_DESCRIPTION("HID driver for Glorious PC Gaming Race mice");
  * Glorious Model O and O- specify the const flag in the consumer input
  * report descriptor, which leads to inputs being ignored. Fix this
  * by patching the descriptor.
+ *
+ * Glorious Model I incorrectly specifes the Usage Minimum for its
+ * keyboard HID report, causing keycodes to be misinterpreted.
+ * Fix this by setting Usage Minimum to 0 in that report.
  */
 static __u8 *glorious_report_fixup(struct hid_device *hdev, __u8 *rdesc,
                unsigned int *rsize)
@@ -32,6 +36,10 @@ static __u8 *glorious_report_fixup(struct hid_device *hdev, __u8 *rdesc,
                rdesc[85] = rdesc[113] = rdesc[141] = \
                        HID_MAIN_ITEM_VARIABLE | HID_MAIN_ITEM_RELATIVE;
        }
+       if (*rsize == 156 && rdesc[41] == 1) {
+               hid_info(hdev, "patching Glorious Model I keyboard report descriptor\n");
+               rdesc[41] = 0;
+       }
        return rdesc;
 }
 
@@ -44,6 +52,8 @@ static void glorious_update_name(struct hid_device *hdev)
                model = "Model O"; break;
        case USB_DEVICE_ID_GLORIOUS_MODEL_D:
                model = "Model D"; break;
+       case USB_DEVICE_ID_GLORIOUS_MODEL_I:
+               model = "Model I"; break;
        }
 
        snprintf(hdev->name, sizeof(hdev->name), "%s %s", "Glorious", model);
@@ -66,10 +76,12 @@ static int glorious_probe(struct hid_device *hdev,
 }
 
 static const struct hid_device_id glorious_devices[] = {
-       { HID_USB_DEVICE(USB_VENDOR_ID_GLORIOUS,
+       { HID_USB_DEVICE(USB_VENDOR_ID_SINOWEALTH,
                USB_DEVICE_ID_GLORIOUS_MODEL_O) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_GLORIOUS,
+       { HID_USB_DEVICE(USB_VENDOR_ID_SINOWEALTH,
                USB_DEVICE_ID_GLORIOUS_MODEL_D) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_LAVIEW,
+               USB_DEVICE_ID_GLORIOUS_MODEL_I) },
        { }
 };
 MODULE_DEVICE_TABLE(hid, glorious_devices);
index f7973ccd84a287017db736ca4dcb9baf150b1b2b..c6e4e0d1f2147e6221c10e607859354d2c1a32be 100644 (file)
 #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_010A 0x010a
 #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100 0xe100
 
-#define USB_VENDOR_ID_GLORIOUS  0x258a
-#define USB_DEVICE_ID_GLORIOUS_MODEL_D 0x0033
-#define USB_DEVICE_ID_GLORIOUS_MODEL_O 0x0036
-
 #define I2C_VENDOR_ID_GOODIX           0x27c6
 #define I2C_DEVICE_ID_GOODIX_01F0      0x01f0
 
 #define USB_VENDOR_ID_LABTEC           0x1020
 #define USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD 0x0006
 
+#define USB_VENDOR_ID_LAVIEW           0x22D4
+#define USB_DEVICE_ID_GLORIOUS_MODEL_I 0x1503
+
 #define USB_VENDOR_ID_LCPOWER          0x1241
 #define USB_DEVICE_ID_LCPOWER_LC1000   0xf767
 
 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_2         0xc534
 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1      0xc539
 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1    0xc53f
-#define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_2    0xc547
 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_POWERPLAY 0xc53a
 #define USB_DEVICE_ID_SPACETRAVELLER   0xc623
 #define USB_DEVICE_ID_SPACENAVIGATOR   0xc626
 #define USB_VENDOR_ID_SIGMATEL         0x066F
 #define USB_DEVICE_ID_SIGMATEL_STMP3780        0x3780
 
+#define USB_VENDOR_ID_SINOWEALTH  0x258a
+#define USB_DEVICE_ID_GLORIOUS_MODEL_D 0x0033
+#define USB_DEVICE_ID_GLORIOUS_MODEL_O 0x0036
+
 #define USB_VENDOR_ID_SIS_TOUCH                0x0457
 #define USB_DEVICE_ID_SIS9200_TOUCH    0x9200
 #define USB_DEVICE_ID_SIS817_TOUCH     0x0817
index 8afe3be683ba251617e5b5f2b9477738ce2f13d7..e6a8b6d8eab707da539cbc209f205d0ef02bba67 100644 (file)
@@ -1695,12 +1695,11 @@ static int logi_dj_raw_event(struct hid_device *hdev,
                }
                /*
                 * Mouse-only receivers send unnumbered mouse data. The 27 MHz
-                * receiver uses 6 byte packets, the nano receiver 8 bytes,
-                * the lightspeed receiver (Pro X Superlight) 13 bytes.
+                * receiver uses 6 byte packets, the nano receiver 8 bytes.
                 */
                if (djrcv_dev->unnumbered_application == HID_GD_MOUSE &&
-                   size <= 13){
-                       u8 mouse_report[14];
+                   size <= 8) {
+                       u8 mouse_report[9];
 
                        /* Prepend report id */
                        mouse_report[0] = REPORT_TYPE_MOUSE;
@@ -1984,10 +1983,6 @@ static const struct hid_device_id logi_dj_receivers[] = {
          HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
                USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1),
         .driver_data = recvr_type_gaming_hidpp},
-       { /* Logitech lightspeed receiver (0xc547) */
-         HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
-               USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_2),
-        .driver_data = recvr_type_gaming_hidpp},
 
        { /* Logitech 27 MHz HID++ 1.0 receiver (0xc513) */
          HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER),
index 72883e0ce75758c948683b0cee7377ecfd5c19fa..aef0785c91cc2d19385b54c8689be4e875aa85c9 100644 (file)
@@ -1142,6 +1142,8 @@ static int mcp2221_probe(struct hid_device *hdev,
        if (ret)
                return ret;
 
+       hid_device_io_start(hdev);
+
        /* Set I2C bus clock diviser */
        if (i2c_clk_freq > 400)
                i2c_clk_freq = 400;
@@ -1157,12 +1159,12 @@ static int mcp2221_probe(struct hid_device *hdev,
        snprintf(mcp->adapter.name, sizeof(mcp->adapter.name),
                        "MCP2221 usb-i2c bridge");
 
+       i2c_set_adapdata(&mcp->adapter, mcp);
        ret = devm_i2c_add_adapter(&hdev->dev, &mcp->adapter);
        if (ret) {
                hid_err(hdev, "can't add usb-i2c adapter: %d\n", ret);
                return ret;
        }
-       i2c_set_adapdata(&mcp->adapter, mcp);
 
 #if IS_REACHABLE(CONFIG_GPIOLIB)
        /* Setup GPIO chip */
index e098cc7b3944375387bb8003ca6ab4807bfb3eb9..fd5b0637dad683e7b20c929974c958e79936880c 100644 (file)
@@ -2046,6 +2046,11 @@ static const struct hid_device_id mt_devices[] = {
                MT_USB_DEVICE(USB_VENDOR_ID_HANVON_ALT,
                        USB_DEVICE_ID_HANVON_ALT_MULTITOUCH) },
 
+       /* HONOR GLO-GXXX panel */
+       { .driver_data = MT_CLS_VTL,
+               HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
+                       0x347d, 0x7853) },
+
        /* Ilitek dual touch panel */
        {  .driver_data = MT_CLS_NSMU,
                MT_USB_DEVICE(USB_VENDOR_ID_ILITEK,
index 5a48fcaa32f007cddda7f18bf1b79261553f675a..ea472923fab07841ba6ee136da95a4a060690c4d 100644 (file)
@@ -33,6 +33,7 @@ static const struct hid_device_id hid_quirks[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_AKAI, USB_DEVICE_ID_AKAI_MPKMINI2), HID_QUIRK_NO_INIT_REPORTS },
        { HID_USB_DEVICE(USB_VENDOR_ID_ALPS, USB_DEVICE_ID_IBM_GAMEPAD), HID_QUIRK_BADPAD },
        { HID_USB_DEVICE(USB_VENDOR_ID_AMI, USB_DEVICE_ID_AMI_VIRT_KEYBOARD_AND_MOUSE), HID_QUIRK_ALWAYS_POLL },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), HID_QUIRK_ALWAYS_POLL },
        { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM), HID_QUIRK_NOGET },
        { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVMC), HID_QUIRK_NOGET },
        { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVM), HID_QUIRK_NOGET },
index affcfb243f0f52a5848a48016eac8e78ef709f7e..35f762872b8a58c2f7e8fd4867bb0e139aea5cf0 100644 (file)
@@ -63,7 +63,7 @@ static int dw_reg_read(void *context, unsigned int reg, unsigned int *val)
 {
        struct dw_i2c_dev *dev = context;
 
-       *val = readl_relaxed(dev->base + reg);
+       *val = readl(dev->base + reg);
 
        return 0;
 }
@@ -72,7 +72,7 @@ static int dw_reg_write(void *context, unsigned int reg, unsigned int val)
 {
        struct dw_i2c_dev *dev = context;
 
-       writel_relaxed(val, dev->base + reg);
+       writel(val, dev->base + reg);
 
        return 0;
 }
@@ -81,7 +81,7 @@ static int dw_reg_read_swab(void *context, unsigned int reg, unsigned int *val)
 {
        struct dw_i2c_dev *dev = context;
 
-       *val = swab32(readl_relaxed(dev->base + reg));
+       *val = swab32(readl(dev->base + reg));
 
        return 0;
 }
@@ -90,7 +90,7 @@ static int dw_reg_write_swab(void *context, unsigned int reg, unsigned int val)
 {
        struct dw_i2c_dev *dev = context;
 
-       writel_relaxed(swab32(val), dev->base + reg);
+       writel(swab32(val), dev->base + reg);
 
        return 0;
 }
@@ -99,8 +99,8 @@ static int dw_reg_read_word(void *context, unsigned int reg, unsigned int *val)
 {
        struct dw_i2c_dev *dev = context;
 
-       *val = readw_relaxed(dev->base + reg) |
-               (readw_relaxed(dev->base + reg + 2) << 16);
+       *val = readw(dev->base + reg) |
+               (readw(dev->base + reg + 2) << 16);
 
        return 0;
 }
@@ -109,8 +109,8 @@ static int dw_reg_write_word(void *context, unsigned int reg, unsigned int val)
 {
        struct dw_i2c_dev *dev = context;
 
-       writew_relaxed(val, dev->base + reg);
-       writew_relaxed(val >> 16, dev->base + reg + 2);
+       writew(val, dev->base + reg);
+       writew(val >> 16, dev->base + reg + 2);
 
        return 0;
 }
index 041a76f71a49cc5db369dcb4bc1340ca4c04d154..e106af83cef4da5626e534325805aad70be2f617 100644 (file)
@@ -771,8 +771,8 @@ static int ocores_i2c_resume(struct device *dev)
        return ocores_init(dev, i2c);
 }
 
-static DEFINE_SIMPLE_DEV_PM_OPS(ocores_i2c_pm,
-                               ocores_i2c_suspend, ocores_i2c_resume);
+static DEFINE_NOIRQ_DEV_PM_OPS(ocores_i2c_pm,
+                              ocores_i2c_suspend, ocores_i2c_resume);
 
 static struct platform_driver ocores_i2c_driver = {
        .probe   = ocores_i2c_probe,
index 1d76482427492113886af4ae84943092106e8479..76f79b68cef84548b86def688b6ba95f4aa46335 100644 (file)
@@ -265,6 +265,9 @@ struct pxa_i2c {
        u32                     hs_mask;
 
        struct i2c_bus_recovery_info recovery;
+       struct pinctrl          *pinctrl;
+       struct pinctrl_state    *pinctrl_default;
+       struct pinctrl_state    *pinctrl_recovery;
 };
 
 #define _IBMR(i2c)     ((i2c)->reg_ibmr)
@@ -1299,12 +1302,13 @@ static void i2c_pxa_prepare_recovery(struct i2c_adapter *adap)
         */
        gpiod_set_value(i2c->recovery.scl_gpiod, ibmr & IBMR_SCLS);
        gpiod_set_value(i2c->recovery.sda_gpiod, ibmr & IBMR_SDAS);
+
+       WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery));
 }
 
 static void i2c_pxa_unprepare_recovery(struct i2c_adapter *adap)
 {
        struct pxa_i2c *i2c = adap->algo_data;
-       struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
        u32 isr;
 
        /*
@@ -1318,7 +1322,7 @@ static void i2c_pxa_unprepare_recovery(struct i2c_adapter *adap)
                i2c_pxa_do_reset(i2c);
        }
 
-       WARN_ON(pinctrl_select_state(bri->pinctrl, bri->pins_default));
+       WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default));
 
        dev_dbg(&i2c->adap.dev, "recovery: IBMR 0x%08x ISR 0x%08x\n",
                readl(_IBMR(i2c)), readl(_ISR(i2c)));
@@ -1340,20 +1344,76 @@ static int i2c_pxa_init_recovery(struct pxa_i2c *i2c)
        if (IS_ENABLED(CONFIG_I2C_PXA_SLAVE))
                return 0;
 
-       bri->pinctrl = devm_pinctrl_get(dev);
-       if (PTR_ERR(bri->pinctrl) == -ENODEV) {
-               bri->pinctrl = NULL;
+       i2c->pinctrl = devm_pinctrl_get(dev);
+       if (PTR_ERR(i2c->pinctrl) == -ENODEV)
+               i2c->pinctrl = NULL;
+       if (IS_ERR(i2c->pinctrl))
+               return PTR_ERR(i2c->pinctrl);
+
+       if (!i2c->pinctrl)
+               return 0;
+
+       i2c->pinctrl_default = pinctrl_lookup_state(i2c->pinctrl,
+                                                   PINCTRL_STATE_DEFAULT);
+       i2c->pinctrl_recovery = pinctrl_lookup_state(i2c->pinctrl, "recovery");
+
+       if (IS_ERR(i2c->pinctrl_default) || IS_ERR(i2c->pinctrl_recovery)) {
+               dev_info(dev, "missing pinmux recovery information: %ld %ld\n",
+                        PTR_ERR(i2c->pinctrl_default),
+                        PTR_ERR(i2c->pinctrl_recovery));
+               return 0;
+       }
+
+       /*
+        * Claiming GPIOs can influence the pinmux state, and may glitch the
+        * I2C bus. Do this carefully.
+        */
+       bri->scl_gpiod = devm_gpiod_get(dev, "scl", GPIOD_OUT_HIGH_OPEN_DRAIN);
+       if (bri->scl_gpiod == ERR_PTR(-EPROBE_DEFER))
+               return -EPROBE_DEFER;
+       if (IS_ERR(bri->scl_gpiod)) {
+               dev_info(dev, "missing scl gpio recovery information: %pe\n",
+                        bri->scl_gpiod);
+               return 0;
+       }
+
+       /*
+        * We have SCL. Pull SCL low and wait a bit so that SDA glitches
+        * have no effect.
+        */
+       gpiod_direction_output(bri->scl_gpiod, 0);
+       udelay(10);
+       bri->sda_gpiod = devm_gpiod_get(dev, "sda", GPIOD_OUT_HIGH_OPEN_DRAIN);
+
+       /* Wait a bit in case of a SDA glitch, and then release SCL. */
+       udelay(10);
+       gpiod_direction_output(bri->scl_gpiod, 1);
+
+       if (bri->sda_gpiod == ERR_PTR(-EPROBE_DEFER))
+               return -EPROBE_DEFER;
+
+       if (IS_ERR(bri->sda_gpiod)) {
+               dev_info(dev, "missing sda gpio recovery information: %pe\n",
+                        bri->sda_gpiod);
                return 0;
        }
-       if (IS_ERR(bri->pinctrl))
-               return PTR_ERR(bri->pinctrl);
 
        bri->prepare_recovery = i2c_pxa_prepare_recovery;
        bri->unprepare_recovery = i2c_pxa_unprepare_recovery;
+       bri->recover_bus = i2c_generic_scl_recovery;
 
        i2c->adap.bus_recovery_info = bri;
 
-       return 0;
+       /*
+        * Claiming GPIOs can change the pinmux state, which confuses the
+        * pinctrl since pinctrl's idea of the current setting is unaffected
+        * by the pinmux change caused by claiming the GPIO. Work around that
+        * by switching pinctrl to the GPIO state here. We do it this way to
+        * avoid glitching the I2C bus.
+        */
+       pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery);
+
+       return pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default);
 }
 
 static int i2c_pxa_probe(struct platform_device *dev)
index a8c89df1a997866c31238d94bb5cd26c5acf8dcb..9a7a74239eabb7cd3d2a3a077316d54833ec18cb 100644 (file)
@@ -2379,12 +2379,12 @@ retry_baser:
                break;
        }
 
+       if (!shr)
+               gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order));
+
        its_write_baser(its, baser, val);
        tmp = baser->val;
 
-       if (its->flags & ITS_FLAGS_FORCE_NON_SHAREABLE)
-               tmp &= ~GITS_BASER_SHAREABILITY_MASK;
-
        if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) {
                /*
                 * Shareability didn't stick. Just use
@@ -2394,10 +2394,9 @@ retry_baser:
                 * non-cacheable as well.
                 */
                shr = tmp & GITS_BASER_SHAREABILITY_MASK;
-               if (!shr) {
+               if (!shr)
                        cache = GITS_BASER_nC;
-                       gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order));
-               }
+
                goto retry_baser;
        }
 
@@ -2609,6 +2608,11 @@ static int its_alloc_tables(struct its_node *its)
                /* erratum 24313: ignore memory access type */
                cache = GITS_BASER_nCnB;
 
+       if (its->flags & ITS_FLAGS_FORCE_NON_SHAREABLE) {
+               cache = GITS_BASER_nC;
+               shr = 0;
+       }
+
        for (i = 0; i < GITS_BASER_NR_REGS; i++) {
                struct its_baser *baser = its->tables + i;
                u64 val = its_read_baser(its, baser);
index 974b84f6bd6af7ab0003d507f7dbcae62d641767..ba1be15cfd8ea36c1426c4db541346665d54d4e1 100644 (file)
@@ -75,19 +75,6 @@ static ssize_t max_brightness_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(max_brightness);
 
-static ssize_t color_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       const char *color_text = "invalid";
-       struct led_classdev *led_cdev = dev_get_drvdata(dev);
-
-       if (led_cdev->color < LED_COLOR_ID_MAX)
-               color_text = led_colors[led_cdev->color];
-
-       return sysfs_emit(buf, "%s\n", color_text);
-}
-static DEVICE_ATTR_RO(color);
-
 #ifdef CONFIG_LEDS_TRIGGERS
 static BIN_ATTR(trigger, 0644, led_trigger_read, led_trigger_write, 0);
 static struct bin_attribute *led_trigger_bin_attrs[] = {
@@ -102,7 +89,6 @@ static const struct attribute_group led_trigger_group = {
 static struct attribute *led_class_attrs[] = {
        &dev_attr_brightness.attr,
        &dev_attr_max_brightness.attr,
-       &dev_attr_color.attr,
        NULL,
 };
 
index 26159679121824345f842e6c297b2a36b4db437b..196cdacce38f253ffee057da72221b017b94565b 100644 (file)
@@ -293,16 +293,16 @@ static void btree_complete_write(struct btree *b, struct btree_write *w)
        w->journal      = NULL;
 }
 
-static void btree_node_write_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(btree_node_write_unlock)
 {
-       struct btree *b = container_of(cl, struct btree, io);
+       closure_type(b, struct btree, io);
 
        up(&b->io_mutex);
 }
 
-static void __btree_node_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(__btree_node_write_done)
 {
-       struct btree *b = container_of(cl, struct btree, io);
+       closure_type(b, struct btree, io);
        struct btree_write *w = btree_prev_write(b);
 
        bch_bbio_free(b->bio, b->c);
@@ -315,12 +315,12 @@ static void __btree_node_write_done(struct closure *cl)
        closure_return_with_destructor(cl, btree_node_write_unlock);
 }
 
-static void btree_node_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(btree_node_write_done)
 {
-       struct btree *b = container_of(cl, struct btree, io);
+       closure_type(b, struct btree, io);
 
        bio_free_pages(b->bio);
-       __btree_node_write_done(cl);
+       __btree_node_write_done(&cl->work);
 }
 
 static void btree_node_write_endio(struct bio *bio)
index c182c21de2e8199d1d57817e27e149e0d772d2b7..7ff14bd2feb8bba0215b4075f34e8f97940c7bc9 100644 (file)
@@ -723,11 +723,11 @@ static void journal_write_endio(struct bio *bio)
        closure_put(&w->c->journal.io);
 }
 
-static void journal_write(struct closure *cl);
+static CLOSURE_CALLBACK(journal_write);
 
-static void journal_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_done)
 {
-       struct journal *j = container_of(cl, struct journal, io);
+       closure_type(j, struct journal, io);
        struct journal_write *w = (j->cur == j->w)
                ? &j->w[1]
                : &j->w[0];
@@ -736,19 +736,19 @@ static void journal_write_done(struct closure *cl)
        continue_at_nobarrier(cl, journal_write, bch_journal_wq);
 }
 
-static void journal_write_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_unlock)
        __releases(&c->journal.lock)
 {
-       struct cache_set *c = container_of(cl, struct cache_set, journal.io);
+       closure_type(c, struct cache_set, journal.io);
 
        c->journal.io_in_flight = 0;
        spin_unlock(&c->journal.lock);
 }
 
-static void journal_write_unlocked(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_unlocked)
        __releases(c->journal.lock)
 {
-       struct cache_set *c = container_of(cl, struct cache_set, journal.io);
+       closure_type(c, struct cache_set, journal.io);
        struct cache *ca = c->cache;
        struct journal_write *w = c->journal.cur;
        struct bkey *k = &c->journal.key;
@@ -823,12 +823,12 @@ static void journal_write_unlocked(struct closure *cl)
        continue_at(cl, journal_write_done, NULL);
 }
 
-static void journal_write(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write)
 {
-       struct cache_set *c = container_of(cl, struct cache_set, journal.io);
+       closure_type(c, struct cache_set, journal.io);
 
        spin_lock(&c->journal.lock);
-       journal_write_unlocked(cl);
+       journal_write_unlocked(&cl->work);
 }
 
 static void journal_try_write(struct cache_set *c)
index 9f32901fdad10243857f7301a560723a7f54802c..ebd500bdf0b2fb2b3562bd491b2a8b61ea992e2a 100644 (file)
@@ -35,16 +35,16 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
 
 /* Moving GC - IO loop */
 
-static void moving_io_destructor(struct closure *cl)
+static CLOSURE_CALLBACK(moving_io_destructor)
 {
-       struct moving_io *io = container_of(cl, struct moving_io, cl);
+       closure_type(io, struct moving_io, cl);
 
        kfree(io);
 }
 
-static void write_moving_finish(struct closure *cl)
+static CLOSURE_CALLBACK(write_moving_finish)
 {
-       struct moving_io *io = container_of(cl, struct moving_io, cl);
+       closure_type(io, struct moving_io, cl);
        struct bio *bio = &io->bio.bio;
 
        bio_free_pages(bio);
@@ -89,9 +89,9 @@ static void moving_init(struct moving_io *io)
        bch_bio_map(bio, NULL);
 }
 
-static void write_moving(struct closure *cl)
+static CLOSURE_CALLBACK(write_moving)
 {
-       struct moving_io *io = container_of(cl, struct moving_io, cl);
+       closure_type(io, struct moving_io, cl);
        struct data_insert_op *op = &io->op;
 
        if (!op->status) {
@@ -113,9 +113,9 @@ static void write_moving(struct closure *cl)
        continue_at(cl, write_moving_finish, op->wq);
 }
 
-static void read_moving_submit(struct closure *cl)
+static CLOSURE_CALLBACK(read_moving_submit)
 {
-       struct moving_io *io = container_of(cl, struct moving_io, cl);
+       closure_type(io, struct moving_io, cl);
        struct bio *bio = &io->bio.bio;
 
        bch_submit_bbio(bio, io->op.c, &io->w->key, 0);
index a9b1f3896249b3da67587076cbf816306709a99f..83d112bd2b1c0e336d21cfbd8f50eb716a2a4c4b 100644 (file)
@@ -25,7 +25,7 @@
 
 struct kmem_cache *bch_search_cache;
 
-static void bch_data_insert_start(struct closure *cl);
+static CLOSURE_CALLBACK(bch_data_insert_start);
 
 static unsigned int cache_mode(struct cached_dev *dc)
 {
@@ -55,9 +55,9 @@ static void bio_csum(struct bio *bio, struct bkey *k)
 
 /* Insert data into cache */
 
-static void bch_data_insert_keys(struct closure *cl)
+static CLOSURE_CALLBACK(bch_data_insert_keys)
 {
-       struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
+       closure_type(op, struct data_insert_op, cl);
        atomic_t *journal_ref = NULL;
        struct bkey *replace_key = op->replace ? &op->replace_key : NULL;
        int ret;
@@ -136,9 +136,9 @@ out:
        continue_at(cl, bch_data_insert_keys, op->wq);
 }
 
-static void bch_data_insert_error(struct closure *cl)
+static CLOSURE_CALLBACK(bch_data_insert_error)
 {
-       struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
+       closure_type(op, struct data_insert_op, cl);
 
        /*
         * Our data write just errored, which means we've got a bunch of keys to
@@ -163,7 +163,7 @@ static void bch_data_insert_error(struct closure *cl)
 
        op->insert_keys.top = dst;
 
-       bch_data_insert_keys(cl);
+       bch_data_insert_keys(&cl->work);
 }
 
 static void bch_data_insert_endio(struct bio *bio)
@@ -184,9 +184,9 @@ static void bch_data_insert_endio(struct bio *bio)
        bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
 }
 
-static void bch_data_insert_start(struct closure *cl)
+static CLOSURE_CALLBACK(bch_data_insert_start)
 {
-       struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
+       closure_type(op, struct data_insert_op, cl);
        struct bio *bio = op->bio, *n;
 
        if (op->bypass)
@@ -305,16 +305,16 @@ err:
  * If op->bypass is true, instead of inserting the data it invalidates the
  * region of the cache represented by op->bio and op->inode.
  */
-void bch_data_insert(struct closure *cl)
+CLOSURE_CALLBACK(bch_data_insert)
 {
-       struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
+       closure_type(op, struct data_insert_op, cl);
 
        trace_bcache_write(op->c, op->inode, op->bio,
                           op->writeback, op->bypass);
 
        bch_keylist_init(&op->insert_keys);
        bio_get(op->bio);
-       bch_data_insert_start(cl);
+       bch_data_insert_start(&cl->work);
 }
 
 /*
@@ -575,9 +575,9 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
        return n == bio ? MAP_DONE : MAP_CONTINUE;
 }
 
-static void cache_lookup(struct closure *cl)
+static CLOSURE_CALLBACK(cache_lookup)
 {
-       struct search *s = container_of(cl, struct search, iop.cl);
+       closure_type(s, struct search, iop.cl);
        struct bio *bio = &s->bio.bio;
        struct cached_dev *dc;
        int ret;
@@ -698,9 +698,9 @@ static void do_bio_hook(struct search *s,
        bio_cnt_set(bio, 3);
 }
 
-static void search_free(struct closure *cl)
+static CLOSURE_CALLBACK(search_free)
 {
-       struct search *s = container_of(cl, struct search, cl);
+       closure_type(s, struct search, cl);
 
        atomic_dec(&s->iop.c->search_inflight);
 
@@ -749,20 +749,20 @@ static inline struct search *search_alloc(struct bio *bio,
 
 /* Cached devices */
 
-static void cached_dev_bio_complete(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_bio_complete)
 {
-       struct search *s = container_of(cl, struct search, cl);
+       closure_type(s, struct search, cl);
        struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
        cached_dev_put(dc);
-       search_free(cl);
+       search_free(&cl->work);
 }
 
 /* Process reads */
 
-static void cached_dev_read_error_done(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_read_error_done)
 {
-       struct search *s = container_of(cl, struct search, cl);
+       closure_type(s, struct search, cl);
 
        if (s->iop.replace_collision)
                bch_mark_cache_miss_collision(s->iop.c, s->d);
@@ -770,12 +770,12 @@ static void cached_dev_read_error_done(struct closure *cl)
        if (s->iop.bio)
                bio_free_pages(s->iop.bio);
 
-       cached_dev_bio_complete(cl);
+       cached_dev_bio_complete(&cl->work);
 }
 
-static void cached_dev_read_error(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_read_error)
 {
-       struct search *s = container_of(cl, struct search, cl);
+       closure_type(s, struct search, cl);
        struct bio *bio = &s->bio.bio;
 
        /*
@@ -801,9 +801,9 @@ static void cached_dev_read_error(struct closure *cl)
        continue_at(cl, cached_dev_read_error_done, NULL);
 }
 
-static void cached_dev_cache_miss_done(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_cache_miss_done)
 {
-       struct search *s = container_of(cl, struct search, cl);
+       closure_type(s, struct search, cl);
        struct bcache_device *d = s->d;
 
        if (s->iop.replace_collision)
@@ -812,13 +812,13 @@ static void cached_dev_cache_miss_done(struct closure *cl)
        if (s->iop.bio)
                bio_free_pages(s->iop.bio);
 
-       cached_dev_bio_complete(cl);
+       cached_dev_bio_complete(&cl->work);
        closure_put(&d->cl);
 }
 
-static void cached_dev_read_done(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_read_done)
 {
-       struct search *s = container_of(cl, struct search, cl);
+       closure_type(s, struct search, cl);
        struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
        /*
@@ -858,9 +858,9 @@ static void cached_dev_read_done(struct closure *cl)
        continue_at(cl, cached_dev_cache_miss_done, NULL);
 }
 
-static void cached_dev_read_done_bh(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_read_done_bh)
 {
-       struct search *s = container_of(cl, struct search, cl);
+       closure_type(s, struct search, cl);
        struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
        bch_mark_cache_accounting(s->iop.c, s->d,
@@ -955,13 +955,13 @@ static void cached_dev_read(struct cached_dev *dc, struct search *s)
 
 /* Process writes */
 
-static void cached_dev_write_complete(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_write_complete)
 {
-       struct search *s = container_of(cl, struct search, cl);
+       closure_type(s, struct search, cl);
        struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
        up_read_non_owner(&dc->writeback_lock);
-       cached_dev_bio_complete(cl);
+       cached_dev_bio_complete(&cl->work);
 }
 
 static void cached_dev_write(struct cached_dev *dc, struct search *s)
@@ -1048,9 +1048,9 @@ insert_data:
        continue_at(cl, cached_dev_write_complete, NULL);
 }
 
-static void cached_dev_nodata(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_nodata)
 {
-       struct search *s = container_of(cl, struct search, cl);
+       closure_type(s, struct search, cl);
        struct bio *bio = &s->bio.bio;
 
        if (s->iop.flush_journal)
@@ -1265,9 +1265,9 @@ static int flash_dev_cache_miss(struct btree *b, struct search *s,
        return MAP_CONTINUE;
 }
 
-static void flash_dev_nodata(struct closure *cl)
+static CLOSURE_CALLBACK(flash_dev_nodata)
 {
-       struct search *s = container_of(cl, struct search, cl);
+       closure_type(s, struct search, cl);
 
        if (s->iop.flush_journal)
                bch_journal_meta(s->iop.c, cl);
index 38ab4856eaab0dd9f73698db5dd30e012070f0d4..46bbef00aebb9d5e1cb0965b44df266a4908c582 100644 (file)
@@ -34,7 +34,7 @@ struct data_insert_op {
 };
 
 unsigned int bch_get_congested(const struct cache_set *c);
-void bch_data_insert(struct closure *cl);
+CLOSURE_CALLBACK(bch_data_insert);
 
 void bch_cached_dev_request_init(struct cached_dev *dc);
 void cached_dev_submit_bio(struct bio *bio);
index bfe1685dbae5748feafceb1ea7d98572a11e7c76..1402096b8076dbf6ed0c3d259ee923e0b8388de6 100644 (file)
@@ -327,9 +327,9 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
        submit_bio(bio);
 }
 
-static void bch_write_bdev_super_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(bch_write_bdev_super_unlock)
 {
-       struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write);
+       closure_type(dc, struct cached_dev, sb_write);
 
        up(&dc->sb_write_mutex);
 }
@@ -363,9 +363,9 @@ static void write_super_endio(struct bio *bio)
        closure_put(&ca->set->sb_write);
 }
 
-static void bcache_write_super_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(bcache_write_super_unlock)
 {
-       struct cache_set *c = container_of(cl, struct cache_set, sb_write);
+       closure_type(c, struct cache_set, sb_write);
 
        up(&c->sb_write_mutex);
 }
@@ -407,9 +407,9 @@ static void uuid_endio(struct bio *bio)
        closure_put(cl);
 }
 
-static void uuid_io_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(uuid_io_unlock)
 {
-       struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
+       closure_type(c, struct cache_set, uuid_write);
 
        up(&c->uuid_write_mutex);
 }
@@ -1344,9 +1344,9 @@ void bch_cached_dev_release(struct kobject *kobj)
        module_put(THIS_MODULE);
 }
 
-static void cached_dev_free(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_free)
 {
-       struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
+       closure_type(dc, struct cached_dev, disk.cl);
 
        if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
                cancel_writeback_rate_update_dwork(dc);
@@ -1378,9 +1378,9 @@ static void cached_dev_free(struct closure *cl)
        kobject_put(&dc->disk.kobj);
 }
 
-static void cached_dev_flush(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_flush)
 {
-       struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
+       closure_type(dc, struct cached_dev, disk.cl);
        struct bcache_device *d = &dc->disk;
 
        mutex_lock(&bch_register_lock);
@@ -1499,9 +1499,9 @@ void bch_flash_dev_release(struct kobject *kobj)
        kfree(d);
 }
 
-static void flash_dev_free(struct closure *cl)
+static CLOSURE_CALLBACK(flash_dev_free)
 {
-       struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+       closure_type(d, struct bcache_device, cl);
 
        mutex_lock(&bch_register_lock);
        atomic_long_sub(bcache_dev_sectors_dirty(d),
@@ -1512,9 +1512,9 @@ static void flash_dev_free(struct closure *cl)
        kobject_put(&d->kobj);
 }
 
-static void flash_dev_flush(struct closure *cl)
+static CLOSURE_CALLBACK(flash_dev_flush)
 {
-       struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+       closure_type(d, struct bcache_device, cl);
 
        mutex_lock(&bch_register_lock);
        bcache_device_unlink(d);
@@ -1670,9 +1670,9 @@ void bch_cache_set_release(struct kobject *kobj)
        module_put(THIS_MODULE);
 }
 
-static void cache_set_free(struct closure *cl)
+static CLOSURE_CALLBACK(cache_set_free)
 {
-       struct cache_set *c = container_of(cl, struct cache_set, cl);
+       closure_type(c, struct cache_set, cl);
        struct cache *ca;
 
        debugfs_remove(c->debug);
@@ -1711,9 +1711,9 @@ static void cache_set_free(struct closure *cl)
        kobject_put(&c->kobj);
 }
 
-static void cache_set_flush(struct closure *cl)
+static CLOSURE_CALLBACK(cache_set_flush)
 {
-       struct cache_set *c = container_of(cl, struct cache_set, caching);
+       closure_type(c, struct cache_set, caching);
        struct cache *ca = c->cache;
        struct btree *b;
 
@@ -1808,9 +1808,9 @@ static void conditional_stop_bcache_device(struct cache_set *c,
        }
 }
 
-static void __cache_set_unregister(struct closure *cl)
+static CLOSURE_CALLBACK(__cache_set_unregister)
 {
-       struct cache_set *c = container_of(cl, struct cache_set, caching);
+       closure_type(c, struct cache_set, caching);
        struct cached_dev *dc;
        struct bcache_device *d;
        size_t i;
index 3accfdaee6b192956fa760bf2ec38a8039d0b88d..8827a6f130ad7fbc58ae8e41ec833f6c1361f7b6 100644 (file)
@@ -341,16 +341,16 @@ static void dirty_init(struct keybuf_key *w)
        bch_bio_map(bio, NULL);
 }
 
-static void dirty_io_destructor(struct closure *cl)
+static CLOSURE_CALLBACK(dirty_io_destructor)
 {
-       struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+       closure_type(io, struct dirty_io, cl);
 
        kfree(io);
 }
 
-static void write_dirty_finish(struct closure *cl)
+static CLOSURE_CALLBACK(write_dirty_finish)
 {
-       struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+       closure_type(io, struct dirty_io, cl);
        struct keybuf_key *w = io->bio.bi_private;
        struct cached_dev *dc = io->dc;
 
@@ -400,9 +400,9 @@ static void dirty_endio(struct bio *bio)
        closure_put(&io->cl);
 }
 
-static void write_dirty(struct closure *cl)
+static CLOSURE_CALLBACK(write_dirty)
 {
-       struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+       closure_type(io, struct dirty_io, cl);
        struct keybuf_key *w = io->bio.bi_private;
        struct cached_dev *dc = io->dc;
 
@@ -462,9 +462,9 @@ static void read_dirty_endio(struct bio *bio)
        dirty_endio(bio);
 }
 
-static void read_dirty_submit(struct closure *cl)
+static CLOSURE_CALLBACK(read_dirty_submit)
 {
-       struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+       closure_type(io, struct dirty_io, cl);
 
        closure_bio_submit(io->dc->disk.c, &io->bio, cl);
 
index 62eb27639c9b855c006ad7e13234403ba78b28a3..f03d7dba270c52f1313af3ee079cd977a207b34d 100644 (file)
@@ -254,7 +254,7 @@ enum evict_result {
 
 typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);
 
-static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
+static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep)
 {
        unsigned long tested = 0;
        struct list_head *h = lru->cursor;
@@ -295,7 +295,8 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con
 
                h = h->next;
 
-               cond_resched();
+               if (!no_sleep)
+                       cond_resched();
        }
 
        return NULL;
@@ -382,7 +383,10 @@ struct dm_buffer {
  */
 
 struct buffer_tree {
-       struct rw_semaphore lock;
+       union {
+               struct rw_semaphore lock;
+               rwlock_t spinlock;
+       } u;
        struct rb_root root;
 } ____cacheline_aligned_in_smp;
 
@@ -393,9 +397,12 @@ struct dm_buffer_cache {
         * on the locks.
         */
        unsigned int num_locks;
+       bool no_sleep;
        struct buffer_tree trees[];
 };
 
+static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
+
 static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
 {
        return dm_hash_locks_index(block, num_locks);
@@ -403,22 +410,34 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
 
 static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
 {
-       down_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+       if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+               read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+       else
+               down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
 {
-       up_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+       if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+               read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+       else
+               up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
 {
-       down_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+       if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+               write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+       else
+               down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
 {
-       up_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+       if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+               write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+       else
+               up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 /*
@@ -442,18 +461,32 @@ static void lh_init(struct lock_history *lh, struct dm_buffer_cache *cache, bool
 
 static void __lh_lock(struct lock_history *lh, unsigned int index)
 {
-       if (lh->write)
-               down_write(&lh->cache->trees[index].lock);
-       else
-               down_read(&lh->cache->trees[index].lock);
+       if (lh->write) {
+               if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+                       write_lock_bh(&lh->cache->trees[index].u.spinlock);
+               else
+                       down_write(&lh->cache->trees[index].u.lock);
+       } else {
+               if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+                       read_lock_bh(&lh->cache->trees[index].u.spinlock);
+               else
+                       down_read(&lh->cache->trees[index].u.lock);
+       }
 }
 
 static void __lh_unlock(struct lock_history *lh, unsigned int index)
 {
-       if (lh->write)
-               up_write(&lh->cache->trees[index].lock);
-       else
-               up_read(&lh->cache->trees[index].lock);
+       if (lh->write) {
+               if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+                       write_unlock_bh(&lh->cache->trees[index].u.spinlock);
+               else
+                       up_write(&lh->cache->trees[index].u.lock);
+       } else {
+               if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+                       read_unlock_bh(&lh->cache->trees[index].u.spinlock);
+               else
+                       up_read(&lh->cache->trees[index].u.lock);
+       }
 }
 
 /*
@@ -502,14 +535,18 @@ static struct dm_buffer *list_to_buffer(struct list_head *l)
        return le_to_buffer(le);
 }
 
-static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks)
+static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep)
 {
        unsigned int i;
 
        bc->num_locks = num_locks;
+       bc->no_sleep = no_sleep;
 
        for (i = 0; i < bc->num_locks; i++) {
-               init_rwsem(&bc->trees[i].lock);
+               if (no_sleep)
+                       rwlock_init(&bc->trees[i].u.spinlock);
+               else
+                       init_rwsem(&bc->trees[i].u.lock);
                bc->trees[i].root = RB_ROOT;
        }
 
@@ -648,7 +685,7 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode
        struct lru_entry *le;
        struct dm_buffer *b;
 
-       le = lru_evict(&bc->lru[list_mode], __evict_pred, &w);
+       le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep);
        if (!le)
                return NULL;
 
@@ -702,7 +739,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_
        struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};
 
        while (true) {
-               le = lru_evict(&bc->lru[old_mode], __evict_pred, &w);
+               le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep);
                if (!le)
                        break;
 
@@ -915,10 +952,11 @@ static void cache_remove_range(struct dm_buffer_cache *bc,
 {
        unsigned int i;
 
+       BUG_ON(bc->no_sleep);
        for (i = 0; i < bc->num_locks; i++) {
-               down_write(&bc->trees[i].lock);
+               down_write(&bc->trees[i].u.lock);
                __remove_range(bc, &bc->trees[i].root, begin, end, pred, release);
-               up_write(&bc->trees[i].lock);
+               up_write(&bc->trees[i].u.lock);
        }
 }
 
@@ -979,8 +1017,6 @@ struct dm_bufio_client {
        struct dm_buffer_cache cache; /* must be last member */
 };
 
-static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
-
 /*----------------------------------------------------------------*/
 
 #define dm_bufio_in_request()  (!!current->bio_list)
@@ -1871,7 +1907,8 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
        if (need_submit)
                submit_io(b, REQ_OP_READ, read_endio);
 
-       wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
+       if (nf != NF_GET)       /* we already tested this condition above */
+               wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
 
        if (b->read_error) {
                int error = blk_status_to_errno(b->read_error);
@@ -2421,7 +2458,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
                r = -ENOMEM;
                goto bad_client;
        }
-       cache_init(&c->cache, num_locks);
+       cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0);
 
        c->bdev = bdev;
        c->block_size = block_size;
index 6de107aff331947910dc3478510a9c4fa05e341d..2ae8560b6a14ad9e6b2313fa17d1bb98a209af5a 100644 (file)
@@ -1673,7 +1673,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size)
        unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
        unsigned int remaining_size;
-       unsigned int order = MAX_ORDER - 1;
+       unsigned int order = MAX_ORDER;
 
 retry:
        if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
index efd510984e25937e2ed5d25d6feb9187edfa62bd..5eabdb06c6498b103f24e7316e3de2587f6bca45 100644 (file)
@@ -33,7 +33,7 @@ struct delay_c {
        struct work_struct flush_expired_bios;
        struct list_head delayed_bios;
        struct task_struct *worker;
-       atomic_t may_delay;
+       bool may_delay;
 
        struct delay_class read;
        struct delay_class write;
@@ -73,39 +73,6 @@ static inline bool delay_is_fast(struct delay_c *dc)
        return !!dc->worker;
 }
 
-static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
-{
-       struct dm_delay_info *delayed, *next;
-
-       mutex_lock(&delayed_bios_lock);
-       list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
-               if (flush_all || time_after_eq(jiffies, delayed->expires)) {
-                       struct bio *bio = dm_bio_from_per_bio_data(delayed,
-                                               sizeof(struct dm_delay_info));
-                       list_del(&delayed->list);
-                       dm_submit_bio_remap(bio, NULL);
-                       delayed->class->ops--;
-               }
-       }
-       mutex_unlock(&delayed_bios_lock);
-}
-
-static int flush_worker_fn(void *data)
-{
-       struct delay_c *dc = data;
-
-       while (1) {
-               flush_delayed_bios_fast(dc, false);
-               if (unlikely(list_empty(&dc->delayed_bios))) {
-                       set_current_state(TASK_INTERRUPTIBLE);
-                       schedule();
-               } else
-                       cond_resched();
-       }
-
-       return 0;
-}
-
 static void flush_bios(struct bio *bio)
 {
        struct bio *n;
@@ -118,36 +85,61 @@ static void flush_bios(struct bio *bio)
        }
 }
 
-static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
+static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
 {
        struct dm_delay_info *delayed, *next;
+       struct bio_list flush_bio_list;
        unsigned long next_expires = 0;
-       unsigned long start_timer = 0;
-       struct bio_list flush_bios = { };
+       bool start_timer = false;
+       bio_list_init(&flush_bio_list);
 
        mutex_lock(&delayed_bios_lock);
        list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
+               cond_resched();
                if (flush_all || time_after_eq(jiffies, delayed->expires)) {
                        struct bio *bio = dm_bio_from_per_bio_data(delayed,
                                                sizeof(struct dm_delay_info));
                        list_del(&delayed->list);
-                       bio_list_add(&flush_bios, bio);
+                       bio_list_add(&flush_bio_list, bio);
                        delayed->class->ops--;
                        continue;
                }
 
-               if (!start_timer) {
-                       start_timer = 1;
-                       next_expires = delayed->expires;
-               } else
-                       next_expires = min(next_expires, delayed->expires);
+               if (!delay_is_fast(dc)) {
+                       if (!start_timer) {
+                               start_timer = true;
+                               next_expires = delayed->expires;
+                       } else {
+                               next_expires = min(next_expires, delayed->expires);
+                       }
+               }
        }
        mutex_unlock(&delayed_bios_lock);
 
        if (start_timer)
                queue_timeout(dc, next_expires);
 
-       return bio_list_get(&flush_bios);
+       flush_bios(bio_list_get(&flush_bio_list));
+}
+
+static int flush_worker_fn(void *data)
+{
+       struct delay_c *dc = data;
+
+       while (!kthread_should_stop()) {
+               flush_delayed_bios(dc, false);
+               mutex_lock(&delayed_bios_lock);
+               if (unlikely(list_empty(&dc->delayed_bios))) {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       mutex_unlock(&delayed_bios_lock);
+                       schedule();
+               } else {
+                       mutex_unlock(&delayed_bios_lock);
+                       cond_resched();
+               }
+       }
+
+       return 0;
 }
 
 static void flush_expired_bios(struct work_struct *work)
@@ -155,10 +147,7 @@ static void flush_expired_bios(struct work_struct *work)
        struct delay_c *dc;
 
        dc = container_of(work, struct delay_c, flush_expired_bios);
-       if (delay_is_fast(dc))
-               flush_delayed_bios_fast(dc, false);
-       else
-               flush_bios(flush_delayed_bios(dc, false));
+       flush_delayed_bios(dc, false);
 }
 
 static void delay_dtr(struct dm_target *ti)
@@ -177,8 +166,7 @@ static void delay_dtr(struct dm_target *ti)
        if (dc->worker)
                kthread_stop(dc->worker);
 
-       if (!delay_is_fast(dc))
-               mutex_destroy(&dc->timer_lock);
+       mutex_destroy(&dc->timer_lock);
 
        kfree(dc);
 }
@@ -236,7 +224,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
        ti->private = dc;
        INIT_LIST_HEAD(&dc->delayed_bios);
-       atomic_set(&dc->may_delay, 1);
+       mutex_init(&dc->timer_lock);
+       dc->may_delay = true;
        dc->argc = argc;
 
        ret = delay_class_ctr(ti, &dc->read, argv);
@@ -282,12 +271,12 @@ out:
                                            "dm-delay-flush-worker");
                if (IS_ERR(dc->worker)) {
                        ret = PTR_ERR(dc->worker);
+                       dc->worker = NULL;
                        goto bad;
                }
        } else {
                timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
                INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
-               mutex_init(&dc->timer_lock);
                dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
                if (!dc->kdelayd_wq) {
                        ret = -EINVAL;
@@ -312,7 +301,7 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
        struct dm_delay_info *delayed;
        unsigned long expires = 0;
 
-       if (!c->delay || !atomic_read(&dc->may_delay))
+       if (!c->delay)
                return DM_MAPIO_REMAPPED;
 
        delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
@@ -321,6 +310,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
        delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
 
        mutex_lock(&delayed_bios_lock);
+       if (unlikely(!dc->may_delay)) {
+               mutex_unlock(&delayed_bios_lock);
+               return DM_MAPIO_REMAPPED;
+       }
        c->ops++;
        list_add_tail(&delayed->list, &dc->delayed_bios);
        mutex_unlock(&delayed_bios_lock);
@@ -337,21 +330,20 @@ static void delay_presuspend(struct dm_target *ti)
 {
        struct delay_c *dc = ti->private;
 
-       atomic_set(&dc->may_delay, 0);
+       mutex_lock(&delayed_bios_lock);
+       dc->may_delay = false;
+       mutex_unlock(&delayed_bios_lock);
 
-       if (delay_is_fast(dc))
-               flush_delayed_bios_fast(dc, true);
-       else {
+       if (!delay_is_fast(dc))
                del_timer_sync(&dc->delay_timer);
-               flush_bios(flush_delayed_bios(dc, true));
-       }
+       flush_delayed_bios(dc, true);
 }
 
 static void delay_resume(struct dm_target *ti)
 {
        struct delay_c *dc = ti->private;
 
-       atomic_set(&dc->may_delay, 1);
+       dc->may_delay = true;
 }
 
 static int delay_map(struct dm_target *ti, struct bio *bio)
index 120153e44ae0d374b011407f209968c4788c21d9..f57fb821528d7ef60639397772721bb5800f0c0d 100644 (file)
@@ -434,7 +434,7 @@ static struct bio *clone_bio(struct dm_target *ti, struct flakey_c *fc, struct b
 
        remaining_size = size;
 
-       order = MAX_ORDER - 1;
+       order = MAX_ORDER;
        while (remaining_size) {
                struct page *pages;
                unsigned size_to_add, to_copy;
index 3ef9f018da60ce7f96dc932b028bc03bc0f5eca2..b475200d8586a6cb561419017606b70ccb5eae0a 100644 (file)
@@ -24,7 +24,8 @@ bool verity_fec_is_enabled(struct dm_verity *v)
  */
 static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io)
 {
-       return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io);
+       return (struct dm_verity_fec_io *)
+               ((char *)io + io->v->ti->per_io_data_size - sizeof(struct dm_verity_fec_io));
 }
 
 /*
@@ -185,7 +186,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
 {
        if (unlikely(verity_hash(v, verity_io_hash_req(v, io),
                                 data, 1 << v->data_dev_block_bits,
-                                verity_io_real_digest(v, io))))
+                                verity_io_real_digest(v, io), true)))
                return 0;
 
        return memcmp(verity_io_real_digest(v, io), want_digest,
@@ -386,7 +387,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
        /* Always re-validate the corrected block against the expected hash */
        r = verity_hash(v, verity_io_hash_req(v, io), fio->output,
                        1 << v->data_dev_block_bits,
-                       verity_io_real_digest(v, io));
+                       verity_io_real_digest(v, io), true);
        if (unlikely(r < 0))
                return r;
 
index 26adcfea030229b9b2b6a91667b56c0c69646d3a..14e58ae705218f71923b99bdfc1d195e6a45e658 100644 (file)
@@ -135,20 +135,21 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req,
  * Wrapper for crypto_ahash_init, which handles verity salting.
  */
 static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
-                               struct crypto_wait *wait)
+                               struct crypto_wait *wait, bool may_sleep)
 {
        int r;
 
        ahash_request_set_tfm(req, v->tfm);
-       ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
-                                       CRYPTO_TFM_REQ_MAY_BACKLOG,
-                                       crypto_req_done, (void *)wait);
+       ahash_request_set_callback(req,
+               may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0,
+               crypto_req_done, (void *)wait);
        crypto_init_wait(wait);
 
        r = crypto_wait_req(crypto_ahash_init(req), wait);
 
        if (unlikely(r < 0)) {
-               DMERR("crypto_ahash_init failed: %d", r);
+               if (r != -ENOMEM)
+                       DMERR("crypto_ahash_init failed: %d", r);
                return r;
        }
 
@@ -179,12 +180,12 @@ out:
 }
 
 int verity_hash(struct dm_verity *v, struct ahash_request *req,
-               const u8 *data, size_t len, u8 *digest)
+               const u8 *data, size_t len, u8 *digest, bool may_sleep)
 {
        int r;
        struct crypto_wait wait;
 
-       r = verity_hash_init(v, req, &wait);
+       r = verity_hash_init(v, req, &wait, may_sleep);
        if (unlikely(r < 0))
                goto out;
 
@@ -322,7 +323,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
 
                r = verity_hash(v, verity_io_hash_req(v, io),
                                data, 1 << v->hash_dev_block_bits,
-                               verity_io_real_digest(v, io));
+                               verity_io_real_digest(v, io), !io->in_tasklet);
                if (unlikely(r < 0))
                        goto release_ret_r;
 
@@ -556,7 +557,7 @@ static int verity_verify_io(struct dm_verity_io *io)
                        continue;
                }
 
-               r = verity_hash_init(v, req, &wait);
+               r = verity_hash_init(v, req, &wait, !io->in_tasklet);
                if (unlikely(r < 0))
                        return r;
 
@@ -641,7 +642,6 @@ static void verity_work(struct work_struct *w)
 
        io->in_tasklet = false;
 
-       verity_fec_init_io(io);
        verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
 }
 
@@ -652,7 +652,7 @@ static void verity_tasklet(unsigned long data)
 
        io->in_tasklet = true;
        err = verity_verify_io(io);
-       if (err == -EAGAIN) {
+       if (err == -EAGAIN || err == -ENOMEM) {
                /* fallback to retrying with work-queue */
                INIT_WORK(&io->work, verity_work);
                queue_work(io->v->verify_wq, &io->work);
@@ -667,7 +667,9 @@ static void verity_end_io(struct bio *bio)
        struct dm_verity_io *io = bio->bi_private;
 
        if (bio->bi_status &&
-           (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) {
+           (!verity_fec_is_enabled(io->v) ||
+            verity_is_system_shutting_down() ||
+            (bio->bi_opf & REQ_RAHEAD))) {
                verity_finish_io(io, bio->bi_status);
                return;
        }
@@ -791,6 +793,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
        bio->bi_private = io;
        io->iter = bio->bi_iter;
 
+       verity_fec_init_io(io);
+
        verity_submit_prefetch(v, io);
 
        submit_bio_noacct(bio);
@@ -1033,7 +1037,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
                goto out;
 
        r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits,
-                       v->zero_digest);
+                       v->zero_digest, true);
 
 out:
        kfree(req);
index 2f555b4203679a454fad28323a22c3741108fc34..f9d522c870e61665d87271f66c690138db42108f 100644 (file)
@@ -115,12 +115,6 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v,
        return (u8 *)(io + 1) + v->ahash_reqsize + v->digest_size;
 }
 
-static inline u8 *verity_io_digest_end(struct dm_verity *v,
-                                      struct dm_verity_io *io)
-{
-       return verity_io_want_digest(v, io) + v->digest_size;
-}
-
 extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
                               struct bvec_iter *iter,
                               int (*process)(struct dm_verity *v,
@@ -128,7 +122,7 @@ extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
                                              u8 *data, size_t len));
 
 extern int verity_hash(struct dm_verity *v, struct ahash_request *req,
-                      const u8 *data, size_t len, u8 *digest);
+                      const u8 *data, size_t len, u8 *digest, bool may_sleep);
 
 extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
                                 sector_t block, u8 *digest, bool *is_zero);
index 13fad15a434c42843fb7de148a608b44bf824d88..f2a05a1c8ffab4a74f2096595ed48a8ad9898db3 100644 (file)
@@ -2,6 +2,7 @@
 config VIDEO_MGB4
        tristate "Digiteq Automotive MGB4 support"
        depends on VIDEO_DEV && PCI && I2C && DMADEVICES && SPI && MTD && IIO
+       depends on COMMON_CLK
        select VIDEOBUF2_DMA_SG
        select IIO_BUFFER
        select IIO_TRIGGERED_BUFFER
index 3efb33fbf40ccb0a316d57ac710d740027ad290f..5bfb8a06202e99f698408c7bce279ffb938aa7b8 100644 (file)
 
 #define MGB4_USER_IRQS 16
 
+#define DIGITEQ_VID 0x1ed8
+#define T100_DID    0x0101
+#define T200_DID    0x0201
+
 ATTRIBUTE_GROUPS(mgb4_pci);
 
 static int flashid;
@@ -151,7 +155,7 @@ static struct spi_master *get_spi_adap(struct platform_device *pdev)
        return dev ? container_of(dev, struct spi_master, dev) : NULL;
 }
 
-static int init_spi(struct mgb4_dev *mgbdev)
+static int init_spi(struct mgb4_dev *mgbdev, u32 devid)
 {
        struct resource spi_resources[] = {
                {
@@ -213,8 +217,13 @@ static int init_spi(struct mgb4_dev *mgbdev)
        snprintf(mgbdev->fw_part_name, sizeof(mgbdev->fw_part_name),
                 "mgb4-fw.%d", flashid);
        mgbdev->partitions[0].name = mgbdev->fw_part_name;
-       mgbdev->partitions[0].size = 0x400000;
-       mgbdev->partitions[0].offset = 0x400000;
+       if (devid == T200_DID) {
+               mgbdev->partitions[0].size = 0x950000;
+               mgbdev->partitions[0].offset = 0x1000000;
+       } else {
+               mgbdev->partitions[0].size = 0x400000;
+               mgbdev->partitions[0].offset = 0x400000;
+       }
        mgbdev->partitions[0].mask_flags = 0;
 
        snprintf(mgbdev->data_part_name, sizeof(mgbdev->data_part_name),
@@ -551,7 +560,7 @@ static int mgb4_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                goto err_video_regs;
 
        /* SPI FLASH */
-       rv = init_spi(mgbdev);
+       rv = init_spi(mgbdev, id->device);
        if (rv < 0)
                goto err_cmt_regs;
 
@@ -666,7 +675,8 @@ static void mgb4_remove(struct pci_dev *pdev)
 }
 
 static const struct pci_device_id mgb4_pci_ids[] = {
-       { PCI_DEVICE(0x1ed8, 0x0101), },
+       { PCI_DEVICE(DIGITEQ_VID, T100_DID), },
+       { PCI_DEVICE(DIGITEQ_VID, T200_DID), },
        { 0, }
 };
 MODULE_DEVICE_TABLE(pci, mgb4_pci_ids);
index f8093ba9539e9328bf240b659e77e0cf6b0acd6c..68d05243c3ee5550df25bfecd2e264e9d5e42946 100644 (file)
@@ -373,7 +373,7 @@ int vsp1_pipeline_stop(struct vsp1_pipeline *pipe)
                           (7 << VI6_DPR_SMPPT_TGW_SHIFT) |
                           (VI6_DPR_NODE_UNUSED << VI6_DPR_SMPPT_PT_SHIFT));
 
-       v4l2_subdev_call(&pipe->output->entity.subdev, video, s_stream, 0);
+       vsp1_wpf_stop(pipe->output);
 
        return ret;
 }
index 3b17f5fa4067fb3b9fcfa842403ff5a045c88fd4..ea12c3f12c92a35d811ff2334964bd212a945682 100644 (file)
@@ -43,14 +43,6 @@ static inline void vsp1_rpf_write(struct vsp1_rwpf *rpf,
                               data);
 }
 
-/* -----------------------------------------------------------------------------
- * V4L2 Subdevice Operations
- */
-
-static const struct v4l2_subdev_ops rpf_ops = {
-       .pad    = &vsp1_rwpf_pad_ops,
-};
-
 /* -----------------------------------------------------------------------------
  * VSP1 Entity Operations
  */
@@ -411,7 +403,7 @@ struct vsp1_rwpf *vsp1_rpf_create(struct vsp1_device *vsp1, unsigned int index)
        rpf->entity.index = index;
 
        sprintf(name, "rpf.%u", index);
-       ret = vsp1_entity_init(vsp1, &rpf->entity, name, 2, &rpf_ops,
+       ret = vsp1_entity_init(vsp1, &rpf->entity, name, 2, &vsp1_rwpf_subdev_ops,
                               MEDIA_ENT_F_PROC_VIDEO_PIXEL_FORMATTER);
        if (ret < 0)
                return ERR_PTR(ret);
index 22a82d218152fd68305a9cd49717f0f753dce285..e0f87c8103ca56929603cb063e8d79f9a1622c01 100644 (file)
@@ -24,7 +24,7 @@ struct v4l2_rect *vsp1_rwpf_get_crop(struct vsp1_rwpf *rwpf,
 }
 
 /* -----------------------------------------------------------------------------
- * V4L2 Subdevice Pad Operations
+ * V4L2 Subdevice Operations
  */
 
 static int vsp1_rwpf_enum_mbus_code(struct v4l2_subdev *subdev,
@@ -243,7 +243,7 @@ done:
        return ret;
 }
 
-const struct v4l2_subdev_pad_ops vsp1_rwpf_pad_ops = {
+static const struct v4l2_subdev_pad_ops vsp1_rwpf_pad_ops = {
        .init_cfg = vsp1_entity_init_cfg,
        .enum_mbus_code = vsp1_rwpf_enum_mbus_code,
        .enum_frame_size = vsp1_rwpf_enum_frame_size,
@@ -253,6 +253,10 @@ const struct v4l2_subdev_pad_ops vsp1_rwpf_pad_ops = {
        .set_selection = vsp1_rwpf_set_selection,
 };
 
+const struct v4l2_subdev_ops vsp1_rwpf_subdev_ops = {
+       .pad    = &vsp1_rwpf_pad_ops,
+};
+
 /* -----------------------------------------------------------------------------
  * Controls
  */
index eac5c04c22393487d3f13d986393d789c0db4dff..e0d212c70b2f99bfd693a1ea169621a282c73237 100644 (file)
@@ -79,9 +79,11 @@ static inline struct vsp1_rwpf *entity_to_rwpf(struct vsp1_entity *entity)
 struct vsp1_rwpf *vsp1_rpf_create(struct vsp1_device *vsp1, unsigned int index);
 struct vsp1_rwpf *vsp1_wpf_create(struct vsp1_device *vsp1, unsigned int index);
 
+void vsp1_wpf_stop(struct vsp1_rwpf *wpf);
+
 int vsp1_rwpf_init_ctrls(struct vsp1_rwpf *rwpf, unsigned int ncontrols);
 
-extern const struct v4l2_subdev_pad_ops vsp1_rwpf_pad_ops;
+extern const struct v4l2_subdev_ops vsp1_rwpf_subdev_ops;
 
 struct v4l2_rect *vsp1_rwpf_get_crop(struct vsp1_rwpf *rwpf,
                                     struct v4l2_subdev_state *sd_state);
index d0074ca009209cdc70f3fac6a2df50eff5b02a8a..cab4445eca696e1a3617b0921f81367bacd2b9b8 100644 (file)
@@ -186,17 +186,13 @@ static int wpf_init_controls(struct vsp1_rwpf *wpf)
 }
 
 /* -----------------------------------------------------------------------------
- * V4L2 Subdevice Core Operations
+ * VSP1 Entity Operations
  */
 
-static int wpf_s_stream(struct v4l2_subdev *subdev, int enable)
+void vsp1_wpf_stop(struct vsp1_rwpf *wpf)
 {
-       struct vsp1_rwpf *wpf = to_rwpf(subdev);
        struct vsp1_device *vsp1 = wpf->entity.vsp1;
 
-       if (enable)
-               return 0;
-
        /*
         * Write to registers directly when stopping the stream as there will be
         * no pipeline run to apply the display list.
@@ -204,27 +200,8 @@ static int wpf_s_stream(struct v4l2_subdev *subdev, int enable)
        vsp1_write(vsp1, VI6_WPF_IRQ_ENB(wpf->entity.index), 0);
        vsp1_write(vsp1, wpf->entity.index * VI6_WPF_OFFSET +
                   VI6_WPF_SRCRPF, 0);
-
-       return 0;
 }
 
-/* -----------------------------------------------------------------------------
- * V4L2 Subdevice Operations
- */
-
-static const struct v4l2_subdev_video_ops wpf_video_ops = {
-       .s_stream = wpf_s_stream,
-};
-
-static const struct v4l2_subdev_ops wpf_ops = {
-       .video  = &wpf_video_ops,
-       .pad    = &vsp1_rwpf_pad_ops,
-};
-
-/* -----------------------------------------------------------------------------
- * VSP1 Entity Operations
- */
-
 static void vsp1_wpf_destroy(struct vsp1_entity *entity)
 {
        struct vsp1_rwpf *wpf = entity_to_rwpf(entity);
@@ -583,7 +560,7 @@ struct vsp1_rwpf *vsp1_wpf_create(struct vsp1_device *vsp1, unsigned int index)
        wpf->entity.index = index;
 
        sprintf(name, "wpf.%u", index);
-       ret = vsp1_entity_init(vsp1, &wpf->entity, name, 2, &wpf_ops,
+       ret = vsp1_entity_init(vsp1, &wpf->entity, name, 2, &vsp1_rwpf_subdev_ops,
                               MEDIA_ENT_F_PROC_VIDEO_PIXEL_FORMATTER);
        if (ret < 0)
                return ERR_PTR(ret);
index 152dfe593c43a55c2bbd3468a62b8c2504661962..f9a5cffa64b1fdae17aed6367104afedbf3c7bd6 100644 (file)
@@ -1482,6 +1482,8 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req)
                        blk_mq_requeue_request(req, true);
                else
                        __blk_mq_end_request(req, BLK_STS_OK);
+       } else if (mq->in_recovery) {
+               blk_mq_requeue_request(req, true);
        } else {
                blk_mq_end_request(req, BLK_STS_OK);
        }
index 3d3e0ca5261481a86069e38261678924524448b6..a8c17b4cd737927bba95eea34ded3e28587b8084 100644 (file)
@@ -551,7 +551,9 @@ int mmc_cqe_recovery(struct mmc_host *host)
        cmd.flags        = MMC_RSP_R1B | MMC_CMD_AC;
        cmd.flags       &= ~MMC_RSP_CRC; /* Ignore CRC */
        cmd.busy_timeout = MMC_CQE_RECOVERY_TIMEOUT;
-       mmc_wait_for_cmd(host, &cmd, 0);
+       mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES);
+
+       mmc_poll_for_busy(host->card, MMC_CQE_RECOVERY_TIMEOUT, true, MMC_BUSY_IO);
 
        memset(&cmd, 0, sizeof(cmd));
        cmd.opcode       = MMC_CMDQ_TASK_MGMT;
@@ -559,10 +561,13 @@ int mmc_cqe_recovery(struct mmc_host *host)
        cmd.flags        = MMC_RSP_R1B | MMC_CMD_AC;
        cmd.flags       &= ~MMC_RSP_CRC; /* Ignore CRC */
        cmd.busy_timeout = MMC_CQE_RECOVERY_TIMEOUT;
-       err = mmc_wait_for_cmd(host, &cmd, 0);
+       err = mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES);
 
        host->cqe_ops->cqe_recovery_finish(host);
 
+       if (err)
+               err = mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES);
+
        mmc_retune_release(host);
 
        return err;
index b3d7d6d8d654852dfc38c5ee6a1dadce2c7b6ec7..41e94cd1410980071beaf89d91d7abdd7a90bf4f 100644 (file)
@@ -942,8 +942,8 @@ static bool cqhci_clear_all_tasks(struct mmc_host *mmc, unsigned int timeout)
        ret = cqhci_tasks_cleared(cq_host);
 
        if (!ret)
-               pr_debug("%s: cqhci: Failed to clear tasks\n",
-                        mmc_hostname(mmc));
+               pr_warn("%s: cqhci: Failed to clear tasks\n",
+                       mmc_hostname(mmc));
 
        return ret;
 }
@@ -976,7 +976,7 @@ static bool cqhci_halt(struct mmc_host *mmc, unsigned int timeout)
        ret = cqhci_halted(cq_host);
 
        if (!ret)
-               pr_debug("%s: cqhci: Failed to halt\n", mmc_hostname(mmc));
+               pr_warn("%s: cqhci: Failed to halt\n", mmc_hostname(mmc));
 
        return ret;
 }
@@ -984,10 +984,10 @@ static bool cqhci_halt(struct mmc_host *mmc, unsigned int timeout)
 /*
  * After halting we expect to be able to use the command line. We interpret the
  * failure to halt to mean the data lines might still be in use (and the upper
- * layers will need to send a STOP command), so we set the timeout based on a
- * generous command timeout.
+ * layers will need to send a STOP command), however failing to halt complicates
+ * the recovery, so set a timeout that would reasonably allow I/O to complete.
  */
-#define CQHCI_START_HALT_TIMEOUT       5
+#define CQHCI_START_HALT_TIMEOUT       500
 
 static void cqhci_recovery_start(struct mmc_host *mmc)
 {
@@ -1075,28 +1075,28 @@ static void cqhci_recovery_finish(struct mmc_host *mmc)
 
        ok = cqhci_halt(mmc, CQHCI_FINISH_HALT_TIMEOUT);
 
-       if (!cqhci_clear_all_tasks(mmc, CQHCI_CLEAR_TIMEOUT))
-               ok = false;
-
        /*
         * The specification contradicts itself, by saying that tasks cannot be
         * cleared if CQHCI does not halt, but if CQHCI does not halt, it should
         * be disabled/re-enabled, but not to disable before clearing tasks.
         * Have a go anyway.
         */
-       if (!ok) {
-               pr_debug("%s: cqhci: disable / re-enable\n", mmc_hostname(mmc));
-               cqcfg = cqhci_readl(cq_host, CQHCI_CFG);
-               cqcfg &= ~CQHCI_ENABLE;
-               cqhci_writel(cq_host, cqcfg, CQHCI_CFG);
-               cqcfg |= CQHCI_ENABLE;
-               cqhci_writel(cq_host, cqcfg, CQHCI_CFG);
-               /* Be sure that there are no tasks */
-               ok = cqhci_halt(mmc, CQHCI_FINISH_HALT_TIMEOUT);
-               if (!cqhci_clear_all_tasks(mmc, CQHCI_CLEAR_TIMEOUT))
-                       ok = false;
-               WARN_ON(!ok);
-       }
+       if (!cqhci_clear_all_tasks(mmc, CQHCI_CLEAR_TIMEOUT))
+               ok = false;
+
+       /* Disable to make sure tasks really are cleared */
+       cqcfg = cqhci_readl(cq_host, CQHCI_CFG);
+       cqcfg &= ~CQHCI_ENABLE;
+       cqhci_writel(cq_host, cqcfg, CQHCI_CFG);
+
+       cqcfg = cqhci_readl(cq_host, CQHCI_CFG);
+       cqcfg |= CQHCI_ENABLE;
+       cqhci_writel(cq_host, cqcfg, CQHCI_CFG);
+
+       cqhci_halt(mmc, CQHCI_FINISH_HALT_TIMEOUT);
+
+       if (!ok)
+               cqhci_clear_all_tasks(mmc, CQHCI_CLEAR_TIMEOUT);
 
        cqhci_recover_mrqs(cq_host);
 
index d8a991b349a823487554394a489edb7a2dd85f66..77911a57b12cfc5ceafbf28447fea83e109017a0 100644 (file)
@@ -1189,6 +1189,32 @@ static void gl9763e_hs400_enhanced_strobe(struct mmc_host *mmc,
        sdhci_writel(host, val, SDHCI_GLI_9763E_HS400_ES_REG);
 }
 
+static void gl9763e_set_low_power_negotiation(struct sdhci_pci_slot *slot,
+                                             bool enable)
+{
+       struct pci_dev *pdev = slot->chip->pdev;
+       u32 value;
+
+       pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value);
+       value &= ~GLI_9763E_VHS_REV;
+       value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_W);
+       pci_write_config_dword(pdev, PCIE_GLI_9763E_VHS, value);
+
+       pci_read_config_dword(pdev, PCIE_GLI_9763E_CFG, &value);
+
+       if (enable)
+               value &= ~GLI_9763E_CFG_LPSN_DIS;
+       else
+               value |= GLI_9763E_CFG_LPSN_DIS;
+
+       pci_write_config_dword(pdev, PCIE_GLI_9763E_CFG, value);
+
+       pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value);
+       value &= ~GLI_9763E_VHS_REV;
+       value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_R);
+       pci_write_config_dword(pdev, PCIE_GLI_9763E_VHS, value);
+}
+
 static void sdhci_set_gl9763e_signaling(struct sdhci_host *host,
                                        unsigned int timing)
 {
@@ -1297,6 +1323,9 @@ static int gl9763e_add_host(struct sdhci_pci_slot *slot)
        if (ret)
                goto cleanup;
 
+       /* Disable LPM negotiation to avoid entering L1 state. */
+       gl9763e_set_low_power_negotiation(slot, false);
+
        return 0;
 
 cleanup:
@@ -1340,31 +1369,6 @@ static void gli_set_gl9763e(struct sdhci_pci_slot *slot)
 }
 
 #ifdef CONFIG_PM
-static void gl9763e_set_low_power_negotiation(struct sdhci_pci_slot *slot, bool enable)
-{
-       struct pci_dev *pdev = slot->chip->pdev;
-       u32 value;
-
-       pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value);
-       value &= ~GLI_9763E_VHS_REV;
-       value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_W);
-       pci_write_config_dword(pdev, PCIE_GLI_9763E_VHS, value);
-
-       pci_read_config_dword(pdev, PCIE_GLI_9763E_CFG, &value);
-
-       if (enable)
-               value &= ~GLI_9763E_CFG_LPSN_DIS;
-       else
-               value |= GLI_9763E_CFG_LPSN_DIS;
-
-       pci_write_config_dword(pdev, PCIE_GLI_9763E_CFG, value);
-
-       pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value);
-       value &= ~GLI_9763E_VHS_REV;
-       value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_R);
-       pci_write_config_dword(pdev, PCIE_GLI_9763E_VHS, value);
-}
-
 static int gl9763e_runtime_suspend(struct sdhci_pci_chip *chip)
 {
        struct sdhci_pci_slot *slot = chip->slots[0];
index 6b84ba27e6ab0d83f5b20154c376c0e38af3c7a4..6b8a57e2d20f0a75e860f59c3e1737bb55211e52 100644 (file)
@@ -416,12 +416,33 @@ static void sdhci_sprd_request_done(struct sdhci_host *host,
        mmc_request_done(host->mmc, mrq);
 }
 
+static void sdhci_sprd_set_power(struct sdhci_host *host, unsigned char mode,
+                                unsigned short vdd)
+{
+       struct mmc_host *mmc = host->mmc;
+
+       switch (mode) {
+       case MMC_POWER_OFF:
+               mmc_regulator_set_ocr(host->mmc, mmc->supply.vmmc, 0);
+
+               mmc_regulator_disable_vqmmc(mmc);
+               break;
+       case MMC_POWER_ON:
+               mmc_regulator_enable_vqmmc(mmc);
+               break;
+       case MMC_POWER_UP:
+               mmc_regulator_set_ocr(host->mmc, mmc->supply.vmmc, vdd);
+               break;
+       }
+}
+
 static struct sdhci_ops sdhci_sprd_ops = {
        .read_l = sdhci_sprd_readl,
        .write_l = sdhci_sprd_writel,
        .write_w = sdhci_sprd_writew,
        .write_b = sdhci_sprd_writeb,
        .set_clock = sdhci_sprd_set_clock,
+       .set_power = sdhci_sprd_set_power,
        .get_max_clock = sdhci_sprd_get_max_clock,
        .get_min_clock = sdhci_sprd_get_min_clock,
        .set_bus_width = sdhci_set_bus_width,
@@ -823,6 +844,10 @@ static int sdhci_sprd_probe(struct platform_device *pdev)
        host->caps1 &= ~(SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_SDR104 |
                         SDHCI_SUPPORT_DDR50);
 
+       ret = mmc_regulator_get_supply(host->mmc);
+       if (ret)
+               goto pm_runtime_disable;
+
        ret = sdhci_setup_host(host);
        if (ret)
                goto pm_runtime_disable;
index 51d47eda1c873debda6da094377bcb3367a78f6e..8e6cc0e133b7f19afccd3ecf44bea5ceacb393b1 100644 (file)
@@ -1500,6 +1500,10 @@ done:
 static void bond_setup_by_slave(struct net_device *bond_dev,
                                struct net_device *slave_dev)
 {
+       bool was_up = !!(bond_dev->flags & IFF_UP);
+
+       dev_close(bond_dev);
+
        bond_dev->header_ops        = slave_dev->header_ops;
 
        bond_dev->type              = slave_dev->type;
@@ -1514,6 +1518,8 @@ static void bond_setup_by_slave(struct net_device *bond_dev,
                bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
                bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP);
        }
+       if (was_up)
+               dev_open(bond_dev, NULL);
 }
 
 /* On bonding slaves other than the currently active slave, suppress
index 42b1acaca33a966fa65b54dab9ab9304c39d7b63..07a22c74fe810ab0e1372db42335dd42aac603f7 100644 (file)
@@ -577,6 +577,18 @@ static void mv88e6250_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
        config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100;
 }
 
+static void mv88e6351_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+                                      struct phylink_config *config)
+{
+       unsigned long *supported = config->supported_interfaces;
+
+       /* Translate the default cmode */
+       mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
+
+       config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 |
+                                  MAC_1000FD;
+}
+
 static int mv88e6352_get_port4_serdes_cmode(struct mv88e6xxx_chip *chip)
 {
        u16 reg, val;
@@ -3880,7 +3892,8 @@ static int mv88e6xxx_port_setup(struct dsa_switch *ds, int port)
        struct mv88e6xxx_chip *chip = ds->priv;
        int err;
 
-       if (chip->info->ops->pcs_ops->pcs_init) {
+       if (chip->info->ops->pcs_ops &&
+           chip->info->ops->pcs_ops->pcs_init) {
                err = chip->info->ops->pcs_ops->pcs_init(chip, port);
                if (err)
                        return err;
@@ -3895,7 +3908,8 @@ static void mv88e6xxx_port_teardown(struct dsa_switch *ds, int port)
 
        mv88e6xxx_teardown_devlink_regions_port(ds, port);
 
-       if (chip->info->ops->pcs_ops->pcs_teardown)
+       if (chip->info->ops->pcs_ops &&
+           chip->info->ops->pcs_ops->pcs_teardown)
                chip->info->ops->pcs_ops->pcs_teardown(chip, port);
 }
 
@@ -4340,7 +4354,7 @@ static const struct mv88e6xxx_ops mv88e6171_ops = {
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .stu_getnext = mv88e6352_g1_stu_getnext,
        .stu_loadpurge = mv88e6352_g1_stu_loadpurge,
-       .phylink_get_caps = mv88e6185_phylink_get_caps,
+       .phylink_get_caps = mv88e6351_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6172_ops = {
@@ -4440,7 +4454,7 @@ static const struct mv88e6xxx_ops mv88e6175_ops = {
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .stu_getnext = mv88e6352_g1_stu_getnext,
        .stu_loadpurge = mv88e6352_g1_stu_loadpurge,
-       .phylink_get_caps = mv88e6185_phylink_get_caps,
+       .phylink_get_caps = mv88e6351_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6176_ops = {
@@ -5069,7 +5083,7 @@ static const struct mv88e6xxx_ops mv88e6350_ops = {
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .stu_getnext = mv88e6352_g1_stu_getnext,
        .stu_loadpurge = mv88e6352_g1_stu_loadpurge,
-       .phylink_get_caps = mv88e6185_phylink_get_caps,
+       .phylink_get_caps = mv88e6351_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6351_ops = {
@@ -5117,7 +5131,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
        .stu_loadpurge = mv88e6352_g1_stu_loadpurge,
        .avb_ops = &mv88e6352_avb_ops,
        .ptp_ops = &mv88e6352_ptp_ops,
-       .phylink_get_caps = mv88e6185_phylink_get_caps,
+       .phylink_get_caps = mv88e6351_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6352_ops = {
index 045fe133f6ee9957479c67b72b61c8887fbc0afe..5beadabc213618314ad42da120da259415eaa7b3 100644 (file)
@@ -146,7 +146,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data)
        }
 
        queue_work(pdsc->wq, &qcq->work);
-       pds_core_intr_mask(&pdsc->intr_ctrl[irq], PDS_CORE_INTR_MASK_CLEAR);
+       pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR);
 
        return IRQ_HANDLED;
 }
index f3a7deda997245bd3c80070889981f35c01dcd28..e35d3e7006bfc1891a0343643910b915f31ba56a 100644 (file)
@@ -15,7 +15,7 @@
 #define PDSC_DRV_DESCRIPTION   "AMD/Pensando Core Driver"
 
 #define PDSC_WATCHDOG_SECS     5
-#define PDSC_QUEUE_NAME_MAX_SZ  32
+#define PDSC_QUEUE_NAME_MAX_SZ  16
 #define PDSC_ADMINQ_MIN_LENGTH 16      /* must be a power of two */
 #define PDSC_NOTIFYQ_LENGTH    64      /* must be a power of two */
 #define PDSC_TEARDOWN_RECOVERY false
index 7c1b965d61a926df45a88e8c941f0806f12923ad..31940b857e0e501d2d4d220a0ed6a0cfd03098c7 100644 (file)
@@ -261,10 +261,14 @@ static int pdsc_identify(struct pdsc *pdsc)
        struct pds_core_drv_identity drv = {};
        size_t sz;
        int err;
+       int n;
 
        drv.drv_type = cpu_to_le32(PDS_DRIVER_LINUX);
-       snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str),
-                "%s %s", PDS_CORE_DRV_NAME, utsname()->release);
+       /* Catching the return quiets a Wformat-truncation complaint */
+       n = snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str),
+                    "%s %s", PDS_CORE_DRV_NAME, utsname()->release);
+       if (n > sizeof(drv.driver_ver_str))
+               dev_dbg(pdsc->dev, "release name truncated, don't care\n");
 
        /* Next let's get some info about the device
         * We use the devcmd_lock at this level in order to
index 57f88c8b37defe17fed6e8b9d82578a4f9701dc2..e9948ea5bbcdbaae713390cca46280e55b548956 100644 (file)
@@ -104,7 +104,7 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
        struct pds_core_fw_list_info fw_list;
        struct pdsc *pdsc = devlink_priv(dl);
        union pds_core_dev_comp comp;
-       char buf[16];
+       char buf[32];
        int listlen;
        int err;
        int i;
index 614c0278419bcf31e8be085300b34acd00847df4..6b73648b3779368f8a01cbc95f8afb88bc07786d 100644 (file)
@@ -682,10 +682,24 @@ static void xgbe_service(struct work_struct *work)
 static void xgbe_service_timer(struct timer_list *t)
 {
        struct xgbe_prv_data *pdata = from_timer(pdata, t, service_timer);
+       struct xgbe_channel *channel;
+       unsigned int i;
 
        queue_work(pdata->dev_workqueue, &pdata->service_work);
 
        mod_timer(&pdata->service_timer, jiffies + HZ);
+
+       if (!pdata->tx_usecs)
+               return;
+
+       for (i = 0; i < pdata->channel_count; i++) {
+               channel = pdata->channel[i];
+               if (!channel->tx_ring || channel->tx_timer_active)
+                       break;
+               channel->tx_timer_active = 1;
+               mod_timer(&channel->tx_timer,
+                         jiffies + usecs_to_jiffies(pdata->tx_usecs));
+       }
 }
 
 static void xgbe_init_timers(struct xgbe_prv_data *pdata)
index 6e83ff59172a36b66d1442dc32b338edfe3d5773..32fab5e7724626f856da6c17ae9df2c92241b0c1 100644 (file)
@@ -314,10 +314,15 @@ static int xgbe_get_link_ksettings(struct net_device *netdev,
 
        cmd->base.phy_address = pdata->phy.address;
 
-       cmd->base.autoneg = pdata->phy.autoneg;
-       cmd->base.speed = pdata->phy.speed;
-       cmd->base.duplex = pdata->phy.duplex;
+       if (netif_carrier_ok(netdev)) {
+               cmd->base.speed = pdata->phy.speed;
+               cmd->base.duplex = pdata->phy.duplex;
+       } else {
+               cmd->base.speed = SPEED_UNKNOWN;
+               cmd->base.duplex = DUPLEX_UNKNOWN;
+       }
 
+       cmd->base.autoneg = pdata->phy.autoneg;
        cmd->base.port = PORT_NONE;
 
        XGBE_LM_COPY(cmd, supported, lks, supported);
index 32d2c6fac65266baee9bc36477f21418b50f67e0..4a2dc705b52801792652c3ba6a9c945b306c3198 100644 (file)
@@ -1193,7 +1193,19 @@ static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata)
        if (pdata->phy.duplex != DUPLEX_FULL)
                return -EINVAL;
 
-       xgbe_set_mode(pdata, mode);
+       /* Force the mode change for SFI in Fixed PHY config.
+        * Fixed PHY configs needs PLL to be enabled while doing mode set.
+        * When the SFP module isn't connected during boot, driver assumes
+        * AN is ON and attempts autonegotiation. However, if the connected
+        * SFP comes up in Fixed PHY config, the link will not come up as
+        * PLL isn't enabled while the initial mode set command is issued.
+        * So, force the mode change for SFI in Fixed PHY configuration to
+        * fix link issues.
+        */
+       if (mode == XGBE_MODE_SFI)
+               xgbe_change_mode(pdata, mode);
+       else
+               xgbe_set_mode(pdata, mode);
 
        return 0;
 }
index 1dee27349367e76e9460b4eaeffdd42eb1946b42..48b6191efa56c70cc82ba0a83bb5ee64e6bb3fe2 100644 (file)
@@ -6889,7 +6889,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
                                       desc_idx, *post_ptr);
                drop_it_no_recycle:
                        /* Other statistics kept track of by card. */
-                       tp->rx_dropped++;
+                       tnapi->rx_dropped++;
                        goto next_pkt;
                }
 
@@ -7918,8 +7918,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
 
        segs = skb_gso_segment(skb, tp->dev->features &
                                    ~(NETIF_F_TSO | NETIF_F_TSO6));
-       if (IS_ERR(segs) || !segs)
+       if (IS_ERR(segs) || !segs) {
+               tnapi->tx_dropped++;
                goto tg3_tso_bug_end;
+       }
 
        skb_list_walk_safe(segs, seg, next) {
                skb_mark_not_on_list(seg);
@@ -8190,7 +8192,7 @@ dma_error:
 drop:
        dev_kfree_skb_any(skb);
 drop_nofree:
-       tp->tx_dropped++;
+       tnapi->tx_dropped++;
        return NETDEV_TX_OK;
 }
 
@@ -9405,7 +9407,7 @@ static void __tg3_set_rx_mode(struct net_device *);
 /* tp->lock is held. */
 static int tg3_halt(struct tg3 *tp, int kind, bool silent)
 {
-       int err;
+       int err, i;
 
        tg3_stop_fw(tp);
 
@@ -9426,6 +9428,13 @@ static int tg3_halt(struct tg3 *tp, int kind, bool silent)
 
                /* And make sure the next sample is new data */
                memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats));
+
+               for (i = 0; i < TG3_IRQ_MAX_VECS; ++i) {
+                       struct tg3_napi *tnapi = &tp->napi[i];
+
+                       tnapi->rx_dropped = 0;
+                       tnapi->tx_dropped = 0;
+               }
        }
 
        return err;
@@ -11975,6 +11984,9 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats)
 {
        struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev;
        struct tg3_hw_stats *hw_stats = tp->hw_stats;
+       unsigned long rx_dropped;
+       unsigned long tx_dropped;
+       int i;
 
        stats->rx_packets = old_stats->rx_packets +
                get_stat64(&hw_stats->rx_ucast_packets) +
@@ -12021,8 +12033,26 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats)
        stats->rx_missed_errors = old_stats->rx_missed_errors +
                get_stat64(&hw_stats->rx_discards);
 
-       stats->rx_dropped = tp->rx_dropped;
-       stats->tx_dropped = tp->tx_dropped;
+       /* Aggregate per-queue counters. The per-queue counters are updated
+        * by a single writer, race-free. The result computed by this loop
+        * might not be 100% accurate (counters can be updated in the middle of
+        * the loop) but the next tg3_get_nstats() will recompute the current
+        * value so it is acceptable.
+        *
+        * Note that these counters wrap around at 4G on 32bit machines.
+        */
+       rx_dropped = (unsigned long)(old_stats->rx_dropped);
+       tx_dropped = (unsigned long)(old_stats->tx_dropped);
+
+       for (i = 0; i < tp->irq_cnt; i++) {
+               struct tg3_napi *tnapi = &tp->napi[i];
+
+               rx_dropped += tnapi->rx_dropped;
+               tx_dropped += tnapi->tx_dropped;
+       }
+
+       stats->rx_dropped = rx_dropped;
+       stats->tx_dropped = tx_dropped;
 }
 
 static int tg3_get_regs_len(struct net_device *dev)
index ae5c01bd111043d0ba76cfa143a2157e02966a4a..5016475e50054d627cfb541ce18d9808450a200f 100644 (file)
@@ -3018,6 +3018,7 @@ struct tg3_napi {
        u16                             *rx_rcb_prod_idx;
        struct tg3_rx_prodring_set      prodring;
        struct tg3_rx_buffer_desc       *rx_rcb;
+       unsigned long                   rx_dropped;
 
        u32                             tx_prod ____cacheline_aligned;
        u32                             tx_cons;
@@ -3026,6 +3027,7 @@ struct tg3_napi {
        u32                             prodmbox;
        struct tg3_tx_buffer_desc       *tx_ring;
        struct tg3_tx_ring_info         *tx_buffers;
+       unsigned long                   tx_dropped;
 
        dma_addr_t                      status_mapping;
        dma_addr_t                      rx_rcb_mapping;
@@ -3220,8 +3222,6 @@ struct tg3 {
 
 
        /* begin "everything else" cacheline(s) section */
-       unsigned long                   rx_dropped;
-       unsigned long                   tx_dropped;
        struct rtnl_link_stats64        net_stats_prev;
        struct tg3_ethtool_stats        estats_prev;
 
index 5423fe26b4ef021f16714258135cd011c09026a1..78287cfcbf6388f01bfab417c264f41f3a1a16f2 100644 (file)
@@ -432,8 +432,8 @@ static const struct gmac_max_framelen gmac_maxlens[] = {
                .val = CONFIG0_MAXLEN_1536,
        },
        {
-               .max_l3_len = 1542,
-               .val = CONFIG0_MAXLEN_1542,
+               .max_l3_len = 1548,
+               .val = CONFIG0_MAXLEN_1548,
        },
        {
                .max_l3_len = 9212,
@@ -1145,6 +1145,7 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb,
        dma_addr_t mapping;
        unsigned short mtu;
        void *buffer;
+       int ret;
 
        mtu  = ETH_HLEN;
        mtu += netdev->mtu;
@@ -1159,9 +1160,30 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb,
                word3 |= mtu;
        }
 
-       if (skb->ip_summed != CHECKSUM_NONE) {
+       if (skb->len >= ETH_FRAME_LEN) {
+               /* Hardware offloaded checksumming isn't working on frames
+                * bigger than 1514 bytes. A hypothesis about this is that the
+                * checksum buffer is only 1518 bytes, so when the frames get
+                * bigger they get truncated, or the last few bytes get
+                * overwritten by the FCS.
+                *
+                * Just use software checksumming and bypass on bigger frames.
+                */
+               if (skb->ip_summed == CHECKSUM_PARTIAL) {
+                       ret = skb_checksum_help(skb);
+                       if (ret)
+                               return ret;
+               }
+               word1 |= TSS_BYPASS_BIT;
+       } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
                int tcp = 0;
 
+               /* We do not switch off the checksumming on non TCP/UDP
+                * frames: as is shown from tests, the checksumming engine
+                * is smart enough to see that a frame is not actually TCP
+                * or UDP and then just pass it through without any changes
+                * to the frame.
+                */
                if (skb->protocol == htons(ETH_P_IP)) {
                        word1 |= TSS_IP_CHKSUM_BIT;
                        tcp = ip_hdr(skb)->protocol == IPPROTO_TCP;
@@ -1978,15 +2000,6 @@ static int gmac_change_mtu(struct net_device *netdev, int new_mtu)
        return 0;
 }
 
-static netdev_features_t gmac_fix_features(struct net_device *netdev,
-                                          netdev_features_t features)
-{
-       if (netdev->mtu + ETH_HLEN + VLAN_HLEN > MTU_SIZE_BIT_MASK)
-               features &= ~GMAC_OFFLOAD_FEATURES;
-
-       return features;
-}
-
 static int gmac_set_features(struct net_device *netdev,
                             netdev_features_t features)
 {
@@ -2212,7 +2225,6 @@ static const struct net_device_ops gmac_351x_ops = {
        .ndo_set_mac_address    = gmac_set_mac_address,
        .ndo_get_stats64        = gmac_get_stats64,
        .ndo_change_mtu         = gmac_change_mtu,
-       .ndo_fix_features       = gmac_fix_features,
        .ndo_set_features       = gmac_set_features,
 };
 
@@ -2464,11 +2476,12 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
 
        netdev->hw_features = GMAC_OFFLOAD_FEATURES;
        netdev->features |= GMAC_OFFLOAD_FEATURES | NETIF_F_GRO;
-       /* We can handle jumbo frames up to 10236 bytes so, let's accept
-        * payloads of 10236 bytes minus VLAN and ethernet header
+       /* We can receive jumbo frames up to 10236 bytes but only
+        * transmit 2047 bytes so, let's accept payloads of 2047
+        * bytes minus VLAN and ethernet header
         */
        netdev->min_mtu = ETH_MIN_MTU;
-       netdev->max_mtu = 10236 - VLAN_ETH_HLEN;
+       netdev->max_mtu = MTU_SIZE_BIT_MASK - VLAN_ETH_HLEN;
 
        port->freeq_refill = 0;
        netif_napi_add(netdev, &port->napi, gmac_napi_poll);
index 9fdf77d5eb3740982c28f5758b595aec33dbf692..24bb989981f2339476789d8298d06aa7e0c9087a 100644 (file)
@@ -502,7 +502,7 @@ union gmac_txdesc_3 {
 #define SOF_BIT                        0x80000000
 #define EOF_BIT                        0x40000000
 #define EOFIE_BIT              BIT(29)
-#define MTU_SIZE_BIT_MASK      0x1fff
+#define MTU_SIZE_BIT_MASK      0x7ff /* Max MTU 2047 bytes */
 
 /* GMAC Tx Descriptor */
 struct gmac_txdesc {
@@ -787,7 +787,7 @@ union gmac_config0 {
 #define  CONFIG0_MAXLEN_1536   0
 #define  CONFIG0_MAXLEN_1518   1
 #define  CONFIG0_MAXLEN_1522   2
-#define  CONFIG0_MAXLEN_1542   3
+#define  CONFIG0_MAXLEN_1548   3
 #define  CONFIG0_MAXLEN_9k     4       /* 9212 */
 #define  CONFIG0_MAXLEN_10k    5       /* 10236 */
 #define  CONFIG0_MAXLEN_1518__6        6
index 15bab41cee48dfa13b25cd3c145d4df726ea6018..888509cf1f210ecb1ff6771542e85df76c07ce2e 100644 (file)
@@ -516,8 +516,6 @@ struct sk_buff *dpaa2_eth_alloc_skb(struct dpaa2_eth_priv *priv,
 
        memcpy(skb->data, fd_vaddr + fd_offset, fd_length);
 
-       dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(fd));
-
        return skb;
 }
 
@@ -589,6 +587,7 @@ void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
        struct rtnl_link_stats64 *percpu_stats;
        struct dpaa2_eth_drv_stats *percpu_extras;
        struct device *dev = priv->net_dev->dev.parent;
+       bool recycle_rx_buf = false;
        void *buf_data;
        u32 xdp_act;
 
@@ -618,6 +617,8 @@ void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
                        dma_unmap_page(dev, addr, priv->rx_buf_size,
                                       DMA_BIDIRECTIONAL);
                        skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr);
+               } else {
+                       recycle_rx_buf = true;
                }
        } else if (fd_format == dpaa2_fd_sg) {
                WARN_ON(priv->xdp_prog);
@@ -637,6 +638,9 @@ void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
                goto err_build_skb;
 
        dpaa2_eth_receive_skb(priv, ch, fd, vaddr, fq, percpu_stats, skb);
+
+       if (recycle_rx_buf)
+               dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(fd));
        return;
 
 err_build_skb:
@@ -1073,14 +1077,12 @@ static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv,
        dma_addr_t addr;
 
        buffer_start = skb->data - dpaa2_eth_needed_headroom(skb);
-
-       /* If there's enough room to align the FD address, do it.
-        * It will help hardware optimize accesses.
-        */
        aligned_start = PTR_ALIGN(buffer_start - DPAA2_ETH_TX_BUF_ALIGN,
                                  DPAA2_ETH_TX_BUF_ALIGN);
        if (aligned_start >= skb->head)
                buffer_start = aligned_start;
+       else
+               return -ENOMEM;
 
        /* Store a backpointer to the skb at the beginning of the buffer
         * (in the private data area) such that we can release it
@@ -4967,6 +4969,8 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
        if (err)
                goto err_dl_port_add;
 
+       net_dev->needed_headroom = DPAA2_ETH_SWA_SIZE + DPAA2_ETH_TX_BUF_ALIGN;
+
        err = register_netdev(net_dev);
        if (err < 0) {
                dev_err(dev, "register_netdev() failed\n");
index bfb6c96c3b2f081d8fc428f036213327faa5a3a0..834cba8c3a416390578484ff7075b4626b0d2525 100644 (file)
@@ -740,7 +740,7 @@ static inline bool dpaa2_eth_rx_pause_enabled(u64 link_options)
 
 static inline unsigned int dpaa2_eth_needed_headroom(struct sk_buff *skb)
 {
-       unsigned int headroom = DPAA2_ETH_SWA_SIZE;
+       unsigned int headroom = DPAA2_ETH_SWA_SIZE + DPAA2_ETH_TX_BUF_ALIGN;
 
        /* If we don't have an skb (e.g. XDP buffer), we only need space for
         * the software annotation area
index 276f996f95dcc8b6ab3c5eb51958b95c19e61dd2..2d42e733837b0d5e98de7d2cdab37b23be503d1c 100644 (file)
@@ -254,10 +254,13 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
        if (block->tx) {
                if (block->tx->q_num < priv->tx_cfg.num_queues)
                        reschedule |= gve_tx_poll(block, budget);
-               else
+               else if (budget)
                        reschedule |= gve_xdp_poll(block, budget);
        }
 
+       if (!budget)
+               return 0;
+
        if (block->rx) {
                work_done = gve_rx_poll(block, budget);
                reschedule |= work_done == budget;
@@ -298,6 +301,9 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
        if (block->tx)
                reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
 
+       if (!budget)
+               return 0;
+
        if (block->rx) {
                work_done = gve_rx_poll_dqo(block, budget);
                reschedule |= work_done == budget;
index e84a066aa1a40a1f5709852c82da3d212a91f85b..73655347902d2ddedbe896841e7b42b32d0d6a89 100644 (file)
@@ -1007,10 +1007,6 @@ int gve_rx_poll(struct gve_notify_block *block, int budget)
 
        feat = block->napi.dev->features;
 
-       /* If budget is 0, do all the work */
-       if (budget == 0)
-               budget = INT_MAX;
-
        if (budget > 0)
                work_done = gve_clean_rx_done(rx, budget, feat);
 
index 6957a865cff37c86a96e4929b986915907781415..9f6ffc4a54f0bb6a46c4f6daee26f429682191e1 100644 (file)
@@ -925,10 +925,6 @@ bool gve_xdp_poll(struct gve_notify_block *block, int budget)
        bool repoll;
        u32 to_do;
 
-       /* If budget is 0, do all the work */
-       if (budget == 0)
-               budget = INT_MAX;
-
        /* Find out how much work there is to be done */
        nic_done = gve_tx_load_event_counter(priv, tx);
        to_do = min_t(u32, (nic_done - tx->done), budget);
index 0b138635bafa9d3a0a17d532346337ffadb2aa5b..c083d1d10767bbfff0c8a58099b5b25dc8a5e7ff 100644 (file)
@@ -503,11 +503,14 @@ static void hns3_get_coal_info(struct hns3_enet_tqp_vector *tqp_vector,
        }
 
        sprintf(result[j++], "%d", i);
-       sprintf(result[j++], "%s", dim_state_str[dim->state]);
+       sprintf(result[j++], "%s", dim->state < ARRAY_SIZE(dim_state_str) ?
+               dim_state_str[dim->state] : "unknown");
        sprintf(result[j++], "%u", dim->profile_ix);
-       sprintf(result[j++], "%s", dim_cqe_mode_str[dim->mode]);
+       sprintf(result[j++], "%s", dim->mode < ARRAY_SIZE(dim_cqe_mode_str) ?
+               dim_cqe_mode_str[dim->mode] : "unknown");
        sprintf(result[j++], "%s",
-               dim_tune_stat_str[dim->tune_state]);
+               dim->tune_state < ARRAY_SIZE(dim_tune_stat_str) ?
+               dim_tune_stat_str[dim->tune_state] : "unknown");
        sprintf(result[j++], "%u", dim->steps_left);
        sprintf(result[j++], "%u", dim->steps_right);
        sprintf(result[j++], "%u", dim->tired);
index 06117502001f922271f67cc9103f896e9122f2e7..b618797a7e8de2563c5821347ecb441e4f0b7ad5 100644 (file)
@@ -5139,7 +5139,7 @@ static int hns3_init_mac_addr(struct net_device *netdev)
        struct hns3_nic_priv *priv = netdev_priv(netdev);
        char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
        struct hnae3_handle *h = priv->ae_handle;
-       u8 mac_addr_temp[ETH_ALEN];
+       u8 mac_addr_temp[ETH_ALEN] = {0};
        int ret = 0;
 
        if (h->ae_algo->ops->get_mac_addr)
index 66e5807903a02a3838ee008e2849cb8180021f80..5ea9e59569effbb56ea5e198eb80cd782c1f7a28 100644 (file)
@@ -61,6 +61,7 @@ static void hclge_sync_fd_table(struct hclge_dev *hdev);
 static void hclge_update_fec_stats(struct hclge_dev *hdev);
 static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
                                      int wait_cnt);
+static int hclge_update_port_info(struct hclge_dev *hdev);
 
 static struct hnae3_ae_algo ae_algo;
 
@@ -3041,6 +3042,9 @@ static void hclge_update_link_status(struct hclge_dev *hdev)
 
        if (state != hdev->hw.mac.link) {
                hdev->hw.mac.link = state;
+               if (state == HCLGE_LINK_STATUS_UP)
+                       hclge_update_port_info(hdev);
+
                client->ops->link_status_change(handle, state);
                hclge_config_mac_tnl_int(hdev, state);
                if (rclient && rclient->ops->link_status_change)
@@ -10025,8 +10029,6 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
        struct hclge_vport_vlan_cfg *vlan, *tmp;
        struct hclge_dev *hdev = vport->back;
 
-       mutex_lock(&hdev->vport_lock);
-
        list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
                if (vlan->vlan_id == vlan_id) {
                        if (is_write_tbl && vlan->hd_tbl_status)
@@ -10041,8 +10043,6 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
                        break;
                }
        }
-
-       mutex_unlock(&hdev->vport_lock);
 }
 
 void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list)
@@ -10451,11 +10451,16 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
         * handle mailbox. Just record the vlan id, and remove it after
         * reset finished.
         */
+       mutex_lock(&hdev->vport_lock);
        if ((test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) ||
             test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) && is_kill) {
                set_bit(vlan_id, vport->vlan_del_fail_bmap);
+               mutex_unlock(&hdev->vport_lock);
                return -EBUSY;
+       } else if (!is_kill && test_bit(vlan_id, vport->vlan_del_fail_bmap)) {
+               clear_bit(vlan_id, vport->vlan_del_fail_bmap);
        }
+       mutex_unlock(&hdev->vport_lock);
 
        /* when port base vlan enabled, we use port base vlan as the vlan
         * filter entry. In this case, we don't update vlan filter table
@@ -10470,17 +10475,22 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
        }
 
        if (!ret) {
-               if (!is_kill)
+               if (!is_kill) {
                        hclge_add_vport_vlan_table(vport, vlan_id,
                                                   writen_to_tbl);
-               else if (is_kill && vlan_id != 0)
+               } else if (is_kill && vlan_id != 0) {
+                       mutex_lock(&hdev->vport_lock);
                        hclge_rm_vport_vlan_table(vport, vlan_id, false);
+                       mutex_unlock(&hdev->vport_lock);
+               }
        } else if (is_kill) {
                /* when remove hw vlan filter failed, record the vlan id,
                 * and try to remove it from hw later, to be consistence
                 * with stack
                 */
+               mutex_lock(&hdev->vport_lock);
                set_bit(vlan_id, vport->vlan_del_fail_bmap);
+               mutex_unlock(&hdev->vport_lock);
        }
 
        hclge_set_vport_vlan_fltr_change(vport);
@@ -10520,6 +10530,7 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev)
        int i, ret, sync_cnt = 0;
        u16 vlan_id;
 
+       mutex_lock(&hdev->vport_lock);
        /* start from vport 1 for PF is always alive */
        for (i = 0; i < hdev->num_alloc_vport; i++) {
                struct hclge_vport *vport = &hdev->vport[i];
@@ -10530,21 +10541,26 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev)
                        ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
                                                       vport->vport_id, vlan_id,
                                                       true);
-                       if (ret && ret != -EINVAL)
+                       if (ret && ret != -EINVAL) {
+                               mutex_unlock(&hdev->vport_lock);
                                return;
+                       }
 
                        clear_bit(vlan_id, vport->vlan_del_fail_bmap);
                        hclge_rm_vport_vlan_table(vport, vlan_id, false);
                        hclge_set_vport_vlan_fltr_change(vport);
 
                        sync_cnt++;
-                       if (sync_cnt >= HCLGE_MAX_SYNC_COUNT)
+                       if (sync_cnt >= HCLGE_MAX_SYNC_COUNT) {
+                               mutex_unlock(&hdev->vport_lock);
                                return;
+                       }
 
                        vlan_id = find_first_bit(vport->vlan_del_fail_bmap,
                                                 VLAN_N_VID);
                }
        }
+       mutex_unlock(&hdev->vport_lock);
 
        hclge_sync_vlan_fltr_state(hdev);
 }
@@ -11651,6 +11667,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
                goto err_msi_irq_uninit;
 
        if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) {
+               clear_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
                if (hnae3_dev_phy_imp_supported(hdev))
                        ret = hclge_update_tp_port_info(hdev);
                else
index a4d68fb216fb92ae4a23b24d323dade62676abea..0aa9beefd1c7ee6c53d9f2623069bae00848e77f 100644 (file)
@@ -1206,6 +1206,8 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle,
             test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) && is_kill) {
                set_bit(vlan_id, hdev->vlan_del_fail_bmap);
                return -EBUSY;
+       } else if (!is_kill && test_bit(vlan_id, hdev->vlan_del_fail_bmap)) {
+               clear_bit(vlan_id, hdev->vlan_del_fail_bmap);
        }
 
        hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN,
@@ -1233,20 +1235,25 @@ static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev)
        int ret, sync_cnt = 0;
        u16 vlan_id;
 
+       if (bitmap_empty(hdev->vlan_del_fail_bmap, VLAN_N_VID))
+               return;
+
+       rtnl_lock();
        vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID);
        while (vlan_id != VLAN_N_VID) {
                ret = hclgevf_set_vlan_filter(handle, htons(ETH_P_8021Q),
                                              vlan_id, true);
                if (ret)
-                       return;
+                       break;
 
                clear_bit(vlan_id, hdev->vlan_del_fail_bmap);
                sync_cnt++;
                if (sync_cnt >= HCLGEVF_MAX_SYNC_COUNT)
-                       return;
+                       break;
 
                vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID);
        }
+       rtnl_unlock();
 }
 
 static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
@@ -1974,8 +1981,18 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev,
        return HCLGEVF_VECTOR0_EVENT_OTHER;
 }
 
+static void hclgevf_reset_timer(struct timer_list *t)
+{
+       struct hclgevf_dev *hdev = from_timer(hdev, t, reset_timer);
+
+       hclgevf_clear_event_cause(hdev, HCLGEVF_VECTOR0_EVENT_RST);
+       hclgevf_reset_task_schedule(hdev);
+}
+
 static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
 {
+#define HCLGEVF_RESET_DELAY    5
+
        enum hclgevf_evt_cause event_cause;
        struct hclgevf_dev *hdev = data;
        u32 clearval;
@@ -1987,7 +2004,8 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
 
        switch (event_cause) {
        case HCLGEVF_VECTOR0_EVENT_RST:
-               hclgevf_reset_task_schedule(hdev);
+               mod_timer(&hdev->reset_timer,
+                         jiffies + msecs_to_jiffies(HCLGEVF_RESET_DELAY));
                break;
        case HCLGEVF_VECTOR0_EVENT_MBX:
                hclgevf_mbx_handler(hdev);
@@ -2930,6 +2948,7 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
                 HCLGEVF_DRIVER_NAME);
 
        hclgevf_task_schedule(hdev, round_jiffies_relative(HZ));
+       timer_setup(&hdev->reset_timer, hclgevf_reset_timer, 0);
 
        return 0;
 
index 81c16b8c8da2961c1dcc800cdf164c1dd9ae2ba1..a73f2bf3a56a6426704c64a20e74403c715ac09f 100644 (file)
@@ -219,6 +219,7 @@ struct hclgevf_dev {
        enum hnae3_reset_type reset_level;
        unsigned long reset_pending;
        enum hnae3_reset_type reset_type;
+       struct timer_list reset_timer;
 
 #define HCLGEVF_RESET_REQUESTED                0
 #define HCLGEVF_RESET_PENDING          1
index bbf7b14079de3cf2dc68cdd67a1f288b38903a79..85c2a634c8f96a1d4d3356b0adf9c1f87f8ed9b8 100644 (file)
@@ -63,6 +63,9 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1,
                i++;
        }
 
+       /* ensure additional_info will be seen after received_resp */
+       smp_rmb();
+
        if (i >= HCLGEVF_MAX_TRY_TIMES) {
                dev_err(&hdev->pdev->dev,
                        "VF could not get mbx(%u,%u) resp(=%d) from PF in %d tries\n",
@@ -178,6 +181,10 @@ static void hclgevf_handle_mbx_response(struct hclgevf_dev *hdev,
        resp->resp_status = hclgevf_resp_to_errno(resp_status);
        memcpy(resp->additional_info, req->msg.resp_data,
               HCLGE_MBX_MAX_RESP_DATA_SIZE * sizeof(u8));
+
+       /* ensure additional_info will be seen before setting received_resp */
+       smp_wmb();
+
        if (match_id) {
                /* If match_id is not zero, it means PF support match_id.
                 * if the match_id is right, VF get the right response, or
index 08d7edccfb8ddbd6385a7ce843bf652d87526b7f..3f99eb1982452725caa0548f01329350300cd8b9 100644 (file)
@@ -3844,7 +3844,7 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
        struct i40e_pf *pf = vf->pf;
        struct i40e_vsi *vsi = NULL;
        int aq_ret = 0;
-       int i, ret;
+       int i;
 
        if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
                aq_ret = -EINVAL;
@@ -3868,8 +3868,10 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
        }
 
        cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL);
-       if (!cfilter)
-               return -ENOMEM;
+       if (!cfilter) {
+               aq_ret = -ENOMEM;
+               goto err_out;
+       }
 
        /* parse destination mac address */
        for (i = 0; i < ETH_ALEN; i++)
@@ -3917,13 +3919,13 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
 
        /* Adding cloud filter programmed as TC filter */
        if (tcf.dst_port)
-               ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true);
+               aq_ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true);
        else
-               ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
-       if (ret) {
+               aq_ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
+       if (aq_ret) {
                dev_err(&pf->pdev->dev,
                        "VF %d: Failed to add cloud filter, err %pe aq_err %s\n",
-                       vf->vf_id, ERR_PTR(ret),
+                       vf->vf_id, ERR_PTR(aq_ret),
                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
                goto err_free;
        }
index cfb1580f5850b5496dfe4445fac14b35cacca42d..8b7504a9df316ce3be5786b61f603f0183dca5e6 100644 (file)
@@ -1479,14 +1479,14 @@ ice_post_dwnld_pkg_actions(struct ice_hw *hw)
 }
 
 /**
- * ice_download_pkg
+ * ice_download_pkg_with_sig_seg
  * @hw: pointer to the hardware structure
  * @pkg_hdr: pointer to package header
  *
  * Handles the download of a complete package.
  */
 static enum ice_ddp_state
-ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
+ice_download_pkg_with_sig_seg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
 {
        enum ice_aq_err aq_err = hw->adminq.sq_last_status;
        enum ice_ddp_state state = ICE_DDP_PKG_ERR;
@@ -1519,6 +1519,103 @@ ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
                state = ice_post_dwnld_pkg_actions(hw);
 
        ice_release_global_cfg_lock(hw);
+
+       return state;
+}
+
+/**
+ * ice_dwnld_cfg_bufs
+ * @hw: pointer to the hardware structure
+ * @bufs: pointer to an array of buffers
+ * @count: the number of buffers in the array
+ *
+ * Obtains global config lock and downloads the package configuration buffers
+ * to the firmware.
+ */
+static enum ice_ddp_state
+ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
+{
+       enum ice_ddp_state state;
+       struct ice_buf_hdr *bh;
+       int status;
+
+       if (!bufs || !count)
+               return ICE_DDP_PKG_ERR;
+
+       /* If the first buffer's first section has its metadata bit set
+        * then there are no buffers to be downloaded, and the operation is
+        * considered a success.
+        */
+       bh = (struct ice_buf_hdr *)bufs;
+       if (le32_to_cpu(bh->section_entry[0].type) & ICE_METADATA_BUF)
+               return ICE_DDP_PKG_SUCCESS;
+
+       status = ice_acquire_global_cfg_lock(hw, ICE_RES_WRITE);
+       if (status) {
+               if (status == -EALREADY)
+                       return ICE_DDP_PKG_ALREADY_LOADED;
+               return ice_map_aq_err_to_ddp_state(hw->adminq.sq_last_status);
+       }
+
+       state = ice_dwnld_cfg_bufs_no_lock(hw, bufs, 0, count, true);
+       if (!state)
+               state = ice_post_dwnld_pkg_actions(hw);
+
+       ice_release_global_cfg_lock(hw);
+
+       return state;
+}
+
+/**
+ * ice_download_pkg_without_sig_seg
+ * @hw: pointer to the hardware structure
+ * @ice_seg: pointer to the segment of the package to be downloaded
+ *
+ * Handles the download of a complete package without signature segment.
+ */
+static enum ice_ddp_state
+ice_download_pkg_without_sig_seg(struct ice_hw *hw, struct ice_seg *ice_seg)
+{
+       struct ice_buf_table *ice_buf_tbl;
+
+       ice_debug(hw, ICE_DBG_PKG, "Segment format version: %d.%d.%d.%d\n",
+                 ice_seg->hdr.seg_format_ver.major,
+                 ice_seg->hdr.seg_format_ver.minor,
+                 ice_seg->hdr.seg_format_ver.update,
+                 ice_seg->hdr.seg_format_ver.draft);
+
+       ice_debug(hw, ICE_DBG_PKG, "Seg: type 0x%X, size %d, name %s\n",
+                 le32_to_cpu(ice_seg->hdr.seg_type),
+                 le32_to_cpu(ice_seg->hdr.seg_size), ice_seg->hdr.seg_id);
+
+       ice_buf_tbl = ice_find_buf_table(ice_seg);
+
+       ice_debug(hw, ICE_DBG_PKG, "Seg buf count: %d\n",
+                 le32_to_cpu(ice_buf_tbl->buf_count));
+
+       return ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array,
+                                 le32_to_cpu(ice_buf_tbl->buf_count));
+}
+
+/**
+ * ice_download_pkg
+ * @hw: pointer to the hardware structure
+ * @pkg_hdr: pointer to package header
+ * @ice_seg: pointer to the segment of the package to be downloaded
+ *
+ * Handles the download of a complete package.
+ */
+static enum ice_ddp_state
+ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr,
+                struct ice_seg *ice_seg)
+{
+       enum ice_ddp_state state;
+
+       if (hw->pkg_has_signing_seg)
+               state = ice_download_pkg_with_sig_seg(hw, pkg_hdr);
+       else
+               state = ice_download_pkg_without_sig_seg(hw, ice_seg);
+
        ice_post_pkg_dwnld_vlan_mode_cfg(hw);
 
        return state;
@@ -2083,7 +2180,7 @@ enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len)
 
        /* initialize package hints and then download package */
        ice_init_pkg_hints(hw, seg);
-       state = ice_download_pkg(hw, pkg);
+       state = ice_download_pkg(hw, pkg, seg);
        if (state == ICE_DDP_PKG_ALREADY_LOADED) {
                ice_debug(hw, ICE_DBG_INIT,
                          "package previously loaded - no work.\n");
index 835c419ccc7437ccc4c4ad8e4e4c4e2739a77295..86b180cb32a027d38f8422bb8da1fe3a2d1c54ac 100644 (file)
@@ -815,12 +815,6 @@ ice_dpll_input_prio_set(const struct dpll_pin *pin, void *pin_priv,
        struct ice_pf *pf = d->pf;
        int ret;
 
-       if (prio > ICE_DPLL_PRIO_MAX) {
-               NL_SET_ERR_MSG_FMT(extack, "prio out of supported range 0-%d",
-                                  ICE_DPLL_PRIO_MAX);
-               return -EINVAL;
-       }
-
        mutex_lock(&pf->dplls.lock);
        ret = ice_dpll_hw_input_prio_set(pf, d, p, prio, extack);
        mutex_unlock(&pf->dplls.lock);
@@ -1756,6 +1750,7 @@ ice_dpll_init_dpll(struct ice_pf *pf, struct ice_dpll *d, bool cgu,
        }
        d->pf = pf;
        if (cgu) {
+               ice_dpll_update_state(pf, d, true);
                ret = dpll_device_register(d->dpll, type, &ice_dpll_ops, d);
                if (ret) {
                        dpll_device_put(d->dpll);
@@ -1796,8 +1791,6 @@ static int ice_dpll_init_worker(struct ice_pf *pf)
        struct ice_dplls *d = &pf->dplls;
        struct kthread_worker *kworker;
 
-       ice_dpll_update_state(pf, &d->eec, true);
-       ice_dpll_update_state(pf, &d->pps, true);
        kthread_init_delayed_work(&d->work, ice_dpll_periodic_work);
        kworker = kthread_create_worker(0, "ice-dplls-%s",
                                        dev_name(ice_pf_to_dev(pf)));
@@ -1830,6 +1823,7 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
        int num_pins, i, ret = -EINVAL;
        struct ice_hw *hw = &pf->hw;
        struct ice_dpll_pin *pins;
+       unsigned long caps;
        u8 freq_supp_num;
        bool input;
 
@@ -1849,6 +1843,7 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
        }
 
        for (i = 0; i < num_pins; i++) {
+               caps = 0;
                pins[i].idx = i;
                pins[i].prop.board_label = ice_cgu_get_pin_name(hw, i, input);
                pins[i].prop.type = ice_cgu_get_pin_type(hw, i, input);
@@ -1861,8 +1856,8 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
                                                      &dp->input_prio[i]);
                        if (ret)
                                return ret;
-                       pins[i].prop.capabilities |=
-                               DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE;
+                       caps |= (DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE |
+                                DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE);
                        pins[i].prop.phase_range.min =
                                pf->dplls.input_phase_adj_max;
                        pins[i].prop.phase_range.max =
@@ -1872,9 +1867,11 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
                                pf->dplls.output_phase_adj_max;
                        pins[i].prop.phase_range.max =
                                -pf->dplls.output_phase_adj_max;
+                       ret = ice_cgu_get_output_pin_state_caps(hw, i, &caps);
+                       if (ret)
+                               return ret;
                }
-               pins[i].prop.capabilities |=
-                       DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+               pins[i].prop.capabilities = caps;
                ret = ice_dpll_pin_state_update(pf, &pins[i], pin_type, NULL);
                if (ret)
                        return ret;
index bb32b6d88373e21b7cf888f1527504fd3be958c4..93172e93995b949cc91a6497ee942fb1de0bcee0 100644 (file)
@@ -6,7 +6,6 @@
 
 #include "ice.h"
 
-#define ICE_DPLL_PRIO_MAX      0xF
 #define ICE_DPLL_RCLK_NUM_MAX  4
 
 /** ice_dpll_pin - store info about pins
index cd065ec48c87e62752081c8491bf41acd5c8d63a..280994ee59330a2cd62d99e6c9221872e0fa5ca4 100644 (file)
@@ -569,6 +569,50 @@ resume_traffic:
                dev_dbg(dev, "Problem restarting traffic for LAG node move\n");
 }
 
+/**
+ * ice_lag_build_netdev_list - populate the lag struct's netdev list
+ * @lag: local lag struct
+ * @ndlist: pointer to netdev list to populate
+ */
+static void ice_lag_build_netdev_list(struct ice_lag *lag,
+                                     struct ice_lag_netdev_list *ndlist)
+{
+       struct ice_lag_netdev_list *nl;
+       struct net_device *tmp_nd;
+
+       INIT_LIST_HEAD(&ndlist->node);
+       rcu_read_lock();
+       for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) {
+               nl = kzalloc(sizeof(*nl), GFP_ATOMIC);
+               if (!nl)
+                       break;
+
+               nl->netdev = tmp_nd;
+               list_add(&nl->node, &ndlist->node);
+       }
+       rcu_read_unlock();
+       lag->netdev_head = &ndlist->node;
+}
+
+/**
+ * ice_lag_destroy_netdev_list - free lag struct's netdev list
+ * @lag: pointer to local lag struct
+ * @ndlist: pointer to lag struct netdev list
+ */
+static void ice_lag_destroy_netdev_list(struct ice_lag *lag,
+                                       struct ice_lag_netdev_list *ndlist)
+{
+       struct ice_lag_netdev_list *entry, *n;
+
+       rcu_read_lock();
+       list_for_each_entry_safe(entry, n, &ndlist->node, node) {
+               list_del(&entry->node);
+               kfree(entry);
+       }
+       rcu_read_unlock();
+       lag->netdev_head = NULL;
+}
+
 /**
  * ice_lag_move_single_vf_nodes - Move Tx scheduling nodes for single VF
  * @lag: primary interface LAG struct
@@ -597,7 +641,6 @@ ice_lag_move_single_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport,
 void ice_lag_move_new_vf_nodes(struct ice_vf *vf)
 {
        struct ice_lag_netdev_list ndlist;
-       struct list_head *tmp, *n;
        u8 pri_port, act_port;
        struct ice_lag *lag;
        struct ice_vsi *vsi;
@@ -621,38 +664,15 @@ void ice_lag_move_new_vf_nodes(struct ice_vf *vf)
        pri_port = pf->hw.port_info->lport;
        act_port = lag->active_port;
 
-       if (lag->upper_netdev) {
-               struct ice_lag_netdev_list *nl;
-               struct net_device *tmp_nd;
-
-               INIT_LIST_HEAD(&ndlist.node);
-               rcu_read_lock();
-               for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) {
-                       nl = kzalloc(sizeof(*nl), GFP_ATOMIC);
-                       if (!nl)
-                               break;
-
-                       nl->netdev = tmp_nd;
-                       list_add(&nl->node, &ndlist.node);
-               }
-               rcu_read_unlock();
-       }
-
-       lag->netdev_head = &ndlist.node;
+       if (lag->upper_netdev)
+               ice_lag_build_netdev_list(lag, &ndlist);
 
        if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) &&
            lag->bonded && lag->primary && pri_port != act_port &&
            !list_empty(lag->netdev_head))
                ice_lag_move_single_vf_nodes(lag, pri_port, act_port, vsi->idx);
 
-       list_for_each_safe(tmp, n, &ndlist.node) {
-               struct ice_lag_netdev_list *entry;
-
-               entry = list_entry(tmp, struct ice_lag_netdev_list, node);
-               list_del(&entry->node);
-               kfree(entry);
-       }
-       lag->netdev_head = NULL;
+       ice_lag_destroy_netdev_list(lag, &ndlist);
 
 new_vf_unlock:
        mutex_unlock(&pf->lag_mutex);
@@ -679,6 +699,29 @@ static void ice_lag_move_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport)
                        ice_lag_move_single_vf_nodes(lag, oldport, newport, i);
 }
 
+/**
+ * ice_lag_move_vf_nodes_cfg - move vf nodes outside LAG netdev event context
+ * @lag: local lag struct
+ * @src_prt: lport value for source port
+ * @dst_prt: lport value for destination port
+ *
+ * This function is used to move nodes during an out-of-netdev-event situation,
+ * primarily when the driver needs to reconfigure or recreate resources.
+ *
+ * Must be called while holding the lag_mutex to avoid lag events from
+ * processing while out-of-sync moves are happening.  Also, paired moves,
+ * such as used in a reset flow, should both be called under the same mutex
+ * lock to avoid changes between start of reset and end of reset.
+ */
+void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt)
+{
+       struct ice_lag_netdev_list ndlist;
+
+       ice_lag_build_netdev_list(lag, &ndlist);
+       ice_lag_move_vf_nodes(lag, src_prt, dst_prt);
+       ice_lag_destroy_netdev_list(lag, &ndlist);
+}
+
 #define ICE_LAG_SRIOV_CP_RECIPE                10
 #define ICE_LAG_SRIOV_TRAIN_PKT_LEN    16
 
@@ -2051,7 +2094,6 @@ void ice_lag_rebuild(struct ice_pf *pf)
 {
        struct ice_lag_netdev_list ndlist;
        struct ice_lag *lag, *prim_lag;
-       struct list_head *tmp, *n;
        u8 act_port, loc_port;
 
        if (!pf->lag || !pf->lag->bonded)
@@ -2063,21 +2105,7 @@ void ice_lag_rebuild(struct ice_pf *pf)
        if (lag->primary) {
                prim_lag = lag;
        } else {
-               struct ice_lag_netdev_list *nl;
-               struct net_device *tmp_nd;
-
-               INIT_LIST_HEAD(&ndlist.node);
-               rcu_read_lock();
-               for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) {
-                       nl = kzalloc(sizeof(*nl), GFP_ATOMIC);
-                       if (!nl)
-                               break;
-
-                       nl->netdev = tmp_nd;
-                       list_add(&nl->node, &ndlist.node);
-               }
-               rcu_read_unlock();
-               lag->netdev_head = &ndlist.node;
+               ice_lag_build_netdev_list(lag, &ndlist);
                prim_lag = ice_lag_find_primary(lag);
        }
 
@@ -2107,13 +2135,7 @@ void ice_lag_rebuild(struct ice_pf *pf)
 
        ice_clear_rdma_cap(pf);
 lag_rebuild_out:
-       list_for_each_safe(tmp, n, &ndlist.node) {
-               struct ice_lag_netdev_list *entry;
-
-               entry = list_entry(tmp, struct ice_lag_netdev_list, node);
-               list_del(&entry->node);
-               kfree(entry);
-       }
+       ice_lag_destroy_netdev_list(lag, &ndlist);
        mutex_unlock(&pf->lag_mutex);
 }
 
index 9557e8605a07bf5e59b15232b17b4cf869355e83..ede833dfa65866da00d8f4a6d77a90470906f863 100644 (file)
@@ -65,4 +65,5 @@ int ice_init_lag(struct ice_pf *pf);
 void ice_deinit_lag(struct ice_pf *pf);
 void ice_lag_rebuild(struct ice_pf *pf);
 bool ice_lag_is_switchdev_running(struct ice_pf *pf);
+void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt);
 #endif /* _ICE_LAG_H_ */
index 6607fa6fe55626b97f0de34b58dcb9dfdb8f815c..fb9c93f37e84f7f0e9e1d960522aaae1601f98ca 100644 (file)
@@ -7401,15 +7401,6 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
                goto err_vsi_rebuild;
        }
 
-       /* configure PTP timestamping after VSI rebuild */
-       if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) {
-               if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_SELF)
-                       ice_ptp_cfg_timestamp(pf, false);
-               else if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_ALL)
-                       /* for E82x PHC owner always need to have interrupts */
-                       ice_ptp_cfg_timestamp(pf, true);
-       }
-
        err = ice_vsi_rebuild_by_type(pf, ICE_VSI_SWITCHDEV_CTRL);
        if (err) {
                dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err);
@@ -7461,6 +7452,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
        ice_plug_aux_dev(pf);
        if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG))
                ice_lag_rebuild(pf);
+
+       /* Restore timestamp mode settings after VSI rebuild */
+       ice_ptp_restore_timestamp_mode(pf);
        return;
 
 err_vsi_rebuild:
index 1eddcbe89b0c47c18aaaa288839149f4443a94f7..71f405f8a6fee67663feb4b5ea805dfb75239c9a 100644 (file)
@@ -256,48 +256,42 @@ ice_verify_pin_e810t(struct ptp_clock_info *info, unsigned int pin,
 }
 
 /**
- * ice_ptp_configure_tx_tstamp - Enable or disable Tx timestamp interrupt
- * @pf: The PF pointer to search in
- * @on: bool value for whether timestamp interrupt is enabled or disabled
+ * ice_ptp_cfg_tx_interrupt - Configure Tx timestamp interrupt for the device
+ * @pf: Board private structure
+ *
+ * Program the device to respond appropriately to the Tx timestamp interrupt
+ * cause.
  */
-static void ice_ptp_configure_tx_tstamp(struct ice_pf *pf, bool on)
+static void ice_ptp_cfg_tx_interrupt(struct ice_pf *pf)
 {
+       struct ice_hw *hw = &pf->hw;
+       bool enable;
        u32 val;
 
+       switch (pf->ptp.tx_interrupt_mode) {
+       case ICE_PTP_TX_INTERRUPT_ALL:
+               /* React to interrupts across all quads. */
+               wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x1f);
+               enable = true;
+               break;
+       case ICE_PTP_TX_INTERRUPT_NONE:
+               /* Do not react to interrupts on any quad. */
+               wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x0);
+               enable = false;
+               break;
+       case ICE_PTP_TX_INTERRUPT_SELF:
+       default:
+               enable = pf->ptp.tstamp_config.tx_type == HWTSTAMP_TX_ON;
+               break;
+       }
+
        /* Configure the Tx timestamp interrupt */
-       val = rd32(&pf->hw, PFINT_OICR_ENA);
-       if (on)
+       val = rd32(hw, PFINT_OICR_ENA);
+       if (enable)
                val |= PFINT_OICR_TSYN_TX_M;
        else
                val &= ~PFINT_OICR_TSYN_TX_M;
-       wr32(&pf->hw, PFINT_OICR_ENA, val);
-}
-
-/**
- * ice_set_tx_tstamp - Enable or disable Tx timestamping
- * @pf: The PF pointer to search in
- * @on: bool value for whether timestamps are enabled or disabled
- */
-static void ice_set_tx_tstamp(struct ice_pf *pf, bool on)
-{
-       struct ice_vsi *vsi;
-       u16 i;
-
-       vsi = ice_get_main_vsi(pf);
-       if (!vsi)
-               return;
-
-       /* Set the timestamp enable flag for all the Tx rings */
-       ice_for_each_txq(vsi, i) {
-               if (!vsi->tx_rings[i])
-                       continue;
-               vsi->tx_rings[i]->ptp_tx = on;
-       }
-
-       if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_SELF)
-               ice_ptp_configure_tx_tstamp(pf, on);
-
-       pf->ptp.tstamp_config.tx_type = on ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+       wr32(hw, PFINT_OICR_ENA, val);
 }
 
 /**
@@ -311,7 +305,7 @@ static void ice_set_rx_tstamp(struct ice_pf *pf, bool on)
        u16 i;
 
        vsi = ice_get_main_vsi(pf);
-       if (!vsi)
+       if (!vsi || !vsi->rx_rings)
                return;
 
        /* Set the timestamp flag for all the Rx rings */
@@ -320,23 +314,50 @@ static void ice_set_rx_tstamp(struct ice_pf *pf, bool on)
                        continue;
                vsi->rx_rings[i]->ptp_rx = on;
        }
+}
+
+/**
+ * ice_ptp_disable_timestamp_mode - Disable current timestamp mode
+ * @pf: Board private structure
+ *
+ * Called during preparation for reset to temporarily disable timestamping on
+ * the device. Called during remove to disable timestamping while cleaning up
+ * driver resources.
+ */
+static void ice_ptp_disable_timestamp_mode(struct ice_pf *pf)
+{
+       struct ice_hw *hw = &pf->hw;
+       u32 val;
+
+       val = rd32(hw, PFINT_OICR_ENA);
+       val &= ~PFINT_OICR_TSYN_TX_M;
+       wr32(hw, PFINT_OICR_ENA, val);
 
-       pf->ptp.tstamp_config.rx_filter = on ? HWTSTAMP_FILTER_ALL :
-                                              HWTSTAMP_FILTER_NONE;
+       ice_set_rx_tstamp(pf, false);
 }
 
 /**
- * ice_ptp_cfg_timestamp - Configure timestamp for init/deinit
+ * ice_ptp_restore_timestamp_mode - Restore timestamp configuration
  * @pf: Board private structure
- * @ena: bool value to enable or disable time stamp
  *
- * This function will configure timestamping during PTP initialization
- * and deinitialization
+ * Called at the end of rebuild to restore timestamp configuration after
+ * a device reset.
  */
-void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena)
+void ice_ptp_restore_timestamp_mode(struct ice_pf *pf)
 {
-       ice_set_tx_tstamp(pf, ena);
-       ice_set_rx_tstamp(pf, ena);
+       struct ice_hw *hw = &pf->hw;
+       bool enable_rx;
+
+       ice_ptp_cfg_tx_interrupt(pf);
+
+       enable_rx = pf->ptp.tstamp_config.rx_filter == HWTSTAMP_FILTER_ALL;
+       ice_set_rx_tstamp(pf, enable_rx);
+
+       /* Trigger an immediate software interrupt to ensure that timestamps
+        * which occurred during reset are handled now.
+        */
+       wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M);
+       ice_flush(hw);
 }
 
 /**
@@ -2037,10 +2058,10 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config)
 {
        switch (config->tx_type) {
        case HWTSTAMP_TX_OFF:
-               ice_set_tx_tstamp(pf, false);
+               pf->ptp.tstamp_config.tx_type = HWTSTAMP_TX_OFF;
                break;
        case HWTSTAMP_TX_ON:
-               ice_set_tx_tstamp(pf, true);
+               pf->ptp.tstamp_config.tx_type = HWTSTAMP_TX_ON;
                break;
        default:
                return -ERANGE;
@@ -2048,7 +2069,7 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config)
 
        switch (config->rx_filter) {
        case HWTSTAMP_FILTER_NONE:
-               ice_set_rx_tstamp(pf, false);
+               pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
                break;
        case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
        case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
@@ -2064,12 +2085,15 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config)
        case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
        case HWTSTAMP_FILTER_NTP_ALL:
        case HWTSTAMP_FILTER_ALL:
-               ice_set_rx_tstamp(pf, true);
+               pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_ALL;
                break;
        default:
                return -ERANGE;
        }
 
+       /* Immediately update the device timestamping mode */
+       ice_ptp_restore_timestamp_mode(pf);
+
        return 0;
 }
 
@@ -2737,7 +2761,7 @@ void ice_ptp_prepare_for_reset(struct ice_pf *pf)
        clear_bit(ICE_FLAG_PTP, pf->flags);
 
        /* Disable timestamping for both Tx and Rx */
-       ice_ptp_cfg_timestamp(pf, false);
+       ice_ptp_disable_timestamp_mode(pf);
 
        kthread_cancel_delayed_work_sync(&ptp->work);
 
@@ -2803,15 +2827,7 @@ static int ice_ptp_init_owner(struct ice_pf *pf)
        /* Release the global hardware lock */
        ice_ptp_unlock(hw);
 
-       if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_ALL) {
-               /* The clock owner for this device type handles the timestamp
-                * interrupt for all ports.
-                */
-               ice_ptp_configure_tx_tstamp(pf, true);
-
-               /* React on all quads interrupts for E82x */
-               wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x1f);
-
+       if (!ice_is_e810(hw)) {
                /* Enable quad interrupts */
                err = ice_ptp_tx_ena_intr(pf, true, itr);
                if (err)
@@ -2881,13 +2897,6 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port)
        case ICE_PHY_E810:
                return ice_ptp_init_tx_e810(pf, &ptp_port->tx);
        case ICE_PHY_E822:
-               /* Non-owner PFs don't react to any interrupts on E82x,
-                * neither on own quad nor on others
-                */
-               if (!ice_ptp_pf_handles_tx_interrupt(pf)) {
-                       ice_ptp_configure_tx_tstamp(pf, false);
-                       wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x0);
-               }
                kthread_init_delayed_work(&ptp_port->ov_work,
                                          ice_ptp_wait_for_offsets);
 
@@ -3032,6 +3041,9 @@ void ice_ptp_init(struct ice_pf *pf)
        /* Start the PHY timestamping block */
        ice_ptp_reset_phy_timestamping(pf);
 
+       /* Configure initial Tx interrupt settings */
+       ice_ptp_cfg_tx_interrupt(pf);
+
        set_bit(ICE_FLAG_PTP, pf->flags);
        err = ice_ptp_init_work(pf, ptp);
        if (err)
@@ -3067,7 +3079,7 @@ void ice_ptp_release(struct ice_pf *pf)
                return;
 
        /* Disable timestamping for both Tx and Rx */
-       ice_ptp_cfg_timestamp(pf, false);
+       ice_ptp_disable_timestamp_mode(pf);
 
        ice_ptp_remove_auxbus_device(pf);
 
index 8f6f943927564337d184168b072e1e656f6abbd9..06a330867fc9be3b5f6653bb6c8c43740cd62b04 100644 (file)
@@ -292,7 +292,7 @@ int ice_ptp_clock_index(struct ice_pf *pf);
 struct ice_pf;
 int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr);
 int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr);
-void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena);
+void ice_ptp_restore_timestamp_mode(struct ice_pf *pf);
 
 void ice_ptp_extts_event(struct ice_pf *pf);
 s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb);
@@ -317,8 +317,7 @@ static inline int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr)
        return -EOPNOTSUPP;
 }
 
-static inline void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena) { }
-
+static inline void ice_ptp_restore_timestamp_mode(struct ice_pf *pf) { }
 static inline void ice_ptp_extts_event(struct ice_pf *pf) { }
 static inline s8
 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
index 6d573908de7a0bf3690695e97ac191d8dec6f559..a00b55e14aac4e53806e22e9fcec377c5ffa9914 100644 (file)
@@ -3961,3 +3961,57 @@ int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num)
 
        return ret;
 }
+
+/**
+ * ice_cgu_get_output_pin_state_caps - get output pin state capabilities
+ * @hw: pointer to the hw struct
+ * @pin_id: id of a pin
+ * @caps: capabilities to modify
+ *
+ * Return:
+ * * 0 - success, state capabilities were modified
+ * * negative - failure, capabilities were not modified
+ */
+int ice_cgu_get_output_pin_state_caps(struct ice_hw *hw, u8 pin_id,
+                                     unsigned long *caps)
+{
+       bool can_change = true;
+
+       switch (hw->device_id) {
+       case ICE_DEV_ID_E810C_SFP:
+               if (pin_id == ZL_OUT2 || pin_id == ZL_OUT3)
+                       can_change = false;
+               break;
+       case ICE_DEV_ID_E810C_QSFP:
+               if (pin_id == ZL_OUT2 || pin_id == ZL_OUT3 || pin_id == ZL_OUT4)
+                       can_change = false;
+               break;
+       case ICE_DEV_ID_E823L_10G_BASE_T:
+       case ICE_DEV_ID_E823L_1GBE:
+       case ICE_DEV_ID_E823L_BACKPLANE:
+       case ICE_DEV_ID_E823L_QSFP:
+       case ICE_DEV_ID_E823L_SFP:
+       case ICE_DEV_ID_E823C_10G_BASE_T:
+       case ICE_DEV_ID_E823C_BACKPLANE:
+       case ICE_DEV_ID_E823C_QSFP:
+       case ICE_DEV_ID_E823C_SFP:
+       case ICE_DEV_ID_E823C_SGMII:
+               if (hw->cgu_part_number ==
+                   ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032 &&
+                   pin_id == ZL_OUT2)
+                       can_change = false;
+               else if (hw->cgu_part_number ==
+                        ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384 &&
+                        pin_id == SI_OUT1)
+                       can_change = false;
+               break;
+       default:
+               return -EINVAL;
+       }
+       if (can_change)
+               *caps |= DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+       else
+               *caps &= ~DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+
+       return 0;
+}
index 36aeeef99ec07e9b7bfdb63d175a1ad09188b71d..cf76701566c72479e7c8df5f2011c7ee1d146b6a 100644 (file)
@@ -282,6 +282,8 @@ int ice_get_cgu_state(struct ice_hw *hw, u8 dpll_idx,
 int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num);
 
 void ice_ptp_init_phy_model(struct ice_hw *hw);
+int ice_cgu_get_output_pin_state_caps(struct ice_hw *hw, u8 pin_id,
+                                     unsigned long *caps);
 
 #define PFTSYN_SEM_BYTES       4
 
index 52d0a126eb6161852bb96457b9025a35e5641578..9e97ea8630686720bca3903cc3f6ae86c6807316 100644 (file)
@@ -2306,9 +2306,6 @@ ice_tstamp(struct ice_tx_ring *tx_ring, struct sk_buff *skb,
        if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
                return;
 
-       if (!tx_ring->ptp_tx)
-               return;
-
        /* Tx timestamps cannot be sampled when doing TSO */
        if (first->tx_flags & ICE_TX_FLAGS_TSO)
                return;
index 166413fc33f48f71a459009819a75d000601b03f..daf7b9dbb1435ad45a6c5efd0acfc84563434e02 100644 (file)
@@ -380,7 +380,6 @@ struct ice_tx_ring {
 #define ICE_TX_FLAGS_RING_VLAN_L2TAG2  BIT(2)
        u8 flags;
        u8 dcb_tc;                      /* Traffic class of ring */
-       u8 ptp_tx;
 } ____cacheline_internodealigned_in_smp;
 
 static inline bool ice_ring_uses_build_skb(struct ice_rx_ring *ring)
index aca1f2ea50340f26e7d79f57db0bb323e23212f5..b7ae099521566f5dff5a007c9b9e298166a2fbf0 100644 (file)
@@ -829,12 +829,16 @@ static void ice_notify_vf_reset(struct ice_vf *vf)
 int ice_reset_vf(struct ice_vf *vf, u32 flags)
 {
        struct ice_pf *pf = vf->pf;
+       struct ice_lag *lag;
        struct ice_vsi *vsi;
+       u8 act_prt, pri_prt;
        struct device *dev;
        int err = 0;
        bool rsd;
 
        dev = ice_pf_to_dev(pf);
+       act_prt = ICE_LAG_INVALID_PORT;
+       pri_prt = pf->hw.port_info->lport;
 
        if (flags & ICE_VF_RESET_NOTIFY)
                ice_notify_vf_reset(vf);
@@ -845,6 +849,17 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
                return 0;
        }
 
+       lag = pf->lag;
+       mutex_lock(&pf->lag_mutex);
+       if (lag && lag->bonded && lag->primary) {
+               act_prt = lag->active_port;
+               if (act_prt != pri_prt && act_prt != ICE_LAG_INVALID_PORT &&
+                   lag->upper_netdev)
+                       ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt);
+               else
+                       act_prt = ICE_LAG_INVALID_PORT;
+       }
+
        if (flags & ICE_VF_RESET_LOCK)
                mutex_lock(&vf->cfg_lock);
        else
@@ -937,6 +952,11 @@ out_unlock:
        if (flags & ICE_VF_RESET_LOCK)
                mutex_unlock(&vf->cfg_lock);
 
+       if (lag && lag->bonded && lag->primary &&
+           act_prt != ICE_LAG_INVALID_PORT)
+               ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt);
+       mutex_unlock(&pf->lag_mutex);
+
        return err;
 }
 
index cdf17b1e2f252bf833eb5743d35a868780885b59..de11b3186bd7ea5731c3b63850841ffa39279ac8 100644 (file)
@@ -1603,9 +1603,24 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
            (struct virtchnl_vsi_queue_config_info *)msg;
        struct virtchnl_queue_pair_info *qpi;
        struct ice_pf *pf = vf->pf;
+       struct ice_lag *lag;
        struct ice_vsi *vsi;
+       u8 act_prt, pri_prt;
        int i = -1, q_idx;
 
+       lag = pf->lag;
+       mutex_lock(&pf->lag_mutex);
+       act_prt = ICE_LAG_INVALID_PORT;
+       pri_prt = pf->hw.port_info->lport;
+       if (lag && lag->bonded && lag->primary) {
+               act_prt = lag->active_port;
+               if (act_prt != pri_prt && act_prt != ICE_LAG_INVALID_PORT &&
+                   lag->upper_netdev)
+                       ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt);
+               else
+                       act_prt = ICE_LAG_INVALID_PORT;
+       }
+
        if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
                goto error_param;
 
@@ -1729,6 +1744,11 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
                }
        }
 
+       if (lag && lag->bonded && lag->primary &&
+           act_prt != ICE_LAG_INVALID_PORT)
+               ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt);
+       mutex_unlock(&pf->lag_mutex);
+
        /* send the response to the VF */
        return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
                                     VIRTCHNL_STATUS_SUCCESS, NULL, 0);
@@ -1743,6 +1763,11 @@ error_param:
                                vf->vf_id, i);
        }
 
+       if (lag && lag->bonded && lag->primary &&
+           act_prt != ICE_LAG_INVALID_PORT)
+               ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt);
+       mutex_unlock(&pf->lag_mutex);
+
        ice_lag_move_new_vf_nodes(vf);
 
        /* send the response to the VF */
index 90817136808d46039eb55839dff812533d1ae40e..29aac327574d64c281f30ecc8aba885506d83737 100644 (file)
@@ -4790,14 +4790,17 @@ static void mvneta_ethtool_get_strings(struct net_device *netdev, u32 sset,
                                       u8 *data)
 {
        if (sset == ETH_SS_STATS) {
+               struct mvneta_port *pp = netdev_priv(netdev);
                int i;
 
                for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++)
                        memcpy(data + i * ETH_GSTRING_LEN,
                               mvneta_statistics[i].name, ETH_GSTRING_LEN);
 
-               data += ETH_GSTRING_LEN * ARRAY_SIZE(mvneta_statistics);
-               page_pool_ethtool_stats_get_strings(data);
+               if (!pp->bm_priv) {
+                       data += ETH_GSTRING_LEN * ARRAY_SIZE(mvneta_statistics);
+                       page_pool_ethtool_stats_get_strings(data);
+               }
        }
 }
 
@@ -4915,8 +4918,10 @@ static void mvneta_ethtool_pp_stats(struct mvneta_port *pp, u64 *data)
        struct page_pool_stats stats = {};
        int i;
 
-       for (i = 0; i < rxq_number; i++)
-               page_pool_get_stats(pp->rxqs[i].page_pool, &stats);
+       for (i = 0; i < rxq_number; i++) {
+               if (pp->rxqs[i].page_pool)
+                       page_pool_get_stats(pp->rxqs[i].page_pool, &stats);
+       }
 
        page_pool_ethtool_stats_get(data, &stats);
 }
@@ -4932,14 +4937,21 @@ static void mvneta_ethtool_get_stats(struct net_device *dev,
        for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++)
                *data++ = pp->ethtool_stats[i];
 
-       mvneta_ethtool_pp_stats(pp, data);
+       if (!pp->bm_priv)
+               mvneta_ethtool_pp_stats(pp, data);
 }
 
 static int mvneta_ethtool_get_sset_count(struct net_device *dev, int sset)
 {
-       if (sset == ETH_SS_STATS)
-               return ARRAY_SIZE(mvneta_statistics) +
-                      page_pool_ethtool_stats_get_count();
+       if (sset == ETH_SS_STATS) {
+               int count = ARRAY_SIZE(mvneta_statistics);
+               struct mvneta_port *pp = netdev_priv(dev);
+
+               if (!pp->bm_priv)
+                       count += page_pool_ethtool_stats_get_count();
+
+               return count;
+       }
 
        return -EOPNOTSUPP;
 }
index 23c2f2ed2fb83294f4345cf55e9b5252d280b376..c112c71ff576f8a693412ff30b435eb0ee3a193f 100644 (file)
@@ -5505,6 +5505,8 @@ int rvu_mbox_handler_nix_bandprof_free(struct rvu *rvu,
 
                ipolicer = &nix_hw->ipolicer[layer];
                for (idx = 0; idx < req->prof_count[layer]; idx++) {
+                       if (idx == MAX_BANDPROF_PER_PFFUNC)
+                               break;
                        prof_idx = req->prof_idx[layer][idx];
                        if (prof_idx >= ipolicer->band_prof.max ||
                            ipolicer->pfvf_map[prof_idx] != pcifunc)
@@ -5518,8 +5520,6 @@ int rvu_mbox_handler_nix_bandprof_free(struct rvu *rvu,
                        ipolicer->pfvf_map[prof_idx] = 0x00;
                        ipolicer->match_id[prof_idx] = 0;
                        rvu_free_rsrc(&ipolicer->band_prof, prof_idx);
-                       if (idx == MAX_BANDPROF_PER_PFFUNC)
-                               break;
                }
        }
        mutex_unlock(&rvu->rsrc_lock);
index a4a258da8dd59aca6d6832ca0ca77d9d091d38f8..c1c99d7054f87f5b241453e45e9b9ec9598a1b5e 100644 (file)
@@ -450,6 +450,9 @@ int cn10k_set_ipolicer_rate(struct otx2_nic *pfvf, u16 profile,
        aq->prof.pebs_mantissa = 0;
        aq->prof_mask.pebs_mantissa = 0xFF;
 
+       aq->prof.hl_en = 0;
+       aq->prof_mask.hl_en = 1;
+
        /* Fill AQ info */
        aq->qidx = profile;
        aq->ctype = NIX_AQ_CTYPE_BANDPROF;
index e7c69b57147e097827afaba08cf647372ad021f7..06910307085efa83cc386e8e180805bef0ee539e 100644 (file)
@@ -1070,6 +1070,8 @@ int otx2_init_tc(struct otx2_nic *nic);
 void otx2_shutdown_tc(struct otx2_nic *nic);
 int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type,
                  void *type_data);
+void otx2_tc_apply_ingress_police_rules(struct otx2_nic *nic);
+
 /* CGX/RPM DMAC filters support */
 int otx2_dmacflt_get_max_cnt(struct otx2_nic *pf);
 int otx2_dmacflt_add(struct otx2_nic *pf, const u8 *mac, u32 bit_pos);
index 4762dbea64a12bab71b579b822a9c355c2535e30..97a71e9b856372b95edd964e31fdc0b1156b8454 100644 (file)
@@ -1088,6 +1088,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
        struct ethhdr *eth_hdr;
        bool new = false;
        int err = 0;
+       u64 vf_num;
        u32 ring;
 
        if (!flow_cfg->max_flows) {
@@ -1100,7 +1101,21 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
        if (!(pfvf->flags & OTX2_FLAG_NTUPLE_SUPPORT))
                return -ENOMEM;
 
-       if (ring >= pfvf->hw.rx_queues && fsp->ring_cookie != RX_CLS_FLOW_DISC)
+       /* Number of queues on a VF can be greater or less than
+        * the PF's queue. Hence no need to check for the
+        * queue count. Hence no need to check queue count if PF
+        * is installing for its VF. Below is the expected vf_num value
+        * based on the ethtool commands.
+        *
+        * e.g.
+        * 1. ethtool -U <netdev> ... action -1  ==> vf_num:255
+        * 2. ethtool -U <netdev> ... action <queue_num>  ==> vf_num:0
+        * 3. ethtool -U <netdev> ... vf <vf_idx> queue <queue_num>  ==>
+        *    vf_num:vf_idx+1
+        */
+       vf_num = ethtool_get_flow_spec_ring_vf(fsp->ring_cookie);
+       if (!is_otx2_vf(pfvf->pcifunc) && !vf_num &&
+           ring >= pfvf->hw.rx_queues && fsp->ring_cookie != RX_CLS_FLOW_DISC)
                return -EINVAL;
 
        if (fsp->location >= otx2_get_maxflows(flow_cfg))
@@ -1182,6 +1197,9 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
                flow_cfg->nr_flows++;
        }
 
+       if (flow->is_vf)
+               netdev_info(pfvf->netdev,
+                           "Make sure that VF's queue number is within its queue limit\n");
        return 0;
 }
 
index 91b99fd703616ae5915758a374a17e458bea8ffc..532e324bdcc8e6cbd975017474d06a6af303ae85 100644 (file)
@@ -566,7 +566,9 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq)
                otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(1), intr);
                otx2_queue_work(mbox, pf->mbox_pfvf_wq, 64, vfs, intr,
                                TYPE_PFVF);
-               vfs -= 64;
+               if (intr)
+                       trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr);
+               vfs = 64;
        }
 
        intr = otx2_read64(pf, RVU_PF_VFPF_MBOX_INTX(0));
@@ -574,7 +576,8 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq)
 
        otx2_queue_work(mbox, pf->mbox_pfvf_wq, 0, vfs, intr, TYPE_PFVF);
 
-       trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr);
+       if (intr)
+               trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr);
 
        return IRQ_HANDLED;
 }
@@ -1870,6 +1873,8 @@ int otx2_open(struct net_device *netdev)
        if (pf->flags & OTX2_FLAG_DMACFLTR_SUPPORT)
                otx2_dmacflt_reinstall_flows(pf);
 
+       otx2_tc_apply_ingress_police_rules(pf);
+
        err = otx2_rxtx_enable(pf, true);
        /* If a mbox communication error happens at this point then interface
         * will end up in a state such that it is in down state but hardware
@@ -1934,6 +1939,8 @@ int otx2_stop(struct net_device *netdev)
        /* Clear RSS enable flag */
        rss = &pf->hw.rss_info;
        rss->enable = false;
+       if (!netif_is_rxfh_configured(netdev))
+               kfree(rss->rss_ctx[DEFAULT_RSS_CONTEXT_GROUP]);
 
        /* Cleanup Queue IRQ */
        vec = pci_irq_vector(pf->pdev,
index 8a5e3987a482c2986b1780d7cb5ce87b6a16c3b9..db1e0e0e812d3d1c7674d22989ce410767c896fc 100644 (file)
@@ -47,6 +47,9 @@ struct otx2_tc_flow {
        bool                            is_act_police;
        u32                             prio;
        struct npc_install_flow_req     req;
+       u64                             rate;
+       u32                             burst;
+       bool                            is_pps;
 };
 
 static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst,
@@ -284,21 +287,10 @@ static int otx2_tc_egress_matchall_delete(struct otx2_nic *nic,
        return err;
 }
 
-static int otx2_tc_act_set_police(struct otx2_nic *nic,
-                                 struct otx2_tc_flow *node,
-                                 struct flow_cls_offload *f,
-                                 u64 rate, u32 burst, u32 mark,
-                                 struct npc_install_flow_req *req, bool pps)
+static int otx2_tc_act_set_hw_police(struct otx2_nic *nic,
+                                    struct otx2_tc_flow *node)
 {
-       struct netlink_ext_ack *extack = f->common.extack;
-       struct otx2_hw *hw = &nic->hw;
-       int rq_idx, rc;
-
-       rq_idx = find_first_zero_bit(&nic->rq_bmap, hw->rx_queues);
-       if (rq_idx >= hw->rx_queues) {
-               NL_SET_ERR_MSG_MOD(extack, "Police action rules exceeded");
-               return -EINVAL;
-       }
+       int rc;
 
        mutex_lock(&nic->mbox.lock);
 
@@ -308,23 +300,17 @@ static int otx2_tc_act_set_police(struct otx2_nic *nic,
                return rc;
        }
 
-       rc = cn10k_set_ipolicer_rate(nic, node->leaf_profile, burst, rate, pps);
+       rc = cn10k_set_ipolicer_rate(nic, node->leaf_profile,
+                                    node->burst, node->rate, node->is_pps);
        if (rc)
                goto free_leaf;
 
-       rc = cn10k_map_unmap_rq_policer(nic, rq_idx, node->leaf_profile, true);
+       rc = cn10k_map_unmap_rq_policer(nic, node->rq, node->leaf_profile, true);
        if (rc)
                goto free_leaf;
 
        mutex_unlock(&nic->mbox.lock);
 
-       req->match_id = mark & 0xFFFFULL;
-       req->index = rq_idx;
-       req->op = NIX_RX_ACTIONOP_UCAST;
-       set_bit(rq_idx, &nic->rq_bmap);
-       node->is_act_police = true;
-       node->rq = rq_idx;
-
        return 0;
 
 free_leaf:
@@ -336,6 +322,39 @@ free_leaf:
        return rc;
 }
 
+static int otx2_tc_act_set_police(struct otx2_nic *nic,
+                                 struct otx2_tc_flow *node,
+                                 struct flow_cls_offload *f,
+                                 u64 rate, u32 burst, u32 mark,
+                                 struct npc_install_flow_req *req, bool pps)
+{
+       struct netlink_ext_ack *extack = f->common.extack;
+       struct otx2_hw *hw = &nic->hw;
+       int rq_idx, rc;
+
+       rq_idx = find_first_zero_bit(&nic->rq_bmap, hw->rx_queues);
+       if (rq_idx >= hw->rx_queues) {
+               NL_SET_ERR_MSG_MOD(extack, "Police action rules exceeded");
+               return -EINVAL;
+       }
+
+       req->match_id = mark & 0xFFFFULL;
+       req->index = rq_idx;
+       req->op = NIX_RX_ACTIONOP_UCAST;
+
+       node->is_act_police = true;
+       node->rq = rq_idx;
+       node->burst = burst;
+       node->rate = rate;
+       node->is_pps = pps;
+
+       rc = otx2_tc_act_set_hw_police(nic, node);
+       if (!rc)
+               set_bit(rq_idx, &nic->rq_bmap);
+
+       return rc;
+}
+
 static int otx2_tc_parse_actions(struct otx2_nic *nic,
                                 struct flow_action *flow_action,
                                 struct npc_install_flow_req *req,
@@ -1044,6 +1063,11 @@ static int otx2_tc_del_flow(struct otx2_nic *nic,
        }
 
        if (flow_node->is_act_police) {
+               __clear_bit(flow_node->rq, &nic->rq_bmap);
+
+               if (nic->flags & OTX2_FLAG_INTF_DOWN)
+                       goto free_mcam_flow;
+
                mutex_lock(&nic->mbox.lock);
 
                err = cn10k_map_unmap_rq_policer(nic, flow_node->rq,
@@ -1059,11 +1083,10 @@ static int otx2_tc_del_flow(struct otx2_nic *nic,
                                   "Unable to free leaf bandwidth profile(%d)\n",
                                   flow_node->leaf_profile);
 
-               __clear_bit(flow_node->rq, &nic->rq_bmap);
-
                mutex_unlock(&nic->mbox.lock);
        }
 
+free_mcam_flow:
        otx2_del_mcam_flow_entry(nic, flow_node->entry, NULL);
        otx2_tc_update_mcam_table(nic, flow_cfg, flow_node, false);
        kfree_rcu(flow_node, rcu);
@@ -1083,6 +1106,11 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
        if (!(nic->flags & OTX2_FLAG_TC_FLOWER_SUPPORT))
                return -ENOMEM;
 
+       if (nic->flags & OTX2_FLAG_INTF_DOWN) {
+               NL_SET_ERR_MSG_MOD(extack, "Interface not initialized");
+               return -EINVAL;
+       }
+
        if (flow_cfg->nr_flows == flow_cfg->max_flows) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Free MCAM entry not available to add the flow");
@@ -1442,3 +1470,45 @@ void otx2_shutdown_tc(struct otx2_nic *nic)
        otx2_destroy_tc_flow_list(nic);
 }
 EXPORT_SYMBOL(otx2_shutdown_tc);
+
+static void otx2_tc_config_ingress_rule(struct otx2_nic *nic,
+                                       struct otx2_tc_flow *node)
+{
+       struct npc_install_flow_req *req;
+
+       if (otx2_tc_act_set_hw_police(nic, node))
+               return;
+
+       mutex_lock(&nic->mbox.lock);
+
+       req = otx2_mbox_alloc_msg_npc_install_flow(&nic->mbox);
+       if (!req)
+               goto err;
+
+       memcpy(req, &node->req, sizeof(struct npc_install_flow_req));
+
+       if (otx2_sync_mbox_msg(&nic->mbox))
+               netdev_err(nic->netdev,
+                          "Failed to install MCAM flow entry for ingress rule");
+err:
+       mutex_unlock(&nic->mbox.lock);
+}
+
+void otx2_tc_apply_ingress_police_rules(struct otx2_nic *nic)
+{
+       struct otx2_flow_config *flow_cfg = nic->flow_cfg;
+       struct otx2_tc_flow *node;
+
+       /* If any ingress policer rules exist for the interface then
+        * apply those rules. Ingress policer rules depend on bandwidth
+        * profiles linked to the receive queues. Since no receive queues
+        * exist when interface is down, ingress policer rules are stored
+        * and configured in hardware after all receive queues are allocated
+        * in otx2_open.
+        */
+       list_for_each_entry(node, &flow_cfg->flow_list_tc, list) {
+               if (node->is_act_police)
+                       otx2_tc_config_ingress_rule(nic, node);
+       }
+}
+EXPORT_SYMBOL(otx2_tc_apply_ingress_police_rules);
index bb11e644d24f7b2c7576d3402e2404315bead4af..af3928eddafd11fc65b312ab7147a3095ad9f1a2 100644 (file)
@@ -177,6 +177,8 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq,
 
 static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
                                    struct mlx5_cqe64 *cqe,
+                                   u8 *md_buff,
+                                   u8 *md_buff_sz,
                                    int budget)
 {
        struct mlx5e_ptp_port_ts_cqe_list *pending_cqe_list = ptpsq->ts_cqe_pending_list;
@@ -211,19 +213,24 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
        mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp);
 out:
        napi_consume_skb(skb, budget);
-       mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist, metadata_id);
+       md_buff[*md_buff_sz++] = metadata_id;
        if (unlikely(mlx5e_ptp_metadata_map_unhealthy(&ptpsq->metadata_map)) &&
            !test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
                queue_work(ptpsq->txqsq.priv->wq, &ptpsq->report_unhealthy_work);
 }
 
-static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
+static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int napi_budget)
 {
        struct mlx5e_ptpsq *ptpsq = container_of(cq, struct mlx5e_ptpsq, ts_cq);
-       struct mlx5_cqwq *cqwq = &cq->wq;
+       int budget = min(napi_budget, MLX5E_TX_CQ_POLL_BUDGET);
+       u8 metadata_buff[MLX5E_TX_CQ_POLL_BUDGET];
+       u8 metadata_buff_sz = 0;
+       struct mlx5_cqwq *cqwq;
        struct mlx5_cqe64 *cqe;
        int work_done = 0;
 
+       cqwq = &cq->wq;
+
        if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state)))
                return false;
 
@@ -234,7 +241,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
        do {
                mlx5_cqwq_pop(cqwq);
 
-               mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, budget);
+               mlx5e_ptp_handle_ts_cqe(ptpsq, cqe,
+                                       metadata_buff, &metadata_buff_sz, napi_budget);
        } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
 
        mlx5_cqwq_update_db_record(cqwq);
@@ -242,6 +250,10 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
        /* ensure cq space is freed before enabling more cqes */
        wmb();
 
+       while (metadata_buff_sz > 0)
+               mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist,
+                                            metadata_buff[--metadata_buff_sz]);
+
        mlx5e_txqsq_wake(&ptpsq->txqsq);
 
        return work_done == budget;
index fea8c0a5fe893b9f8abc06e22e1aa57e1df16425..4358798d6ce14a1ff41245a8b9ea043cde249fa9 100644 (file)
@@ -492,11 +492,11 @@ static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter,
 
 void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
 {
-       char icosq_str[MLX5E_REPORTER_PER_Q_MAX_LEN] = {};
        char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
        struct mlx5e_icosq *icosq = rq->icosq;
        struct mlx5e_priv *priv = rq->priv;
        struct mlx5e_err_ctx err_ctx = {};
+       char icosq_str[32] = {};
 
        err_ctx.ctx = rq;
        err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
@@ -505,7 +505,7 @@ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
        if (icosq)
                snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn);
        snprintf(err_str, sizeof(err_str),
-                "RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x",
+                "RX timeout on channel: %d, %s RQ: 0x%x, CQ: 0x%x",
                 rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn);
 
        mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
index 00a04fdd756f570f8a9db368eb1685f9d5df9a8e..668da5c70e63de76b3dabe667fceae0502721dff 100644 (file)
@@ -300,9 +300,6 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
        if (err)
                goto destroy_neigh_entry;
 
-       e->encap_size = ipv4_encap_size;
-       e->encap_header = encap_header;
-
        if (!(nud_state & NUD_VALID)) {
                neigh_event_send(attr.n, NULL);
                /* the encap entry will be made valid on neigh update event
@@ -322,6 +319,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
                goto destroy_neigh_entry;
        }
 
+       e->encap_size = ipv4_encap_size;
+       e->encap_header = encap_header;
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
        mlx5e_route_lookup_ipv4_put(&attr);
@@ -404,16 +403,12 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
        if (err)
                goto free_encap;
 
-       e->encap_size = ipv4_encap_size;
-       kfree(e->encap_header);
-       e->encap_header = encap_header;
-
        if (!(nud_state & NUD_VALID)) {
                neigh_event_send(attr.n, NULL);
                /* the encap entry will be made valid on neigh update event
                 * and not used before that.
                 */
-               goto release_neigh;
+               goto free_encap;
        }
 
        memset(&reformat_params, 0, sizeof(reformat_params));
@@ -427,6 +422,10 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
                goto free_encap;
        }
 
+       e->encap_size = ipv4_encap_size;
+       kfree(e->encap_header);
+       e->encap_header = encap_header;
+
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
        mlx5e_route_lookup_ipv4_put(&attr);
@@ -568,9 +567,6 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
        if (err)
                goto destroy_neigh_entry;
 
-       e->encap_size = ipv6_encap_size;
-       e->encap_header = encap_header;
-
        if (!(nud_state & NUD_VALID)) {
                neigh_event_send(attr.n, NULL);
                /* the encap entry will be made valid on neigh update event
@@ -590,6 +586,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
                goto destroy_neigh_entry;
        }
 
+       e->encap_size = ipv6_encap_size;
+       e->encap_header = encap_header;
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
        mlx5e_route_lookup_ipv6_put(&attr);
@@ -671,16 +669,12 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
        if (err)
                goto free_encap;
 
-       e->encap_size = ipv6_encap_size;
-       kfree(e->encap_header);
-       e->encap_header = encap_header;
-
        if (!(nud_state & NUD_VALID)) {
                neigh_event_send(attr.n, NULL);
                /* the encap entry will be made valid on neigh update event
                 * and not used before that.
                 */
-               goto release_neigh;
+               goto free_encap;
        }
 
        memset(&reformat_params, 0, sizeof(reformat_params));
@@ -694,6 +688,10 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
                goto free_encap;
        }
 
+       e->encap_size = ipv6_encap_size;
+       kfree(e->encap_header);
+       e->encap_header = encap_header;
+
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
        mlx5e_route_lookup_ipv6_put(&attr);
index 215261a6925507ec84babc66465fddc67f3e97c6..792a0ea544cd39997378ff43cf61776cb0c69bdd 100644 (file)
@@ -43,12 +43,17 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
                               struct ethtool_drvinfo *drvinfo)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
+       int count;
 
        strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
-       snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
-                "%d.%d.%04d (%.16s)",
-                fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev),
-                mdev->board_id);
+       count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+                        "%d.%d.%04d (%.16s)", fw_rev_maj(mdev),
+                        fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id);
+       if (count == sizeof(drvinfo->fw_version))
+               snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+                        "%d.%d.%04d", fw_rev_maj(mdev),
+                        fw_rev_min(mdev), fw_rev_sub(mdev));
+
        strscpy(drvinfo->bus_info, dev_name(mdev->device),
                sizeof(drvinfo->bus_info));
 }
index 693e55b010d9e85f55c24b38282fd63bbf1a8bd2..3ab682bbcf86780fa16daa27bc99baf9219bd88e 100644 (file)
@@ -71,13 +71,17 @@ static void mlx5e_rep_get_drvinfo(struct net_device *dev,
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
        struct mlx5_core_dev *mdev = priv->mdev;
+       int count;
 
        strscpy(drvinfo->driver, mlx5e_rep_driver_name,
                sizeof(drvinfo->driver));
-       snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
-                "%d.%d.%04d (%.16s)",
-                fw_rev_maj(mdev), fw_rev_min(mdev),
-                fw_rev_sub(mdev), mdev->board_id);
+       count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+                        "%d.%d.%04d (%.16s)", fw_rev_maj(mdev),
+                        fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id);
+       if (count == sizeof(drvinfo->fw_version))
+               snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+                        "%d.%d.%04d", fw_rev_maj(mdev),
+                        fw_rev_min(mdev), fw_rev_sub(mdev));
 }
 
 static const struct counter_desc sw_rep_stats_desc[] = {
index 9a5a5c2c7da9e10fe123b7cc2911f6b775dae50a..7ca9e5b86778e3b0353530c4ce021a1fa8e5fd21 100644 (file)
@@ -3147,7 +3147,7 @@ static struct mlx5_fields fields[] = {
        OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
        OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
-       OFFLOAD(IP_DSCP, 16,  0xc00f, ip6, 0, ip_dscp),
+       OFFLOAD(IP_DSCP, 16,  0x0fc0, ip6, 0, ip_dscp),
 
        OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
        OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
@@ -3158,21 +3158,31 @@ static struct mlx5_fields fields[] = {
        OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
 };
 
-static unsigned long mask_to_le(unsigned long mask, int size)
+static u32 mask_field_get(void *mask, struct mlx5_fields *f)
 {
-       __be32 mask_be32;
-       __be16 mask_be16;
-
-       if (size == 32) {
-               mask_be32 = (__force __be32)(mask);
-               mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
-       } else if (size == 16) {
-               mask_be32 = (__force __be32)(mask);
-               mask_be16 = *(__be16 *)&mask_be32;
-               mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
+       switch (f->field_bsize) {
+       case 32:
+               return be32_to_cpu(*(__be32 *)mask) & f->field_mask;
+       case 16:
+               return be16_to_cpu(*(__be16 *)mask) & (u16)f->field_mask;
+       default:
+               return *(u8 *)mask & (u8)f->field_mask;
        }
+}
 
-       return mask;
+static void mask_field_clear(void *mask, struct mlx5_fields *f)
+{
+       switch (f->field_bsize) {
+       case 32:
+               *(__be32 *)mask &= ~cpu_to_be32(f->field_mask);
+               break;
+       case 16:
+               *(__be16 *)mask &= ~cpu_to_be16((u16)f->field_mask);
+               break;
+       default:
+               *(u8 *)mask &= ~(u8)f->field_mask;
+               break;
+       }
 }
 
 static int offload_pedit_fields(struct mlx5e_priv *priv,
@@ -3184,11 +3194,12 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
        struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
        struct pedit_headers_action *hdrs = parse_attr->hdrs;
        void *headers_c, *headers_v, *action, *vals_p;
-       u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
        struct mlx5e_tc_mod_hdr_acts *mod_acts;
-       unsigned long mask, field_mask;
+       void *s_masks_p, *a_masks_p;
        int i, first, last, next_z;
        struct mlx5_fields *f;
+       unsigned long mask;
+       u32 s_mask, a_mask;
        u8 cmd;
 
        mod_acts = &parse_attr->mod_hdr_acts;
@@ -3204,15 +3215,11 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
                bool skip;
 
                f = &fields[i];
-               /* avoid seeing bits set from previous iterations */
-               s_mask = 0;
-               a_mask = 0;
-
                s_masks_p = (void *)set_masks + f->offset;
                a_masks_p = (void *)add_masks + f->offset;
 
-               s_mask = *s_masks_p & f->field_mask;
-               a_mask = *a_masks_p & f->field_mask;
+               s_mask = mask_field_get(s_masks_p, f);
+               a_mask = mask_field_get(a_masks_p, f);
 
                if (!s_mask && !a_mask) /* nothing to offload here */
                        continue;
@@ -3239,22 +3246,20 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
                                         match_mask, f->field_bsize))
                                skip = true;
                        /* clear to denote we consumed this field */
-                       *s_masks_p &= ~f->field_mask;
+                       mask_field_clear(s_masks_p, f);
                } else {
                        cmd  = MLX5_ACTION_TYPE_ADD;
                        mask = a_mask;
                        vals_p = (void *)add_vals + f->offset;
                        /* add 0 is no change */
-                       if ((*(u32 *)vals_p & f->field_mask) == 0)
+                       if (!mask_field_get(vals_p, f))
                                skip = true;
                        /* clear to denote we consumed this field */
-                       *a_masks_p &= ~f->field_mask;
+                       mask_field_clear(a_masks_p, f);
                }
                if (skip)
                        continue;
 
-               mask = mask_to_le(mask, f->field_bsize);
-
                first = find_first_bit(&mask, f->field_bsize);
                next_z = find_next_zero_bit(&mask, f->field_bsize, first);
                last  = find_last_bit(&mask, f->field_bsize);
@@ -3281,10 +3286,9 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
                MLX5_SET(set_action_in, action, field, f->field);
 
                if (cmd == MLX5_ACTION_TYPE_SET) {
+                       unsigned long field_mask = f->field_mask;
                        int start;
 
-                       field_mask = mask_to_le(f->field_mask, f->field_bsize);
-
                        /* if field is bit sized it can start not from first bit */
                        start = find_first_bit(&field_mask, f->field_bsize);
 
index d41435c22ce56f6c4fc2f9bed393b9785ff5b45b..f0b506e562df31d194490dd482ae6bf71b658706 100644 (file)
@@ -399,9 +399,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata);
 
                mlx5e_skb_cb_hwtstamp_init(skb);
-               mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index);
                mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb,
                                           metadata_index);
+               mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index);
                if (!netif_tx_queue_stopped(sq->txq) &&
                    mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) {
                        netif_tx_stop_queue(sq->txq);
@@ -494,10 +494,10 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 
 err_drop:
        stats->dropped++;
-       dev_kfree_skb_any(skb);
        if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
                mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist,
                                             be32_to_cpu(eseg->flow_table_metadata));
+       dev_kfree_skb_any(skb);
        mlx5e_tx_flush(sq);
 }
 
index ea0405e0a43facbae35f5d0ebe4bb01f144b2a56..40a6cb052a2da3f4c0e272202b3bd1466053a5ad 100644 (file)
@@ -885,11 +885,14 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx)
 {
        struct mlx5_eq_table *table = dev->priv.eq_table;
        struct mlx5_irq *irq;
+       int cpu;
 
        irq = xa_load(&table->comp_irqs, vecidx);
        if (!irq)
                return;
 
+       cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq));
+       cpumask_clear_cpu(cpu, &table->used_cpus);
        xa_erase(&table->comp_irqs, vecidx);
        mlx5_irq_affinity_irq_release(dev, irq);
 }
@@ -897,16 +900,26 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx)
 static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx)
 {
        struct mlx5_eq_table *table = dev->priv.eq_table;
+       struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+       struct irq_affinity_desc af_desc = {};
        struct mlx5_irq *irq;
 
-       irq = mlx5_irq_affinity_irq_request_auto(dev, &table->used_cpus, vecidx);
-       if (IS_ERR(irq)) {
-               /* In case SF irq pool does not exist, fallback to the PF irqs*/
-               if (PTR_ERR(irq) == -ENOENT)
-                       return comp_irq_request_pci(dev, vecidx);
+       /* In case SF irq pool does not exist, fallback to the PF irqs*/
+       if (!mlx5_irq_pool_is_sf_pool(pool))
+               return comp_irq_request_pci(dev, vecidx);
 
+       af_desc.is_managed = 1;
+       cpumask_copy(&af_desc.mask, cpu_online_mask);
+       cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus);
+       irq = mlx5_irq_affinity_request(pool, &af_desc);
+       if (IS_ERR(irq))
                return PTR_ERR(irq);
-       }
+
+       cpumask_or(&table->used_cpus, &table->used_cpus, mlx5_irq_get_affinity_mask(irq));
+       mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
+                     pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
+                     cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
+                     mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
 
        return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL));
 }
index b296ac52a43974fc17e372a403a981a9220ab273..88236e75fd9013058dd855a77073ed21cb1e8afe 100644 (file)
@@ -984,7 +984,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
        dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
-       if (rep->vport == MLX5_VPORT_UPLINK && on_esw->offloads.ft_ipsec_tx_pol) {
+       if (rep->vport == MLX5_VPORT_UPLINK &&
+           on_esw == from_esw && on_esw->offloads.ft_ipsec_tx_pol) {
                dest.ft = on_esw->offloads.ft_ipsec_tx_pol;
                flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL;
                dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
index 047d5fed5f89e62cb58c4e9f8d2e094247430a16..612e666ec2635614f2e24443f319efd822b369b5 100644 (file)
@@ -168,45 +168,3 @@ void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *i
                if (pool->irqs_per_cpu)
                        cpu_put(pool, cpu);
 }
-
-/**
- * mlx5_irq_affinity_irq_request_auto - request one IRQ for mlx5 device.
- * @dev: mlx5 device that is requesting the IRQ.
- * @used_cpus: cpumask of bounded cpus by the device
- * @vecidx: vector index to request an IRQ for.
- *
- * Each IRQ is bounded to at most 1 CPU.
- * This function is requesting an IRQ according to the default assignment.
- * The default assignment policy is:
- * - request the least loaded IRQ which is not bound to any
- *   CPU of the previous IRQs requested.
- *
- * On success, this function updates used_cpus mask and returns an irq pointer.
- * In case of an error, an appropriate error pointer is returned.
- */
-struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev,
-                                                   struct cpumask *used_cpus, u16 vecidx)
-{
-       struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
-       struct irq_affinity_desc af_desc = {};
-       struct mlx5_irq *irq;
-
-       if (!mlx5_irq_pool_is_sf_pool(pool))
-               return ERR_PTR(-ENOENT);
-
-       af_desc.is_managed = 1;
-       cpumask_copy(&af_desc.mask, cpu_online_mask);
-       cpumask_andnot(&af_desc.mask, &af_desc.mask, used_cpus);
-       irq = mlx5_irq_affinity_request(pool, &af_desc);
-
-       if (IS_ERR(irq))
-               return irq;
-
-       cpumask_or(used_cpus, used_cpus, mlx5_irq_get_affinity_mask(irq));
-       mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
-                     pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
-                     cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
-                     mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
-
-       return irq;
-}
index aa29f09e83564270b62c8abfe4e4f81e5e672408..0c83ef174275a7948eb42b2c4412c77178d8792c 100644 (file)
@@ -384,7 +384,12 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 
 static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta)
 {
-       return mlx5_ptp_adjtime(ptp, delta);
+       struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+       struct mlx5_core_dev *mdev;
+
+       mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+       return mlx5_ptp_adjtime_real_time(mdev, delta);
 }
 
 static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm)
index 653648216730acb975a9a69f1dd54a001bd21383..4dcf995cb1a2042c39938ee2f166a6c3d3e6ef24 100644 (file)
@@ -28,7 +28,7 @@
 struct mlx5_irq {
        struct atomic_notifier_head nh;
        cpumask_var_t mask;
-       char name[MLX5_MAX_IRQ_NAME];
+       char name[MLX5_MAX_IRQ_FORMATTED_NAME];
        struct mlx5_irq_pool *pool;
        int refcount;
        struct msi_map map;
@@ -292,8 +292,8 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
        else
                irq_sf_set_name(pool, name, i);
        ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
-       snprintf(irq->name, MLX5_MAX_IRQ_NAME,
-                "%s@pci:%s", name, pci_name(dev->pdev));
+       snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME,
+                MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev));
        err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
                          &irq->nh);
        if (err) {
index d3a77a0ab8488b7ddddf9ebbaac5c6ee46c638e5..c4d377f8df308917c89e0c8306cbd6c9ef3d8f83 100644 (file)
@@ -7,6 +7,9 @@
 #include <linux/mlx5/driver.h>
 
 #define MLX5_MAX_IRQ_NAME (32)
+#define MLX5_IRQ_NAME_FORMAT_STR ("%s@pci:%s")
+#define MLX5_MAX_IRQ_FORMATTED_NAME \
+       (MLX5_MAX_IRQ_NAME + sizeof(MLX5_IRQ_NAME_FORMAT_STR))
 /* max irq_index is 2047, so four chars */
 #define MLX5_MAX_IRQ_IDX_CHARS (4)
 #define MLX5_EQ_REFS_PER_IRQ (2)
index 6ea88a5818047224973f7eb76e6beeebaf55b7f9..e3ec559369fa07ab7768454f7d31d0413678561b 100644 (file)
@@ -57,7 +57,8 @@ static const char *dr_action_id_to_str(enum mlx5dr_action_type action_id)
 
 static bool mlx5dr_action_supp_fwd_fdb_multi_ft(struct mlx5_core_dev *dev)
 {
-       return (MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table_limit_regc) ||
+       return (MLX5_CAP_GEN(dev, steering_format_version) < MLX5_STEERING_FORMAT_CONNECTX_6DX ||
+               MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table_limit_regc) ||
                MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table));
 }
 
index 4e8527a724f504e177058cb404bea04e04322819..6fa06ba2d346532a6e0c7ffd24cd8fb127b9d219 100644 (file)
@@ -52,7 +52,6 @@ struct dr_qp_init_attr {
        u32 cqn;
        u32 pdn;
        u32 max_send_wr;
-       u32 max_send_sge;
        struct mlx5_uars_page *uar;
        u8 isolate_vl_tc:1;
 };
@@ -247,37 +246,6 @@ static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
        return err == CQ_POLL_ERR ? err : npolled;
 }
 
-static int dr_qp_get_args_update_send_wqe_size(struct dr_qp_init_attr *attr)
-{
-       return roundup_pow_of_two(sizeof(struct mlx5_wqe_ctrl_seg) +
-                                 sizeof(struct mlx5_wqe_flow_update_ctrl_seg) +
-                                 sizeof(struct mlx5_wqe_header_modify_argument_update_seg));
-}
-
-/* We calculate for specific RC QP with the required functionality */
-static int dr_qp_calc_rc_send_wqe(struct dr_qp_init_attr *attr)
-{
-       int update_arg_size;
-       int inl_size = 0;
-       int tot_size;
-       int size;
-
-       update_arg_size = dr_qp_get_args_update_send_wqe_size(attr);
-
-       size = sizeof(struct mlx5_wqe_ctrl_seg) +
-              sizeof(struct mlx5_wqe_raddr_seg);
-       inl_size = size + ALIGN(sizeof(struct mlx5_wqe_inline_seg) +
-                               DR_STE_SIZE, 16);
-
-       size += attr->max_send_sge * sizeof(struct mlx5_wqe_data_seg);
-
-       size = max(size, update_arg_size);
-
-       tot_size = max(size, inl_size);
-
-       return ALIGN(tot_size, MLX5_SEND_WQE_BB);
-}
-
 static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
                                         struct dr_qp_init_attr *attr)
 {
@@ -285,7 +253,6 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
        u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
        struct mlx5_wq_param wqp;
        struct mlx5dr_qp *dr_qp;
-       int wqe_size;
        int inlen;
        void *qpc;
        void *in;
@@ -365,15 +332,6 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
        if (err)
                goto err_in;
        dr_qp->uar = attr->uar;
-       wqe_size = dr_qp_calc_rc_send_wqe(attr);
-       dr_qp->max_inline_data = min(wqe_size -
-                                    (sizeof(struct mlx5_wqe_ctrl_seg) +
-                                     sizeof(struct mlx5_wqe_raddr_seg) +
-                                     sizeof(struct mlx5_wqe_inline_seg)),
-                                    (2 * MLX5_SEND_WQE_BB -
-                                     (sizeof(struct mlx5_wqe_ctrl_seg) +
-                                      sizeof(struct mlx5_wqe_raddr_seg) +
-                                      sizeof(struct mlx5_wqe_inline_seg))));
 
        return dr_qp;
 
@@ -437,48 +395,8 @@ dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
                MLX5_SEND_WQE_DS;
 }
 
-static int dr_set_data_inl_seg(struct mlx5dr_qp *dr_qp,
-                              struct dr_data_seg *data_seg, void *wqe)
-{
-       int inline_header_size = sizeof(struct mlx5_wqe_ctrl_seg) +
-                               sizeof(struct mlx5_wqe_raddr_seg) +
-                               sizeof(struct mlx5_wqe_inline_seg);
-       struct mlx5_wqe_inline_seg *seg;
-       int left_space;
-       int inl = 0;
-       void *addr;
-       int len;
-       int idx;
-
-       seg = wqe;
-       wqe += sizeof(*seg);
-       addr = (void *)(unsigned long)(data_seg->addr);
-       len  = data_seg->length;
-       inl += len;
-       left_space = MLX5_SEND_WQE_BB - inline_header_size;
-
-       if (likely(len > left_space)) {
-               memcpy(wqe, addr, left_space);
-               len -= left_space;
-               addr += left_space;
-               idx = (dr_qp->sq.pc + 1) & (dr_qp->sq.wqe_cnt - 1);
-               wqe = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
-       }
-
-       memcpy(wqe, addr, len);
-
-       if (likely(inl)) {
-               seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
-               return DIV_ROUND_UP(inl + sizeof(seg->byte_count),
-                                   MLX5_SEND_WQE_DS);
-       } else {
-               return 0;
-       }
-}
-
 static void
-dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp,
-                                 struct mlx5_wqe_ctrl_seg *wq_ctrl,
+dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
                                  u64 remote_addr,
                                  u32 rkey,
                                  struct dr_data_seg *data_seg,
@@ -494,17 +412,15 @@ dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp,
        wq_raddr->reserved = 0;
 
        wq_dseg = (void *)(wq_raddr + 1);
-       /* WQE ctrl segment + WQE remote addr segment */
-       *size = (sizeof(*wq_ctrl) + sizeof(*wq_raddr)) / MLX5_SEND_WQE_DS;
 
-       if (data_seg->send_flags & IB_SEND_INLINE) {
-               *size += dr_set_data_inl_seg(dr_qp, data_seg, wq_dseg);
-       } else {
-               wq_dseg->byte_count = cpu_to_be32(data_seg->length);
-               wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
-               wq_dseg->addr = cpu_to_be64(data_seg->addr);
-               *size += sizeof(*wq_dseg) / MLX5_SEND_WQE_DS;  /* WQE data segment */
-       }
+       wq_dseg->byte_count = cpu_to_be32(data_seg->length);
+       wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
+       wq_dseg->addr = cpu_to_be64(data_seg->addr);
+
+       *size = (sizeof(*wq_ctrl) +    /* WQE ctrl segment */
+                sizeof(*wq_dseg) +    /* WQE data segment */
+                sizeof(*wq_raddr)) /  /* WQE remote addr segment */
+               MLX5_SEND_WQE_DS;
 }
 
 static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
@@ -535,7 +451,7 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
        switch (opcode) {
        case MLX5_OPCODE_RDMA_READ:
        case MLX5_OPCODE_RDMA_WRITE:
-               dr_rdma_handle_icm_write_segments(dr_qp, wq_ctrl, remote_addr,
+               dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr,
                                                  rkey, data_seg, &size);
                break;
        case MLX5_OPCODE_FLOW_TBL_ACCESS:
@@ -656,7 +572,7 @@ static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring,
        if (send_ring->pending_wqe % send_ring->signal_th == 0)
                send_info->write.send_flags |= IB_SEND_SIGNALED;
        else
-               send_info->write.send_flags &= ~IB_SEND_SIGNALED;
+               send_info->write.send_flags = 0;
 }
 
 static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
@@ -680,13 +596,9 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
        }
 
        send_ring->pending_wqe++;
-       if (!send_info->write.lkey)
-               send_info->write.send_flags |= IB_SEND_INLINE;
 
        if (send_ring->pending_wqe % send_ring->signal_th == 0)
                send_info->write.send_flags |= IB_SEND_SIGNALED;
-       else
-               send_info->write.send_flags &= ~IB_SEND_SIGNALED;
 
        send_ring->pending_wqe++;
        send_info->read.length = send_info->write.length;
@@ -696,9 +608,9 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
        send_info->read.lkey = send_ring->sync_mr->mkey;
 
        if (send_ring->pending_wqe % send_ring->signal_th == 0)
-               send_info->read.send_flags |= IB_SEND_SIGNALED;
+               send_info->read.send_flags = IB_SEND_SIGNALED;
        else
-               send_info->read.send_flags &= ~IB_SEND_SIGNALED;
+               send_info->read.send_flags = 0;
 }
 
 static void dr_fill_data_segs(struct mlx5dr_domain *dmn,
@@ -1345,7 +1257,6 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
        dmn->send_ring->cq->qp = dmn->send_ring->qp;
 
        dmn->info.max_send_wr = QUEUE_SIZE;
-       init_attr.max_send_sge = 1;
        dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
                                        DR_STE_SIZE);
 
index 0c76c162b8a9f5eb720ea3ed00a5beaf4d930ad0..62cabeeb842a135684ead5de19bab9872a422ba3 100644 (file)
@@ -579,6 +579,7 @@ struct rtl8169_tc_offsets {
 enum rtl_flag {
        RTL_FLAG_TASK_ENABLED = 0,
        RTL_FLAG_TASK_RESET_PENDING,
+       RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE,
        RTL_FLAG_TASK_TX_TIMEOUT,
        RTL_FLAG_MAX
 };
@@ -624,6 +625,7 @@ struct rtl8169_private {
 
        unsigned supports_gmii:1;
        unsigned aspm_manageable:1;
+       unsigned dash_enabled:1;
        dma_addr_t counters_phys_addr;
        struct rtl8169_counters *counters;
        struct rtl8169_tc_offsets tc_offset;
@@ -1253,14 +1255,26 @@ static bool r8168ep_check_dash(struct rtl8169_private *tp)
        return r8168ep_ocp_read(tp, 0x128) & BIT(0);
 }
 
-static enum rtl_dash_type rtl_check_dash(struct rtl8169_private *tp)
+static bool rtl_dash_is_enabled(struct rtl8169_private *tp)
+{
+       switch (tp->dash_type) {
+       case RTL_DASH_DP:
+               return r8168dp_check_dash(tp);
+       case RTL_DASH_EP:
+               return r8168ep_check_dash(tp);
+       default:
+               return false;
+       }
+}
+
+static enum rtl_dash_type rtl_get_dash_type(struct rtl8169_private *tp)
 {
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_28:
        case RTL_GIGA_MAC_VER_31:
-               return r8168dp_check_dash(tp) ? RTL_DASH_DP : RTL_DASH_NONE;
+               return RTL_DASH_DP;
        case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53:
-               return r8168ep_check_dash(tp) ? RTL_DASH_EP : RTL_DASH_NONE;
+               return RTL_DASH_EP;
        default:
                return RTL_DASH_NONE;
        }
@@ -1453,7 +1467,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 
        device_set_wakeup_enable(tp_to_dev(tp), wolopts);
 
-       if (tp->dash_type == RTL_DASH_NONE) {
+       if (!tp->dash_enabled) {
                rtl_set_d3_pll_down(tp, !wolopts);
                tp->dev->wol_enabled = wolopts ? 1 : 0;
        }
@@ -2512,7 +2526,7 @@ static void rtl_wol_enable_rx(struct rtl8169_private *tp)
 
 static void rtl_prepare_power_down(struct rtl8169_private *tp)
 {
-       if (tp->dash_type != RTL_DASH_NONE)
+       if (tp->dash_enabled)
                return;
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_32 ||
@@ -2586,9 +2600,7 @@ static void rtl_set_rx_mode(struct net_device *dev)
                rx_mode &= ~AcceptMulticast;
        } else if (netdev_mc_count(dev) > MC_FILTER_LIMIT ||
                   dev->flags & IFF_ALLMULTI ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_35 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_46 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_48) {
+                  tp->mac_version == RTL_GIGA_MAC_VER_35) {
                /* accept all multicasts */
        } else if (netdev_mc_empty(dev)) {
                rx_mode &= ~AcceptMulticast;
@@ -4571,6 +4583,8 @@ static void rtl_task(struct work_struct *work)
 reset:
                rtl_reset_work(tp);
                netif_wake_queue(tp->dev);
+       } else if (test_and_clear_bit(RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE, tp->wk.flags)) {
+               rtl_reset_work(tp);
        }
 out_unlock:
        rtnl_unlock();
@@ -4604,7 +4618,7 @@ static void r8169_phylink_handler(struct net_device *ndev)
        } else {
                /* In few cases rx is broken after link-down otherwise */
                if (rtl_is_8125(tp))
-                       rtl_reset_work(tp);
+                       rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE);
                pm_runtime_idle(d);
        }
 
@@ -4648,10 +4662,16 @@ static void rtl8169_down(struct rtl8169_private *tp)
        rtl8169_cleanup(tp);
        rtl_disable_exit_l1(tp);
        rtl_prepare_power_down(tp);
+
+       if (tp->dash_type != RTL_DASH_NONE)
+               rtl8168_driver_stop(tp);
 }
 
 static void rtl8169_up(struct rtl8169_private *tp)
 {
+       if (tp->dash_type != RTL_DASH_NONE)
+               rtl8168_driver_start(tp);
+
        pci_set_master(tp->pci_dev);
        phy_init_hw(tp->phydev);
        phy_resume(tp->phydev);
@@ -4674,7 +4694,7 @@ static int rtl8169_close(struct net_device *dev)
        rtl8169_down(tp);
        rtl8169_rx_clear(tp);
 
-       cancel_work_sync(&tp->wk.work);
+       cancel_work(&tp->wk.work);
 
        free_irq(tp->irq, tp);
 
@@ -4869,7 +4889,7 @@ static int rtl8169_runtime_idle(struct device *device)
 {
        struct rtl8169_private *tp = dev_get_drvdata(device);
 
-       if (tp->dash_type != RTL_DASH_NONE)
+       if (tp->dash_enabled)
                return -EBUSY;
 
        if (!netif_running(tp->dev) || !netif_carrier_ok(tp->dev))
@@ -4895,8 +4915,7 @@ static void rtl_shutdown(struct pci_dev *pdev)
        /* Restore original MAC address */
        rtl_rar_set(tp, tp->dev->perm_addr);
 
-       if (system_state == SYSTEM_POWER_OFF &&
-           tp->dash_type == RTL_DASH_NONE) {
+       if (system_state == SYSTEM_POWER_OFF && !tp->dash_enabled) {
                pci_wake_from_d3(pdev, tp->saved_wolopts);
                pci_set_power_state(pdev, PCI_D3hot);
        }
@@ -4909,6 +4928,8 @@ static void rtl_remove_one(struct pci_dev *pdev)
        if (pci_dev_run_wake(pdev))
                pm_runtime_get_noresume(&pdev->dev);
 
+       cancel_work_sync(&tp->wk.work);
+
        unregister_netdev(tp->dev);
 
        if (tp->dash_type != RTL_DASH_NONE)
@@ -5254,7 +5275,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
        tp->aspm_manageable = !rc;
 
-       tp->dash_type = rtl_check_dash(tp);
+       tp->dash_type = rtl_get_dash_type(tp);
+       tp->dash_enabled = rtl_dash_is_enabled(tp);
 
        tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK;
 
@@ -5325,7 +5347,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        /* configure chip for default features */
        rtl8169_set_features(dev, dev->features);
 
-       if (tp->dash_type == RTL_DASH_NONE) {
+       if (!tp->dash_enabled) {
                rtl_set_d3_pll_down(tp, true);
        } else {
                rtl_set_d3_pll_down(tp, false);
@@ -5365,7 +5387,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                            "ok" : "ko");
 
        if (tp->dash_type != RTL_DASH_NONE) {
-               netdev_info(dev, "DASH enabled\n");
+               netdev_info(dev, "DASH %s\n",
+                           tp->dash_enabled ? "enabled" : "disabled");
                rtl8168_driver_start(tp);
        }
 
index c70cff80cc99939614235a1fdb63ab3467b499a1..664eda4b5a114e984da55feebdda984b54570d87 100644 (file)
@@ -515,6 +515,15 @@ static void ravb_emac_init_gbeth(struct net_device *ndev)
 {
        struct ravb_private *priv = netdev_priv(ndev);
 
+       if (priv->phy_interface == PHY_INTERFACE_MODE_MII) {
+               ravb_write(ndev, (1000 << 16) | CXR35_SEL_XMII_MII, CXR35);
+               ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1, 0);
+       } else {
+               ravb_write(ndev, (1000 << 16) | CXR35_SEL_XMII_RGMII, CXR35);
+               ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1,
+                           CXR31_SEL_LINK0);
+       }
+
        /* Receive frame limit set register */
        ravb_write(ndev, GBETH_RX_BUFF_MAX + ETH_FCS_LEN, RFLR);
 
@@ -537,14 +546,6 @@ static void ravb_emac_init_gbeth(struct net_device *ndev)
 
        /* E-MAC interrupt enable register */
        ravb_write(ndev, ECSIPR_ICDIP, ECSIPR);
-
-       if (priv->phy_interface == PHY_INTERFACE_MODE_MII) {
-               ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1, 0);
-               ravb_write(ndev, (1000 << 16) | CXR35_SEL_XMII_MII, CXR35);
-       } else {
-               ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1,
-                           CXR31_SEL_LINK0);
-       }
 }
 
 static void ravb_emac_init_rcar(struct net_device *ndev)
@@ -1811,19 +1812,20 @@ static int ravb_open(struct net_device *ndev)
        if (info->gptp)
                ravb_ptp_init(ndev, priv->pdev);
 
-       netif_tx_start_all_queues(ndev);
-
        /* PHY control start */
        error = ravb_phy_start(ndev);
        if (error)
                goto out_ptp_stop;
 
+       netif_tx_start_all_queues(ndev);
+
        return 0;
 
 out_ptp_stop:
        /* Stop PTP Clock driver */
        if (info->gptp)
                ravb_ptp_stop(ndev);
+       ravb_stop_dma(ndev);
 out_free_irq_mgmta:
        if (!info->multi_irqs)
                goto out_free_irq;
@@ -1874,6 +1876,12 @@ static void ravb_tx_timeout_work(struct work_struct *work)
        struct net_device *ndev = priv->ndev;
        int error;
 
+       if (!rtnl_trylock()) {
+               usleep_range(1000, 2000);
+               schedule_work(&priv->work);
+               return;
+       }
+
        netif_tx_stop_all_queues(ndev);
 
        /* Stop PTP Clock driver */
@@ -1907,7 +1915,7 @@ static void ravb_tx_timeout_work(struct work_struct *work)
                 */
                netdev_err(ndev, "%s: ravb_dmac_init() failed, error %d\n",
                           __func__, error);
-               return;
+               goto out_unlock;
        }
        ravb_emac_init(ndev);
 
@@ -1917,6 +1925,9 @@ out:
                ravb_ptp_init(ndev, priv->pdev);
 
        netif_tx_start_all_queues(ndev);
+
+out_unlock:
+       rtnl_unlock();
 }
 
 /* Packet transmit function for Ethernet AVB */
@@ -2645,9 +2656,14 @@ static int ravb_probe(struct platform_device *pdev)
        ndev->features = info->net_features;
        ndev->hw_features = info->net_hw_features;
 
-       reset_control_deassert(rstc);
+       error = reset_control_deassert(rstc);
+       if (error)
+               goto out_free_netdev;
+
        pm_runtime_enable(&pdev->dev);
-       pm_runtime_get_sync(&pdev->dev);
+       error = pm_runtime_resume_and_get(&pdev->dev);
+       if (error < 0)
+               goto out_rpm_disable;
 
        if (info->multi_irqs) {
                if (info->err_mgmt_irqs)
@@ -2872,11 +2888,12 @@ out_disable_gptp_clk:
 out_disable_refclk:
        clk_disable_unprepare(priv->refclk);
 out_release:
-       free_netdev(ndev);
-
        pm_runtime_put(&pdev->dev);
+out_rpm_disable:
        pm_runtime_disable(&pdev->dev);
        reset_control_assert(rstc);
+out_free_netdev:
+       free_netdev(ndev);
        return error;
 }
 
@@ -2886,22 +2903,26 @@ static void ravb_remove(struct platform_device *pdev)
        struct ravb_private *priv = netdev_priv(ndev);
        const struct ravb_hw_info *info = priv->info;
 
-       /* Stop PTP Clock driver */
-       if (info->ccc_gac)
-               ravb_ptp_stop(ndev);
-
-       clk_disable_unprepare(priv->gptp_clk);
-       clk_disable_unprepare(priv->refclk);
-
-       /* Set reset mode */
-       ravb_write(ndev, CCC_OPC_RESET, CCC);
        unregister_netdev(ndev);
        if (info->nc_queues)
                netif_napi_del(&priv->napi[RAVB_NC]);
        netif_napi_del(&priv->napi[RAVB_BE]);
+
        ravb_mdio_release(priv);
+
+       /* Stop PTP Clock driver */
+       if (info->ccc_gac)
+               ravb_ptp_stop(ndev);
+
        dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat,
                          priv->desc_bat_dma);
+
+       /* Set reset mode */
+       ravb_write(ndev, CCC_OPC_RESET, CCC);
+
+       clk_disable_unprepare(priv->gptp_clk);
+       clk_disable_unprepare(priv->refclk);
+
        pm_runtime_put_sync(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
        reset_control_assert(priv->rstc);
index 43a7795d65919ed2be465a59eeac0f9e4f8a2255..e77c6ff93d81b64c99a6f189218b4c1884bdd836 100644 (file)
@@ -1504,8 +1504,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd
 {
        struct rswitch_device *rdev = netdev_priv(ndev);
        struct rswitch_gwca_queue *gq = rdev->tx_queue;
+       netdev_tx_t ret = NETDEV_TX_OK;
        struct rswitch_ext_desc *desc;
-       int ret = NETDEV_TX_OK;
        dma_addr_t dma_addr;
 
        if (rswitch_get_num_cur_queues(gq) >= gq->ring_size - 1) {
@@ -1517,10 +1517,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd
                return ret;
 
        dma_addr = dma_map_single(ndev->dev.parent, skb->data, skb->len, DMA_TO_DEVICE);
-       if (dma_mapping_error(ndev->dev.parent, dma_addr)) {
-               dev_kfree_skb_any(skb);
-               return ret;
-       }
+       if (dma_mapping_error(ndev->dev.parent, dma_addr))
+               goto err_kfree;
 
        gq->skbs[gq->cur] = skb;
        desc = &gq->tx_ring[gq->cur];
@@ -1533,10 +1531,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd
                struct rswitch_gwca_ts_info *ts_info;
 
                ts_info = kzalloc(sizeof(*ts_info), GFP_ATOMIC);
-               if (!ts_info) {
-                       dma_unmap_single(ndev->dev.parent, dma_addr, skb->len, DMA_TO_DEVICE);
-                       return -ENOMEM;
-               }
+               if (!ts_info)
+                       goto err_unmap;
 
                skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
                rdev->ts_tag++;
@@ -1558,6 +1554,14 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd
        gq->cur = rswitch_next_queue_index(gq, true, 1);
        rswitch_modify(rdev->addr, GWTRC(gq->index), 0, BIT(gq->index % 32));
 
+       return ret;
+
+err_unmap:
+       dma_unmap_single(ndev->dev.parent, dma_addr, skb->len, DMA_TO_DEVICE);
+
+err_kfree:
+       dev_kfree_skb_any(skb);
+
        return ret;
 }
 
index a2b9e289aa36a9488e536d27665e0829d3688086..85dcda51df0522f0c4048890b484f98c5bf6e3f2 100644 (file)
@@ -280,7 +280,7 @@ config DWMAC_INTEL
 config DWMAC_LOONGSON
        tristate "Loongson PCI DWMAC support"
        default MACH_LOONGSON64
-       depends on STMMAC_ETH && PCI
+       depends on (MACH_LOONGSON64 || COMPILE_TEST) && STMMAC_ETH && PCI
        depends on COMMON_CLK
        help
          This selects the LOONGSON PCI bus support for the stmmac driver,
index ea4910ae0921acf9cd167abdda7321005cade53d..6a7c1d325c464ec62f60828581489d92b530f48f 100644 (file)
 #define MMC_XGMAC_RX_DISCARD_OCT_GB    0x1b4
 #define MMC_XGMAC_RX_ALIGN_ERR_PKT     0x1bc
 
+#define MMC_XGMAC_TX_FPE_INTR_MASK     0x204
 #define MMC_XGMAC_TX_FPE_FRAG          0x208
 #define MMC_XGMAC_TX_HOLD_REQ          0x20c
+#define MMC_XGMAC_RX_FPE_INTR_MASK     0x224
 #define MMC_XGMAC_RX_PKT_ASSEMBLY_ERR  0x228
 #define MMC_XGMAC_RX_PKT_SMD_ERR       0x22c
 #define MMC_XGMAC_RX_PKT_ASSEMBLY_OK   0x230
@@ -352,6 +354,8 @@ static void dwxgmac_mmc_intr_all_mask(void __iomem *mmcaddr)
 {
        writel(0x0, mmcaddr + MMC_RX_INTR_MASK);
        writel(0x0, mmcaddr + MMC_TX_INTR_MASK);
+       writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_TX_FPE_INTR_MASK);
+       writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_RX_FPE_INTR_MASK);
        writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_RX_IPC_INTR_MASK);
 }
 
index 3e50fd53a617440c3f417fd0ceea7209bca9dc1b..2afb2bd25977a2265d998fb2203bbe3a70a9d3f5 100644 (file)
@@ -5293,6 +5293,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 
        dma_dir = page_pool_get_dma_dir(rx_q->page_pool);
        buf_sz = DIV_ROUND_UP(priv->dma_conf.dma_buf_sz, PAGE_SIZE) * PAGE_SIZE;
+       limit = min(priv->dma_conf.dma_rx_size - 1, (unsigned int)limit);
 
        if (netif_msg_rx_status(priv)) {
                void *rx_head;
@@ -5328,10 +5329,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
                        len = 0;
                }
 
+read_again:
                if (count >= limit)
                        break;
 
-read_again:
                buf1_len = 0;
                buf2_len = 0;
                entry = next_entry;
index 6c4b64227ac886dde27b4b0e5343bd730099d00a..411898a4f38caa73b5e88e31b554601ee079ae6d 100644 (file)
@@ -2063,7 +2063,7 @@ static int prueth_probe(struct platform_device *pdev)
                                       &prueth->shram);
        if (ret) {
                dev_err(dev, "unable to get PRUSS SHRD RAM2: %d\n", ret);
-               pruss_put(prueth->pruss);
+               goto put_pruss;
        }
 
        prueth->sram_pool = of_gen_pool_get(np, "sram", 0);
@@ -2105,10 +2105,7 @@ static int prueth_probe(struct platform_device *pdev)
        prueth->iep1 = icss_iep_get_idx(np, 1);
        if (IS_ERR(prueth->iep1)) {
                ret = dev_err_probe(dev, PTR_ERR(prueth->iep1), "iep1 get failed\n");
-               icss_iep_put(prueth->iep0);
-               prueth->iep0 = NULL;
-               prueth->iep1 = NULL;
-               goto free_pool;
+               goto put_iep0;
        }
 
        if (prueth->pdata.quirk_10m_link_issue) {
@@ -2205,6 +2202,12 @@ netdev_exit:
 exit_iep:
        if (prueth->pdata.quirk_10m_link_issue)
                icss_iep_exit_fw(prueth->iep1);
+       icss_iep_put(prueth->iep1);
+
+put_iep0:
+       icss_iep_put(prueth->iep0);
+       prueth->iep0 = NULL;
+       prueth->iep1 = NULL;
 
 free_pool:
        gen_pool_free(prueth->sram_pool,
@@ -2212,6 +2215,8 @@ free_pool:
 
 put_mem:
        pruss_release_mem_region(prueth->pruss, &prueth->shram);
+
+put_pruss:
        pruss_put(prueth->pruss);
 
 put_cores:
index a3c5de9d547a4b082cb52e60dc23ff721cf66b27..533e912af0893a0786a7436b88512cb65e9a5d63 100644 (file)
@@ -1769,10 +1769,12 @@ int wx_sw_init(struct wx *wx)
                wx->subsystem_device_id = pdev->subsystem_device;
        } else {
                err = wx_flash_read_dword(wx, 0xfffdc, &ssid);
-               if (!err)
-                       wx->subsystem_device_id = swab16((u16)ssid);
+               if (err < 0) {
+                       wx_err(wx, "read of internal subsystem device id failed\n");
+                       return err;
+               }
 
-               return err;
+               wx->subsystem_device_id = swab16((u16)ssid);
        }
 
        wx->mac_table = kcalloc(wx->mac.num_rar_entries,
index 2823861e5a92f5af26318e8bcacc6aacb43073f4..a5a50b5a8816847fed40728dc85b3339dd62b3b3 100644 (file)
@@ -1972,11 +1972,11 @@ void wx_reset_interrupt_capability(struct wx *wx)
        if (!pdev->msi_enabled && !pdev->msix_enabled)
                return;
 
-       pci_free_irq_vectors(wx->pdev);
        if (pdev->msix_enabled) {
                kfree(wx->msix_entries);
                wx->msix_entries = NULL;
        }
+       pci_free_irq_vectors(wx->pdev);
 }
 EXPORT_SYMBOL(wx_reset_interrupt_capability);
 
index 3d43f808c86b784d60aaa1eb8d90769fcedbafba..8db804543e66da8a39e1246a95d0e0622c1f5d94 100644 (file)
@@ -121,10 +121,8 @@ static int ngbe_sw_init(struct wx *wx)
 
        /* PCI config space info */
        err = wx_sw_init(wx);
-       if (err < 0) {
-               wx_err(wx, "read of internal subsystem device id failed\n");
+       if (err < 0)
                return err;
-       }
 
        /* mac type, phy type , oem type */
        ngbe_init_type_code(wx);
index 70f0b5c01dacf2dbd29c2322551ae146fa07baa4..526250102db27c139c140fac7dfd0117c566426b 100644 (file)
@@ -364,10 +364,8 @@ static int txgbe_sw_init(struct wx *wx)
 
        /* PCI config space info */
        err = wx_sw_init(wx);
-       if (err < 0) {
-               wx_err(wx, "read of internal subsystem device id failed\n");
+       if (err < 0)
                return err;
-       }
 
        txgbe_init_type_code(wx);
 
index 82d0d44b2b02f1b564ce3502a2534f269566959f..bf6e33990490922851d029ffb7f42bad444964f8 100644 (file)
@@ -822,7 +822,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                if (lp->features & XAE_FEATURE_FULL_TX_CSUM) {
                        /* Tx Full Checksum Offload Enabled */
                        cur_p->app0 |= 2;
-               } else if (lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) {
+               } else if (lp->features & XAE_FEATURE_PARTIAL_TX_CSUM) {
                        csum_start_off = skb_transport_offset(skb);
                        csum_index_off = csum_start_off + skb->csum_offset;
                        /* Tx Partial Checksum Offload Enabled */
index 3ba3c8fb28a5d692c268b243a2fcfb893a31c36e..706ea5263e879df8b4bff89cd4408e151f4615cd 100644 (file)
@@ -2206,9 +2206,6 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
                goto upper_link_failed;
        }
 
-       /* set slave flag before open to prevent IPv6 addrconf */
-       vf_netdev->flags |= IFF_SLAVE;
-
        schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
 
        call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
@@ -2315,16 +2312,18 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
 
        }
 
-       /* Fallback path to check synthetic vf with
-        * help of mac addr
+       /* Fallback path to check synthetic vf with help of mac addr.
+        * Because this function can be called before vf_netdev is
+        * initialized (NETDEV_POST_INIT) when its perm_addr has not been copied
+        * from dev_addr, also try to match to its dev_addr.
+        * Note: On Hyper-V and Azure, it's not possible to set a MAC address
+        * on a VF that matches to the MAC of a unrelated NETVSC device.
         */
        list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
                ndev = hv_get_drvdata(ndev_ctx->device_ctx);
-               if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr)) {
-                       netdev_notice(vf_netdev,
-                                     "falling back to mac addr based matching\n");
+               if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr) ||
+                   ether_addr_equal(vf_netdev->dev_addr, ndev->perm_addr))
                        return ndev;
-               }
        }
 
        netdev_notice(vf_netdev,
@@ -2332,6 +2331,19 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
        return NULL;
 }
 
+static int netvsc_prepare_bonding(struct net_device *vf_netdev)
+{
+       struct net_device *ndev;
+
+       ndev = get_netvsc_byslot(vf_netdev);
+       if (!ndev)
+               return NOTIFY_DONE;
+
+       /* set slave flag before open to prevent IPv6 addrconf */
+       vf_netdev->flags |= IFF_SLAVE;
+       return NOTIFY_DONE;
+}
+
 static int netvsc_register_vf(struct net_device *vf_netdev)
 {
        struct net_device_context *net_device_ctx;
@@ -2531,15 +2543,6 @@ static int netvsc_probe(struct hv_device *dev,
                goto devinfo_failed;
        }
 
-       nvdev = rndis_filter_device_add(dev, device_info);
-       if (IS_ERR(nvdev)) {
-               ret = PTR_ERR(nvdev);
-               netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
-               goto rndis_failed;
-       }
-
-       eth_hw_addr_set(net, device_info->mac_adr);
-
        /* We must get rtnl lock before scheduling nvdev->subchan_work,
         * otherwise netvsc_subchan_work() can get rtnl lock first and wait
         * all subchannels to show up, but that may not happen because
@@ -2547,9 +2550,23 @@ static int netvsc_probe(struct hv_device *dev,
         * -> ... -> device_add() -> ... -> __device_attach() can't get
         * the device lock, so all the subchannels can't be processed --
         * finally netvsc_subchan_work() hangs forever.
+        *
+        * The rtnl lock also needs to be held before rndis_filter_device_add()
+        * which advertises nvsp_2_vsc_capability / sriov bit, and triggers
+        * VF NIC offering and registering. If VF NIC finished register_netdev()
+        * earlier it may cause name based config failure.
         */
        rtnl_lock();
 
+       nvdev = rndis_filter_device_add(dev, device_info);
+       if (IS_ERR(nvdev)) {
+               ret = PTR_ERR(nvdev);
+               netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
+               goto rndis_failed;
+       }
+
+       eth_hw_addr_set(net, device_info->mac_adr);
+
        if (nvdev->num_chn > 1)
                schedule_work(&nvdev->subchan_work);
 
@@ -2586,9 +2603,9 @@ static int netvsc_probe(struct hv_device *dev,
        return 0;
 
 register_failed:
-       rtnl_unlock();
        rndis_filter_device_remove(dev, nvdev);
 rndis_failed:
+       rtnl_unlock();
        netvsc_devinfo_put(device_info);
 devinfo_failed:
        free_percpu(net_device_ctx->vf_stats);
@@ -2753,6 +2770,8 @@ static int netvsc_netdev_event(struct notifier_block *this,
                return NOTIFY_DONE;
 
        switch (event) {
+       case NETDEV_POST_INIT:
+               return netvsc_prepare_bonding(event_dev);
        case NETDEV_REGISTER:
                return netvsc_register_vf(event_dev);
        case NETDEV_UNREGISTER:
@@ -2788,12 +2807,17 @@ static int __init netvsc_drv_init(void)
        }
        netvsc_ring_bytes = ring_size * PAGE_SIZE;
 
+       register_netdevice_notifier(&netvsc_netdev_notifier);
+
        ret = vmbus_driver_register(&netvsc_drv);
        if (ret)
-               return ret;
+               goto err_vmbus_reg;
 
-       register_netdevice_notifier(&netvsc_netdev_notifier);
        return 0;
+
+err_vmbus_reg:
+       unregister_netdevice_notifier(&netvsc_netdev_notifier);
+       return ret;
 }
 
 MODULE_LICENSE("GPL");
index d7b81a36d673bbe7d6d7a1dab376b2c4be6347f8..145eb0bd096d60466b4bde042d1fc7873830ecb6 100644 (file)
@@ -78,7 +78,7 @@ REG_STRIDE_FIELDS(EV_CH_E_CNTXT_0, ev_ch_e_cntxt_0,
                  0x0001c000 + 0x12000 * GSI_EE_AP, 0x80);
 
 static const u32 reg_ev_ch_e_cntxt_1_fmask[] = {
-       [R_LENGTH]                                      = GENMASK(19, 0),
+       [R_LENGTH]                                      = GENMASK(23, 0),
 };
 
 REG_STRIDE_FIELDS(EV_CH_E_CNTXT_1, ev_ch_e_cntxt_1,
index 21e9cac7312186380fa60de11f0a9178080b74b0..2d5b021b4ea6053eeb055a76fa4c7d9380cd2a53 100644 (file)
@@ -411,7 +411,7 @@ struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h,
        return addr;
 }
 
-static int ipvlan_process_v4_outbound(struct sk_buff *skb)
+static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
 {
        const struct iphdr *ip4h = ip_hdr(skb);
        struct net_device *dev = skb->dev;
@@ -453,13 +453,11 @@ out:
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+
+static noinline_for_stack int
+ipvlan_route_v6_outbound(struct net_device *dev, struct sk_buff *skb)
 {
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-       struct net_device *dev = skb->dev;
-       struct net *net = dev_net(dev);
-       struct dst_entry *dst;
-       int err, ret = NET_XMIT_DROP;
        struct flowi6 fl6 = {
                .flowi6_oif = dev->ifindex,
                .daddr = ip6h->daddr,
@@ -469,27 +467,38 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
                .flowi6_mark = skb->mark,
                .flowi6_proto = ip6h->nexthdr,
        };
+       struct dst_entry *dst;
+       int err;
 
-       dst = ip6_route_output(net, NULL, &fl6);
-       if (dst->error) {
-               ret = dst->error;
+       dst = ip6_route_output(dev_net(dev), NULL, &fl6);
+       err = dst->error;
+       if (err) {
                dst_release(dst);
-               goto err;
+               return err;
        }
        skb_dst_set(skb, dst);
+       return 0;
+}
+
+static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+{
+       struct net_device *dev = skb->dev;
+       int err, ret = NET_XMIT_DROP;
+
+       err = ipvlan_route_v6_outbound(dev, skb);
+       if (unlikely(err)) {
+               DEV_STATS_INC(dev, tx_errors);
+               kfree_skb(skb);
+               return err;
+       }
 
        memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 
-       err = ip6_local_out(net, skb->sk, skb);
+       err = ip6_local_out(dev_net(dev), skb->sk, skb);
        if (unlikely(net_xmit_eval(err)))
                DEV_STATS_INC(dev, tx_errors);
        else
                ret = NET_XMIT_SUCCESS;
-       goto out;
-err:
-       DEV_STATS_INC(dev, tx_errors);
-       kfree_skb(skb);
-out:
        return ret;
 }
 #else
index 02bd201bc7e58e566af962986e2c3bf00deba50e..c8da94af4161a5fc7e61aae5e11646bc0a5013fd 100644 (file)
@@ -780,7 +780,7 @@ static void macvlan_change_rx_flags(struct net_device *dev, int change)
        if (dev->flags & IFF_UP) {
                if (change & IFF_ALLMULTI)
                        dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1);
-               if (change & IFF_PROMISC)
+               if (!macvlan_passthru(vlan->port) && change & IFF_PROMISC)
                        dev_set_promiscuity(lowerdev,
                                            dev->flags & IFF_PROMISC ? 1 : -1);
 
index f60eb97e3a627eabaafb6db0c77f4ddc39761620..608953d4f98da9f2e44b006e8187b0e445b1d38c 100644 (file)
@@ -93,7 +93,7 @@ static void nsim_prog_set_loaded(struct bpf_prog *prog, bool loaded)
 {
        struct nsim_bpf_bound_prog *state;
 
-       if (!prog || !prog->aux->offload)
+       if (!prog || !bpf_prog_is_offloaded(prog->aux))
                return;
 
        state = prog->aux->offload->dev_priv;
@@ -311,7 +311,7 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
        if (!bpf->prog)
                return 0;
 
-       if (!bpf->prog->aux->offload) {
+       if (!bpf_prog_is_offloaded(bpf->prog->aux)) {
                NSIM_EA(bpf->extack, "xdpoffload of non-bound program");
                return -EINVAL;
        }
index 5a0f86f38f093123c010beea5ac39c2cda031ec8..39171380ccf29e27412bb2b9cee7102acc4a83ab 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/filter.h>
 #include <linux/netfilter_netdev.h>
 #include <linux/bpf_mprog.h>
+#include <linux/indirect_call_wrapper.h>
 
 #include <net/netkit.h>
 #include <net/dst.h>
@@ -68,6 +69,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
        netdev_tx_t ret_dev = NET_XMIT_SUCCESS;
        const struct bpf_mprog_entry *entry;
        struct net_device *peer;
+       int len = skb->len;
 
        rcu_read_lock();
        peer = rcu_dereference(nk->peer);
@@ -85,15 +87,22 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
        case NETKIT_PASS:
                skb->protocol = eth_type_trans(skb, skb->dev);
                skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
-               __netif_rx(skb);
+               if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
+                       dev_sw_netstats_tx_add(dev, 1, len);
+                       dev_sw_netstats_rx_add(peer, len);
+               } else {
+                       goto drop_stats;
+               }
                break;
        case NETKIT_REDIRECT:
+               dev_sw_netstats_tx_add(dev, 1, len);
                skb_do_redirect(skb);
                break;
        case NETKIT_DROP:
        default:
 drop:
                kfree_skb(skb);
+drop_stats:
                dev_core_stats_tx_dropped_inc(dev);
                ret_dev = NET_XMIT_DROP;
                break;
@@ -169,11 +178,18 @@ out:
        rcu_read_unlock();
 }
 
-static struct net_device *netkit_peer_dev(struct net_device *dev)
+INDIRECT_CALLABLE_SCOPE struct net_device *netkit_peer_dev(struct net_device *dev)
 {
        return rcu_dereference(netkit_priv(dev)->peer);
 }
 
+static void netkit_get_stats(struct net_device *dev,
+                            struct rtnl_link_stats64 *stats)
+{
+       dev_fetch_sw_netstats(stats, dev->tstats);
+       stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
+}
+
 static void netkit_uninit(struct net_device *dev);
 
 static const struct net_device_ops netkit_netdev_ops = {
@@ -184,6 +200,7 @@ static const struct net_device_ops netkit_netdev_ops = {
        .ndo_set_rx_headroom    = netkit_set_headroom,
        .ndo_get_iflink         = netkit_get_iflink,
        .ndo_get_peer_dev       = netkit_peer_dev,
+       .ndo_get_stats64        = netkit_get_stats,
        .ndo_uninit             = netkit_uninit,
        .ndo_features_check     = passthru_features_check,
 };
@@ -218,6 +235,7 @@ static void netkit_setup(struct net_device *dev)
 
        ether_setup(dev);
        dev->max_mtu = ETH_MAX_MTU;
+       dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
 
        dev->flags |= IFF_NOARP;
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
@@ -833,6 +851,12 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
                return -EACCES;
        }
 
+       if (data[IFLA_NETKIT_PEER_INFO]) {
+               NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO],
+                                   "netkit peer info cannot be changed after device creation");
+               return -EINVAL;
+       }
+
        if (data[IFLA_NETKIT_POLICY]) {
                attr = data[IFLA_NETKIT_POLICY];
                policy = nla_get_u32(attr);
index ebcdffdf4f0e0193635d2b479e8a9f7a32703509..52d05ce4a2819815963eebf4df399058835ff350 100644 (file)
@@ -453,6 +453,10 @@ ppp_sync_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg)
        case PPPIOCSMRU:
                if (get_user(val, (int __user *) argp))
                        break;
+               if (val > U16_MAX) {
+                       err = -EINVAL;
+                       break;
+               }
                if (val < PPP_MRU)
                        val = PPP_MRU;
                ap->mru = val;
@@ -687,7 +691,7 @@ ppp_sync_input(struct syncppp *ap, const u8 *buf, const u8 *flags, int count)
 
        /* strip address/control field if present */
        p = skb->data;
-       if (p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) {
+       if (skb->len >= 2 && p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) {
                /* chop off address/control */
                if (skb->len < 3)
                        goto err;
index a017e9de2119d5f5163981b9777e13e94f523f86..7b8afa589a53c457ef07878f207ddbaafa668c54 100644 (file)
@@ -1079,17 +1079,17 @@ static int aqc111_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
        u16 pkt_count = 0;
        u64 desc_hdr = 0;
        u16 vlan_tag = 0;
-       u32 skb_len = 0;
+       u32 skb_len;
 
        if (!skb)
                goto err;
 
-       if (skb->len == 0)
+       skb_len = skb->len;
+       if (skb_len < sizeof(desc_hdr))
                goto err;
 
-       skb_len = skb->len;
        /* RX Descriptor Header */
-       skb_trim(skb, skb->len - sizeof(desc_hdr));
+       skb_trim(skb, skb_len - sizeof(desc_hdr));
        desc_hdr = le64_to_cpup((u64 *)skb_tail_pointer(skb));
 
        /* Check these packets */
index aff39bf3161ded7e5a67289c6cbdac0eacbf0ef0..4ea0e155bb0d5d82d9e3f6f2ffce2f4266e552b7 100644 (file)
@@ -1583,11 +1583,11 @@ static int ax88179_reset(struct usbnet *dev)
 
        *tmp16 = AX_PHYPWR_RSTCTL_IPRL;
        ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16);
-       msleep(200);
+       msleep(500);
 
        *tmp = AX_CLK_SELECT_ACS | AX_CLK_SELECT_BCS;
        ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp);
-       msleep(100);
+       msleep(200);
 
        /* Ethernet PHY Auto Detach*/
        ax88179_auto_detach(dev);
index 344af3c5c836683db6e1183214391353df0009d3..e2e181378f4124c64b1d02bbe910f6209b57a356 100644 (file)
@@ -1289,6 +1289,7 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x19d2, 0x0168, 4)},
        {QMI_FIXED_INTF(0x19d2, 0x0176, 3)},
        {QMI_FIXED_INTF(0x19d2, 0x0178, 3)},
+       {QMI_FIXED_INTF(0x19d2, 0x0189, 4)},    /* ZTE MF290 */
        {QMI_FIXED_INTF(0x19d2, 0x0191, 4)},    /* ZTE EuFi890 */
        {QMI_FIXED_INTF(0x19d2, 0x0199, 1)},    /* ZTE MF820S */
        {QMI_FIXED_INTF(0x19d2, 0x0200, 1)},
index 9980517ed8b0d1bbe26083a8fcdfd217a58bfd63..57efb3454c57aca0c5bf4e790226f2c7176c8468 100644 (file)
@@ -236,8 +236,8 @@ static void veth_get_ethtool_stats(struct net_device *dev,
                                data[tx_idx + j] += *(u64 *)(base + offset);
                        }
                } while (u64_stats_fetch_retry(&rq_stats->syncp, start));
-               pp_idx = tx_idx + VETH_TQ_STATS_LEN;
        }
+       pp_idx = idx + dev->real_num_tx_queues * VETH_TQ_STATS_LEN;
 
 page_pool_stats:
        veth_get_page_pool_stats(dev, &data[pp_idx]);
@@ -373,7 +373,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
        skb_tx_timestamp(skb);
        if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
                if (!use_napi)
-                       dev_lstats_add(dev, length);
+                       dev_sw_netstats_tx_add(dev, 1, length);
                else
                        __veth_xdp_flush(rq);
        } else {
@@ -387,14 +387,6 @@ drop:
        return ret;
 }
 
-static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
-{
-       struct veth_priv *priv = netdev_priv(dev);
-
-       dev_lstats_read(dev, packets, bytes);
-       return atomic64_read(&priv->dropped);
-}
-
 static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
 {
        struct veth_priv *priv = netdev_priv(dev);
@@ -432,24 +424,24 @@ static void veth_get_stats64(struct net_device *dev,
        struct veth_priv *priv = netdev_priv(dev);
        struct net_device *peer;
        struct veth_stats rx;
-       u64 packets, bytes;
 
-       tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
-       tot->tx_bytes = bytes;
-       tot->tx_packets = packets;
+       tot->tx_dropped = atomic64_read(&priv->dropped);
+       dev_fetch_sw_netstats(tot, dev->tstats);
 
        veth_stats_rx(&rx, dev);
        tot->tx_dropped += rx.xdp_tx_err;
        tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
-       tot->rx_bytes = rx.xdp_bytes;
-       tot->rx_packets = rx.xdp_packets;
+       tot->rx_bytes += rx.xdp_bytes;
+       tot->rx_packets += rx.xdp_packets;
 
        rcu_read_lock();
        peer = rcu_dereference(priv->peer);
        if (peer) {
-               veth_stats_tx(peer, &packets, &bytes);
-               tot->rx_bytes += bytes;
-               tot->rx_packets += packets;
+               struct rtnl_link_stats64 tot_peer = {};
+
+               dev_fetch_sw_netstats(&tot_peer, peer->tstats);
+               tot->rx_bytes += tot_peer.tx_bytes;
+               tot->rx_packets += tot_peer.tx_packets;
 
                veth_stats_rx(&rx, peer);
                tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
@@ -1506,25 +1498,12 @@ static void veth_free_queues(struct net_device *dev)
 
 static int veth_dev_init(struct net_device *dev)
 {
-       int err;
-
-       dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
-       if (!dev->lstats)
-               return -ENOMEM;
-
-       err = veth_alloc_queues(dev);
-       if (err) {
-               free_percpu(dev->lstats);
-               return err;
-       }
-
-       return 0;
+       return veth_alloc_queues(dev);
 }
 
 static void veth_dev_free(struct net_device *dev)
 {
        veth_free_queues(dev);
-       free_percpu(dev->lstats);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -1796,6 +1775,7 @@ static void veth_setup(struct net_device *dev)
                               NETIF_F_HW_VLAN_STAG_RX);
        dev->needs_free_netdev = true;
        dev->priv_destructor = veth_dev_free;
+       dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
        dev->max_mtu = ETH_MAX_MTU;
 
        dev->hw_features = VETH_FEATURES;
index db766941b78f67690aae4b0a653af4efa3453e94..bb95ce43cd97d3827e3499ca29bb2f8b51961e1d 100644 (file)
@@ -121,22 +121,12 @@ struct net_vrf {
        int                     ifindex;
 };
 
-struct pcpu_dstats {
-       u64                     tx_pkts;
-       u64                     tx_bytes;
-       u64                     tx_drps;
-       u64                     rx_pkts;
-       u64                     rx_bytes;
-       u64                     rx_drps;
-       struct u64_stats_sync   syncp;
-};
-
 static void vrf_rx_stats(struct net_device *dev, int len)
 {
        struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
 
        u64_stats_update_begin(&dstats->syncp);
-       dstats->rx_pkts++;
+       dstats->rx_packets++;
        dstats->rx_bytes += len;
        u64_stats_update_end(&dstats->syncp);
 }
@@ -161,10 +151,10 @@ static void vrf_get_stats64(struct net_device *dev,
                do {
                        start = u64_stats_fetch_begin(&dstats->syncp);
                        tbytes = dstats->tx_bytes;
-                       tpkts = dstats->tx_pkts;
-                       tdrops = dstats->tx_drps;
+                       tpkts = dstats->tx_packets;
+                       tdrops = dstats->tx_drops;
                        rbytes = dstats->rx_bytes;
-                       rpkts = dstats->rx_pkts;
+                       rpkts = dstats->rx_packets;
                } while (u64_stats_fetch_retry(&dstats->syncp, start));
                stats->tx_bytes += tbytes;
                stats->tx_packets += tpkts;
@@ -421,7 +411,7 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
        if (likely(__netif_rx(skb) == NET_RX_SUCCESS))
                vrf_rx_stats(dev, len);
        else
-               this_cpu_inc(dev->dstats->rx_drps);
+               this_cpu_inc(dev->dstats->rx_drops);
 
        return NETDEV_TX_OK;
 }
@@ -616,11 +606,11 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
                struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
 
                u64_stats_update_begin(&dstats->syncp);
-               dstats->tx_pkts++;
+               dstats->tx_packets++;
                dstats->tx_bytes += len;
                u64_stats_update_end(&dstats->syncp);
        } else {
-               this_cpu_inc(dev->dstats->tx_drps);
+               this_cpu_inc(dev->dstats->tx_drops);
        }
 
        return ret;
@@ -1174,22 +1164,15 @@ static void vrf_dev_uninit(struct net_device *dev)
 
        vrf_rtable_release(dev, vrf);
        vrf_rt6_release(dev, vrf);
-
-       free_percpu(dev->dstats);
-       dev->dstats = NULL;
 }
 
 static int vrf_dev_init(struct net_device *dev)
 {
        struct net_vrf *vrf = netdev_priv(dev);
 
-       dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
-       if (!dev->dstats)
-               goto out_nomem;
-
        /* create the default dst which points back to us */
        if (vrf_rtable_create(dev) != 0)
-               goto out_stats;
+               goto out_nomem;
 
        if (vrf_rt6_create(dev) != 0)
                goto out_rth;
@@ -1203,9 +1186,6 @@ static int vrf_dev_init(struct net_device *dev)
 
 out_rth:
        vrf_rtable_release(dev, vrf);
-out_stats:
-       free_percpu(dev->dstats);
-       dev->dstats = NULL;
 out_nomem:
        return -ENOMEM;
 }
@@ -1704,6 +1684,8 @@ static void vrf_setup(struct net_device *dev)
        dev->min_mtu = IPV6_MIN_MTU;
        dev->max_mtu = IP6_MAX_MTU;
        dev->mtu = dev->max_mtu;
+
+       dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
 }
 
 static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
index 258dcc1039216f311a223fd348295d4b5e03a3ed..deb9636b0ecf8f47e832a0b07e9e049ba19bdf16 100644 (file)
@@ -210,7 +210,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
         */
        while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) {
                dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue));
-               ++dev->stats.tx_dropped;
+               DEV_STATS_INC(dev, tx_dropped);
        }
        skb_queue_splice_tail(&packets, &peer->staged_packet_queue);
        spin_unlock_bh(&peer->staged_packet_queue.lock);
@@ -228,7 +228,7 @@ err_icmp:
        else if (skb->protocol == htons(ETH_P_IPV6))
                icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
 err:
-       ++dev->stats.tx_errors;
+       DEV_STATS_INC(dev, tx_errors);
        kfree_skb(skb);
        return ret;
 }
index 0b3f0c843550957ee1fe3bed7185a7d990246c2b..a176653c88616b1bc871fe52fcea778b5e189f69 100644 (file)
@@ -416,20 +416,20 @@ dishonest_packet_peer:
        net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n",
                                dev->name, skb, peer->internal_id,
                                &peer->endpoint.addr);
-       ++dev->stats.rx_errors;
-       ++dev->stats.rx_frame_errors;
+       DEV_STATS_INC(dev, rx_errors);
+       DEV_STATS_INC(dev, rx_frame_errors);
        goto packet_processed;
 dishonest_packet_type:
        net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n",
                            dev->name, peer->internal_id, &peer->endpoint.addr);
-       ++dev->stats.rx_errors;
-       ++dev->stats.rx_frame_errors;
+       DEV_STATS_INC(dev, rx_errors);
+       DEV_STATS_INC(dev, rx_frame_errors);
        goto packet_processed;
 dishonest_packet_size:
        net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n",
                            dev->name, peer->internal_id, &peer->endpoint.addr);
-       ++dev->stats.rx_errors;
-       ++dev->stats.rx_length_errors;
+       DEV_STATS_INC(dev, rx_errors);
+       DEV_STATS_INC(dev, rx_length_errors);
        goto packet_processed;
 packet_processed:
        dev_kfree_skb(skb);
index 95c853b59e1dae1df8b4e5cbf4e3541e35806b82..0d48e0f4a1ba3e1f11825136a65de0867b204496 100644 (file)
@@ -333,7 +333,8 @@ err:
 void wg_packet_purge_staged_packets(struct wg_peer *peer)
 {
        spin_lock_bh(&peer->staged_packet_queue.lock);
-       peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen;
+       DEV_STATS_ADD(peer->device->dev, tx_dropped,
+                     peer->staged_packet_queue.qlen);
        __skb_queue_purge(&peer->staged_packet_queue);
        spin_unlock_bh(&peer->staged_packet_queue.lock);
 }
index e150d82eddb6c72859872abc7b3d7e73b7484e94..0c47be06c153be18c410324a62f12c77264b38b9 100644 (file)
@@ -57,8 +57,7 @@ config ATH9K_AHB
 
 config ATH9K_DEBUGFS
        bool "Atheros ath9k debugging"
-       depends on ATH9K && DEBUG_FS
-       select MAC80211_DEBUGFS
+       depends on ATH9K && DEBUG_FS && MAC80211_DEBUGFS
        select ATH9K_COMMON_DEBUG
        help
          Say Y, if you need access to ath9k's statistics for
@@ -70,7 +69,6 @@ config ATH9K_DEBUGFS
 config ATH9K_STATION_STATISTICS
        bool "Detailed station statistics"
        depends on ATH9K && ATH9K_DEBUGFS && DEBUG_FS
-       select MAC80211_DEBUGFS
        default n
        help
          This option enables detailed statistics for association stations.
index ca5e4fbcf8ce53108448b4fe338658e04b820398..6af606e5da657ed5d860f7cf67726424480ce595 100644 (file)
@@ -707,8 +707,10 @@ int iwl_mvm_mld_add_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
                        rcu_dereference_protected(mvm_sta->link[link_id],
                                                  lockdep_is_held(&mvm->mutex));
 
-               if (WARN_ON(!link_conf || !mvm_link_sta))
+               if (WARN_ON(!link_conf || !mvm_link_sta)) {
+                       ret = -EINVAL;
                        goto err;
+               }
 
                ret = iwl_mvm_mld_cfg_sta(mvm, sta, vif, link_sta, link_conf,
                                          mvm_link_sta);
index 63f3d4a5c9aa6e9daafc69fbe049c04b91b766ad..2cc2d2788f831257980e08cc475cdec0f6b3b4c6 100644 (file)
@@ -375,6 +375,7 @@ static int mt7921_load_clc(struct mt792x_dev *dev, const char *fw_name)
        int ret, i, len, offset = 0;
        u8 *clc_base = NULL, hw_encap = 0;
 
+       dev->phy.clc_chan_conf = 0xff;
        if (mt7921_disable_clc ||
            mt76_is_usb(&dev->mt76))
                return 0;
index 15c2fb0bcb1b98d1ea436db26c3c6da09441534a..aa918b9b0469f8444ce2fb1f68a5d79b71c53ede 100644 (file)
@@ -14,7 +14,7 @@
 static void
 mt7925_init_he_caps(struct mt792x_phy *phy, enum nl80211_band band,
                    struct ieee80211_sband_iftype_data *data,
-                       enum nl80211_iftype iftype)
+                   enum nl80211_iftype iftype)
 {
        struct ieee80211_sta_he_cap *he_cap = &data->he_cap;
        struct ieee80211_he_cap_elem *he_cap_elem = &he_cap->he_cap_elem;
@@ -53,7 +53,7 @@ mt7925_init_he_caps(struct mt792x_phy *phy, enum nl80211_band band,
                IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO |
                IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO;
 
-       switch (i) {
+       switch (iftype) {
        case NL80211_IFTYPE_AP:
                he_cap_elem->mac_cap_info[2] |=
                        IEEE80211_HE_MAC_CAP2_BSR;
index b027be0b0b6ff7781ebeab9ee9b5304a4f2f6d15..590b038e449e5c61c9950ce6074fe94020915207 100644 (file)
@@ -26,10 +26,14 @@ struct virtual_nci_dev {
        struct mutex mtx;
        struct sk_buff *send_buff;
        struct wait_queue_head wq;
+       bool running;
 };
 
 static int virtual_nci_open(struct nci_dev *ndev)
 {
+       struct virtual_nci_dev *vdev = nci_get_drvdata(ndev);
+
+       vdev->running = true;
        return 0;
 }
 
@@ -40,6 +44,7 @@ static int virtual_nci_close(struct nci_dev *ndev)
        mutex_lock(&vdev->mtx);
        kfree_skb(vdev->send_buff);
        vdev->send_buff = NULL;
+       vdev->running = false;
        mutex_unlock(&vdev->mtx);
 
        return 0;
@@ -50,7 +55,7 @@ static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
        struct virtual_nci_dev *vdev = nci_get_drvdata(ndev);
 
        mutex_lock(&vdev->mtx);
-       if (vdev->send_buff) {
+       if (vdev->send_buff || !vdev->running) {
                mutex_unlock(&vdev->mtx);
                kfree_skb(skb);
                return -1;
index 539d8920c2029b2146308b45cf7650ad992f72a9..bb0d92461b08b3796df3f2b4078987291bb2f3cf 100644 (file)
@@ -176,7 +176,7 @@ static struct notifier_block parisc_panic_block = {
 static int qemu_power_off(struct sys_off_data *data)
 {
        /* this turns the system off via SeaBIOS */
-       *(int *)data->cb_data = 0;
+       gsc_writel(0, (unsigned long) data->cb_data);
        pdc_soft_power_button(1);
        return NOTIFY_DONE;
 }
index 787354b849c75c34614c41e4bec3a9da235f60ac..4cef568231bf08cde753a83901c0a14be0b97c1d 100644 (file)
@@ -87,7 +87,6 @@ source "drivers/phy/motorola/Kconfig"
 source "drivers/phy/mscc/Kconfig"
 source "drivers/phy/qualcomm/Kconfig"
 source "drivers/phy/ralink/Kconfig"
-source "drivers/phy/realtek/Kconfig"
 source "drivers/phy/renesas/Kconfig"
 source "drivers/phy/rockchip/Kconfig"
 source "drivers/phy/samsung/Kconfig"
index 868a220ed0f6df60ee478df391777a212a210716..fb3dc9de611154abf78ebcf51c055eba03d263b5 100644 (file)
@@ -26,7 +26,6 @@ obj-y                                 += allwinner/   \
                                           mscc/        \
                                           qualcomm/    \
                                           ralink/      \
-                                          realtek/     \
                                           renesas/     \
                                           rockchip/    \
                                           samsung/     \
diff --git a/drivers/phy/realtek/Kconfig b/drivers/phy/realtek/Kconfig
deleted file mode 100644 (file)
index 75ac7e7..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Phy drivers for Realtek platforms
-#
-
-if ARCH_REALTEK || COMPILE_TEST
-
-config PHY_RTK_RTD_USB2PHY
-       tristate "Realtek RTD USB2 PHY Transceiver Driver"
-       depends on USB_SUPPORT
-       select GENERIC_PHY
-       select USB_PHY
-       select USB_COMMON
-       help
-         Enable this to support Realtek SoC USB2 phy transceiver.
-         The DHC (digital home center) RTD series SoCs used the Synopsys
-         DWC3 USB IP. This driver will do the PHY initialization
-         of the parameters.
-
-config PHY_RTK_RTD_USB3PHY
-       tristate "Realtek RTD USB3 PHY Transceiver Driver"
-       depends on USB_SUPPORT
-       select GENERIC_PHY
-       select USB_PHY
-       select USB_COMMON
-       help
-         Enable this to support Realtek SoC USB3 phy transceiver.
-         The DHC (digital home center) RTD series SoCs used the Synopsys
-         DWC3 USB IP. This driver will do the PHY initialization
-         of the parameters.
-
-endif # ARCH_REALTEK || COMPILE_TEST
diff --git a/drivers/phy/realtek/Makefile b/drivers/phy/realtek/Makefile
deleted file mode 100644 (file)
index ed7b47f..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_PHY_RTK_RTD_USB2PHY)      += phy-rtk-usb2.o
-obj-$(CONFIG_PHY_RTK_RTD_USB3PHY)      += phy-rtk-usb3.o
diff --git a/drivers/phy/realtek/phy-rtk-usb2.c b/drivers/phy/realtek/phy-rtk-usb2.c
deleted file mode 100644 (file)
index 0a64262..0000000
+++ /dev/null
@@ -1,1325 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  phy-rtk-usb2.c RTK usb2.0 PHY driver
- *
- * Copyright (C) 2023 Realtek Semiconductor Corporation
- *
- */
-
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/platform_device.h>
-#include <linux/uaccess.h>
-#include <linux/debugfs.h>
-#include <linux/nvmem-consumer.h>
-#include <linux/regmap.h>
-#include <linux/sys_soc.h>
-#include <linux/mfd/syscon.h>
-#include <linux/phy/phy.h>
-#include <linux/usb.h>
-#include <linux/usb/phy.h>
-#include <linux/usb/hcd.h>
-
-/* GUSB2PHYACCn register */
-#define PHY_NEW_REG_REQ BIT(25)
-#define PHY_VSTS_BUSY   BIT(23)
-#define PHY_VCTRL_SHIFT 8
-#define PHY_REG_DATA_MASK 0xff
-
-#define GET_LOW_NIBBLE(addr) ((addr) & 0x0f)
-#define GET_HIGH_NIBBLE(addr) (((addr) & 0xf0) >> 4)
-
-#define EFUS_USB_DC_CAL_RATE 2
-#define EFUS_USB_DC_CAL_MAX 7
-
-#define EFUS_USB_DC_DIS_RATE 1
-#define EFUS_USB_DC_DIS_MAX 7
-
-#define MAX_PHY_DATA_SIZE 20
-#define OFFEST_PHY_READ 0x20
-
-#define MAX_USB_PHY_NUM 4
-#define MAX_USB_PHY_PAGE0_DATA_SIZE 16
-#define MAX_USB_PHY_PAGE1_DATA_SIZE 16
-#define MAX_USB_PHY_PAGE2_DATA_SIZE 8
-
-#define SET_PAGE_OFFSET 0xf4
-#define SET_PAGE_0 0x9b
-#define SET_PAGE_1 0xbb
-#define SET_PAGE_2 0xdb
-
-#define PAGE_START 0xe0
-#define PAGE0_0XE4 0xe4
-#define PAGE0_0XE6 0xe6
-#define PAGE0_0XE7 0xe7
-#define PAGE1_0XE0 0xe0
-#define PAGE1_0XE2 0xe2
-
-#define SENSITIVITY_CTRL (BIT(4) | BIT(5) | BIT(6))
-#define ENABLE_AUTO_SENSITIVITY_CALIBRATION BIT(2)
-#define DEFAULT_DC_DRIVING_VALUE (0x8)
-#define DEFAULT_DC_DISCONNECTION_VALUE (0x6)
-#define HS_CLK_SELECT BIT(6)
-
-struct phy_reg {
-       void __iomem *reg_wrap_vstatus;
-       void __iomem *reg_gusb2phyacc0;
-       int vstatus_index;
-};
-
-struct phy_data {
-       u8 addr;
-       u8 data;
-};
-
-struct phy_cfg {
-       int page0_size;
-       struct phy_data page0[MAX_USB_PHY_PAGE0_DATA_SIZE];
-       int page1_size;
-       struct phy_data page1[MAX_USB_PHY_PAGE1_DATA_SIZE];
-       int page2_size;
-       struct phy_data page2[MAX_USB_PHY_PAGE2_DATA_SIZE];
-
-       int num_phy;
-
-       bool check_efuse;
-       int check_efuse_version;
-#define CHECK_EFUSE_V1 1
-#define CHECK_EFUSE_V2 2
-       int efuse_dc_driving_rate;
-       int efuse_dc_disconnect_rate;
-       int dc_driving_mask;
-       int dc_disconnect_mask;
-       bool usb_dc_disconnect_at_page0;
-       int driving_updated_for_dev_dis;
-
-       bool do_toggle;
-       bool do_toggle_driving;
-       bool use_default_parameter;
-       bool is_double_sensitivity_mode;
-};
-
-struct phy_parameter {
-       struct phy_reg phy_reg;
-
-       /* Get from efuse */
-       s8 efuse_usb_dc_cal;
-       s8 efuse_usb_dc_dis;
-
-       /* Get from dts */
-       bool inverse_hstx_sync_clock;
-       u32 driving_level;
-       s32 driving_level_compensate;
-       s32 disconnection_compensate;
-};
-
-struct rtk_phy {
-       struct usb_phy phy;
-       struct device *dev;
-
-       struct phy_cfg *phy_cfg;
-       int num_phy;
-       struct phy_parameter *phy_parameter;
-
-       struct dentry *debug_dir;
-};
-
-/* mapping 0xE0 to 0 ... 0xE7 to 7, 0xF0 to 8 ,,, 0xF7 to 15 */
-static inline int page_addr_to_array_index(u8 addr)
-{
-       return (int)((((addr) - PAGE_START) & 0x7) +
-               ((((addr) - PAGE_START) & 0x10) >> 1));
-}
-
-static inline u8 array_index_to_page_addr(int index)
-{
-       return ((((index) + PAGE_START) & 0x7) +
-               ((((index) & 0x8) << 1) + PAGE_START));
-}
-
-#define PHY_IO_TIMEOUT_USEC            (50000)
-#define PHY_IO_DELAY_US                        (100)
-
-static inline int utmi_wait_register(void __iomem *reg, u32 mask, u32 result)
-{
-       int ret;
-       unsigned int val;
-
-       ret = read_poll_timeout(readl, val, ((val & mask) == result),
-                               PHY_IO_DELAY_US, PHY_IO_TIMEOUT_USEC, false, reg);
-       if (ret) {
-               pr_err("%s can't program USB phy\n", __func__);
-               return -ETIMEDOUT;
-       }
-
-       return 0;
-}
-
-static char rtk_phy_read(struct phy_reg *phy_reg, char addr)
-{
-       void __iomem *reg_gusb2phyacc0 = phy_reg->reg_gusb2phyacc0;
-       unsigned int val;
-       int ret = 0;
-
-       addr -= OFFEST_PHY_READ;
-
-       /* polling until VBusy == 0 */
-       ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0);
-       if (ret)
-               return (char)ret;
-
-       /* VCtrl = low nibble of addr, and set PHY_NEW_REG_REQ */
-       val = PHY_NEW_REG_REQ | (GET_LOW_NIBBLE(addr) << PHY_VCTRL_SHIFT);
-       writel(val, reg_gusb2phyacc0);
-       ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0);
-       if (ret)
-               return (char)ret;
-
-       /* VCtrl = high nibble of addr, and set PHY_NEW_REG_REQ */
-       val = PHY_NEW_REG_REQ | (GET_HIGH_NIBBLE(addr) << PHY_VCTRL_SHIFT);
-       writel(val, reg_gusb2phyacc0);
-       ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0);
-       if (ret)
-               return (char)ret;
-
-       val = readl(reg_gusb2phyacc0);
-
-       return (char)(val & PHY_REG_DATA_MASK);
-}
-
-static int rtk_phy_write(struct phy_reg *phy_reg, char addr, char data)
-{
-       unsigned int val;
-       void __iomem *reg_wrap_vstatus = phy_reg->reg_wrap_vstatus;
-       void __iomem *reg_gusb2phyacc0 = phy_reg->reg_gusb2phyacc0;
-       int shift_bits = phy_reg->vstatus_index * 8;
-       int ret = 0;
-
-       /* write data to VStatusOut2 (data output to phy) */
-       writel((u32)data << shift_bits, reg_wrap_vstatus);
-
-       ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0);
-       if (ret)
-               return ret;
-
-       /* VCtrl = low nibble of addr, set PHY_NEW_REG_REQ */
-       val = PHY_NEW_REG_REQ | (GET_LOW_NIBBLE(addr) << PHY_VCTRL_SHIFT);
-
-       writel(val, reg_gusb2phyacc0);
-       ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0);
-       if (ret)
-               return ret;
-
-       /* VCtrl = high nibble of addr, set PHY_NEW_REG_REQ */
-       val = PHY_NEW_REG_REQ | (GET_HIGH_NIBBLE(addr) << PHY_VCTRL_SHIFT);
-
-       writel(val, reg_gusb2phyacc0);
-       ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0);
-       if (ret)
-               return ret;
-
-       return 0;
-}
-
-static int rtk_phy_set_page(struct phy_reg *phy_reg, int page)
-{
-       switch (page) {
-       case 0:
-               return rtk_phy_write(phy_reg, SET_PAGE_OFFSET, SET_PAGE_0);
-       case 1:
-               return rtk_phy_write(phy_reg, SET_PAGE_OFFSET, SET_PAGE_1);
-       case 2:
-               return rtk_phy_write(phy_reg, SET_PAGE_OFFSET, SET_PAGE_2);
-       default:
-               pr_err("%s error page=%d\n", __func__, page);
-       }
-
-       return -EINVAL;
-}
-
-static u8 __updated_dc_disconnect_level_page0_0xe4(struct phy_cfg *phy_cfg,
-                                                  struct phy_parameter *phy_parameter, u8 data)
-{
-       u8 ret;
-       s32 val;
-       s32 dc_disconnect_mask = phy_cfg->dc_disconnect_mask;
-       int offset = 4;
-
-       val = (s32)((data >> offset) & dc_disconnect_mask)
-                    + phy_parameter->efuse_usb_dc_dis
-                    + phy_parameter->disconnection_compensate;
-
-       if (val > dc_disconnect_mask)
-               val = dc_disconnect_mask;
-       else if (val < 0)
-               val = 0;
-
-       ret = (data & (~(dc_disconnect_mask << offset))) |
-                   (val & dc_disconnect_mask) << offset;
-
-       return ret;
-}
-
-/* updated disconnect level at page0 */
-static void update_dc_disconnect_level_at_page0(struct rtk_phy *rtk_phy,
-                                               struct phy_parameter *phy_parameter, bool update)
-{
-       struct phy_cfg *phy_cfg;
-       struct phy_reg *phy_reg;
-       struct phy_data *phy_data_page;
-       struct phy_data *phy_data;
-       u8 addr, data;
-       int offset = 4;
-       s32 dc_disconnect_mask;
-       int i;
-
-       phy_cfg = rtk_phy->phy_cfg;
-       phy_reg = &phy_parameter->phy_reg;
-
-       /* Set page 0 */
-       phy_data_page = phy_cfg->page0;
-       rtk_phy_set_page(phy_reg, 0);
-
-       i = page_addr_to_array_index(PAGE0_0XE4);
-       phy_data = phy_data_page + i;
-       if (!phy_data->addr) {
-               phy_data->addr = PAGE0_0XE4;
-               phy_data->data = rtk_phy_read(phy_reg, PAGE0_0XE4);
-       }
-
-       addr = phy_data->addr;
-       data = phy_data->data;
-       dc_disconnect_mask = phy_cfg->dc_disconnect_mask;
-
-       if (update)
-               data = __updated_dc_disconnect_level_page0_0xe4(phy_cfg, phy_parameter, data);
-       else
-               data = (data & ~(dc_disconnect_mask << offset)) |
-                       (DEFAULT_DC_DISCONNECTION_VALUE << offset);
-
-       if (rtk_phy_write(phy_reg, addr, data))
-               dev_err(rtk_phy->dev,
-                       "%s: Error to set page1 parameter addr=0x%x value=0x%x\n",
-                       __func__, addr, data);
-}
-
-static u8 __updated_dc_disconnect_level_page1_0xe2(struct phy_cfg *phy_cfg,
-                                                  struct phy_parameter *phy_parameter, u8 data)
-{
-       u8 ret;
-       s32 val;
-       s32 dc_disconnect_mask = phy_cfg->dc_disconnect_mask;
-
-       if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) {
-               val = (s32)(data & dc_disconnect_mask)
-                           + phy_parameter->efuse_usb_dc_dis
-                           + phy_parameter->disconnection_compensate;
-       } else { /* for CHECK_EFUSE_V2 or no efuse */
-               if (phy_parameter->efuse_usb_dc_dis)
-                       val = (s32)(phy_parameter->efuse_usb_dc_dis +
-                                   phy_parameter->disconnection_compensate);
-               else
-                       val = (s32)((data & dc_disconnect_mask) +
-                                   phy_parameter->disconnection_compensate);
-       }
-
-       if (val > dc_disconnect_mask)
-               val = dc_disconnect_mask;
-       else if (val < 0)
-               val = 0;
-
-       ret = (data & (~dc_disconnect_mask)) | (val & dc_disconnect_mask);
-
-       return ret;
-}
-
-/* updated disconnect level at page1 */
-static void update_dc_disconnect_level_at_page1(struct rtk_phy *rtk_phy,
-                                               struct phy_parameter *phy_parameter, bool update)
-{
-       struct phy_cfg *phy_cfg;
-       struct phy_data *phy_data_page;
-       struct phy_data *phy_data;
-       struct phy_reg *phy_reg;
-       u8 addr, data;
-       s32 dc_disconnect_mask;
-       int i;
-
-       phy_cfg = rtk_phy->phy_cfg;
-       phy_reg = &phy_parameter->phy_reg;
-
-       /* Set page 1 */
-       phy_data_page = phy_cfg->page1;
-       rtk_phy_set_page(phy_reg, 1);
-
-       i = page_addr_to_array_index(PAGE1_0XE2);
-       phy_data = phy_data_page + i;
-       if (!phy_data->addr) {
-               phy_data->addr = PAGE1_0XE2;
-               phy_data->data = rtk_phy_read(phy_reg, PAGE1_0XE2);
-       }
-
-       addr = phy_data->addr;
-       data = phy_data->data;
-       dc_disconnect_mask = phy_cfg->dc_disconnect_mask;
-
-       if (update)
-               data = __updated_dc_disconnect_level_page1_0xe2(phy_cfg, phy_parameter, data);
-       else
-               data = (data & ~dc_disconnect_mask) | DEFAULT_DC_DISCONNECTION_VALUE;
-
-       if (rtk_phy_write(phy_reg, addr, data))
-               dev_err(rtk_phy->dev,
-                       "%s: Error to set page1 parameter addr=0x%x value=0x%x\n",
-                       __func__, addr, data);
-}
-
-static void update_dc_disconnect_level(struct rtk_phy *rtk_phy,
-                                      struct phy_parameter *phy_parameter, bool update)
-{
-       struct phy_cfg *phy_cfg = rtk_phy->phy_cfg;
-
-       if (phy_cfg->usb_dc_disconnect_at_page0)
-               update_dc_disconnect_level_at_page0(rtk_phy, phy_parameter, update);
-       else
-               update_dc_disconnect_level_at_page1(rtk_phy, phy_parameter, update);
-}
-
-static u8 __update_dc_driving_page0_0xe4(struct phy_cfg *phy_cfg,
-                                        struct phy_parameter *phy_parameter, u8 data)
-{
-       s32 driving_level_compensate = phy_parameter->driving_level_compensate;
-       s32 dc_driving_mask = phy_cfg->dc_driving_mask;
-       s32 val;
-       u8 ret;
-
-       if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) {
-               val = (s32)(data & dc_driving_mask) + driving_level_compensate
-                           + phy_parameter->efuse_usb_dc_cal;
-       } else { /* for CHECK_EFUSE_V2 or no efuse */
-               if (phy_parameter->efuse_usb_dc_cal)
-                       val = (s32)((phy_parameter->efuse_usb_dc_cal & dc_driving_mask)
-                                   + driving_level_compensate);
-               else
-                       val = (s32)(data & dc_driving_mask);
-       }
-
-       if (val > dc_driving_mask)
-               val = dc_driving_mask;
-       else if (val < 0)
-               val = 0;
-
-       ret = (data & (~dc_driving_mask)) | (val & dc_driving_mask);
-
-       return ret;
-}
-
-static void update_dc_driving_level(struct rtk_phy *rtk_phy,
-                                   struct phy_parameter *phy_parameter)
-{
-       struct phy_cfg *phy_cfg;
-       struct phy_reg *phy_reg;
-
-       phy_reg = &phy_parameter->phy_reg;
-       phy_cfg = rtk_phy->phy_cfg;
-       if (!phy_cfg->page0[4].addr) {
-               rtk_phy_set_page(phy_reg, 0);
-               phy_cfg->page0[4].addr = PAGE0_0XE4;
-               phy_cfg->page0[4].data = rtk_phy_read(phy_reg, PAGE0_0XE4);
-       }
-
-       if (phy_parameter->driving_level != DEFAULT_DC_DRIVING_VALUE) {
-               u32 dc_driving_mask;
-               u8 driving_level;
-               u8 data;
-
-               data = phy_cfg->page0[4].data;
-               dc_driving_mask = phy_cfg->dc_driving_mask;
-               driving_level = data & dc_driving_mask;
-
-               dev_dbg(rtk_phy->dev, "%s driving_level=%d => dts driving_level=%d\n",
-                       __func__, driving_level, phy_parameter->driving_level);
-
-               phy_cfg->page0[4].data = (data & (~dc_driving_mask)) |
-                           (phy_parameter->driving_level & dc_driving_mask);
-       }
-
-       phy_cfg->page0[4].data = __update_dc_driving_page0_0xe4(phy_cfg,
-                                                               phy_parameter,
-                                                               phy_cfg->page0[4].data);
-}
-
-static void update_hs_clk_select(struct rtk_phy *rtk_phy,
-                                struct phy_parameter *phy_parameter)
-{
-       struct phy_cfg *phy_cfg;
-       struct phy_reg *phy_reg;
-
-       phy_cfg = rtk_phy->phy_cfg;
-       phy_reg = &phy_parameter->phy_reg;
-
-       if (phy_parameter->inverse_hstx_sync_clock) {
-               if (!phy_cfg->page0[6].addr) {
-                       rtk_phy_set_page(phy_reg, 0);
-                       phy_cfg->page0[6].addr = PAGE0_0XE6;
-                       phy_cfg->page0[6].data = rtk_phy_read(phy_reg, PAGE0_0XE6);
-               }
-
-               phy_cfg->page0[6].data = phy_cfg->page0[6].data | HS_CLK_SELECT;
-       }
-}
-
-static void do_rtk_phy_toggle(struct rtk_phy *rtk_phy,
-                             int index, bool connect)
-{
-       struct phy_parameter *phy_parameter;
-       struct phy_cfg *phy_cfg;
-       struct phy_reg *phy_reg;
-       struct phy_data *phy_data_page;
-       u8 addr, data;
-       int i;
-
-       phy_cfg = rtk_phy->phy_cfg;
-       phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-       phy_reg = &phy_parameter->phy_reg;
-
-       if (!phy_cfg->do_toggle)
-               goto out;
-
-       if (phy_cfg->is_double_sensitivity_mode)
-               goto do_toggle_driving;
-
-       /* Set page 0 */
-       rtk_phy_set_page(phy_reg, 0);
-
-       addr = PAGE0_0XE7;
-       data = rtk_phy_read(phy_reg, addr);
-
-       if (connect)
-               rtk_phy_write(phy_reg, addr, data & (~SENSITIVITY_CTRL));
-       else
-               rtk_phy_write(phy_reg, addr, data | (SENSITIVITY_CTRL));
-
-do_toggle_driving:
-
-       if (!phy_cfg->do_toggle_driving)
-               goto do_toggle;
-
-       /* Page 0 addr 0xE4 driving capability */
-
-       /* Set page 0 */
-       phy_data_page = phy_cfg->page0;
-       rtk_phy_set_page(phy_reg, 0);
-
-       i = page_addr_to_array_index(PAGE0_0XE4);
-       addr = phy_data_page[i].addr;
-       data = phy_data_page[i].data;
-
-       if (connect) {
-               rtk_phy_write(phy_reg, addr, data);
-       } else {
-               u8 value;
-               s32 tmp;
-               s32 driving_updated =
-                           phy_cfg->driving_updated_for_dev_dis;
-               s32 dc_driving_mask = phy_cfg->dc_driving_mask;
-
-               tmp = (s32)(data & dc_driving_mask) + driving_updated;
-
-               if (tmp > dc_driving_mask)
-                       tmp = dc_driving_mask;
-               else if (tmp < 0)
-                       tmp = 0;
-
-               value = (data & (~dc_driving_mask)) | (tmp & dc_driving_mask);
-
-               rtk_phy_write(phy_reg, addr, value);
-       }
-
-do_toggle:
-       /* restore dc disconnect level before toggle */
-       update_dc_disconnect_level(rtk_phy, phy_parameter, false);
-
-       /* Set page 1 */
-       rtk_phy_set_page(phy_reg, 1);
-
-       addr = PAGE1_0XE0;
-       data = rtk_phy_read(phy_reg, addr);
-
-       rtk_phy_write(phy_reg, addr, data &
-                     (~ENABLE_AUTO_SENSITIVITY_CALIBRATION));
-       mdelay(1);
-       rtk_phy_write(phy_reg, addr, data |
-                     (ENABLE_AUTO_SENSITIVITY_CALIBRATION));
-
-       /* update dc disconnect level after toggle */
-       update_dc_disconnect_level(rtk_phy, phy_parameter, true);
-
-out:
-       return;
-}
-
-static int do_rtk_phy_init(struct rtk_phy *rtk_phy, int index)
-{
-       struct phy_parameter *phy_parameter;
-       struct phy_cfg *phy_cfg;
-       struct phy_data *phy_data_page;
-       struct phy_reg *phy_reg;
-       int i;
-
-       phy_cfg = rtk_phy->phy_cfg;
-       phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-       phy_reg = &phy_parameter->phy_reg;
-
-       if (phy_cfg->use_default_parameter) {
-               dev_dbg(rtk_phy->dev, "%s phy#%d use default parameter\n",
-                       __func__, index);
-               goto do_toggle;
-       }
-
-       /* Set page 0 */
-       phy_data_page = phy_cfg->page0;
-       rtk_phy_set_page(phy_reg, 0);
-
-       for (i = 0; i < phy_cfg->page0_size; i++) {
-               struct phy_data *phy_data = phy_data_page + i;
-               u8 addr = phy_data->addr;
-               u8 data = phy_data->data;
-
-               if (!addr)
-                       continue;
-
-               if (rtk_phy_write(phy_reg, addr, data)) {
-                       dev_err(rtk_phy->dev,
-                               "%s: Error to set page0 parameter addr=0x%x value=0x%x\n",
-                               __func__, addr, data);
-                       return -EINVAL;
-               }
-       }
-
-       /* Set page 1 */
-       phy_data_page = phy_cfg->page1;
-       rtk_phy_set_page(phy_reg, 1);
-
-       for (i = 0; i < phy_cfg->page1_size; i++) {
-               struct phy_data *phy_data = phy_data_page + i;
-               u8 addr = phy_data->addr;
-               u8 data = phy_data->data;
-
-               if (!addr)
-                       continue;
-
-               if (rtk_phy_write(phy_reg, addr, data)) {
-                       dev_err(rtk_phy->dev,
-                               "%s: Error to set page1 parameter addr=0x%x value=0x%x\n",
-                               __func__, addr, data);
-                       return -EINVAL;
-               }
-       }
-
-       if (phy_cfg->page2_size == 0)
-               goto do_toggle;
-
-       /* Set page 2 */
-       phy_data_page = phy_cfg->page2;
-       rtk_phy_set_page(phy_reg, 2);
-
-       for (i = 0; i < phy_cfg->page2_size; i++) {
-               struct phy_data *phy_data = phy_data_page + i;
-               u8 addr = phy_data->addr;
-               u8 data = phy_data->data;
-
-               if (!addr)
-                       continue;
-
-               if (rtk_phy_write(phy_reg, addr, data)) {
-                       dev_err(rtk_phy->dev,
-                               "%s: Error to set page2 parameter addr=0x%x value=0x%x\n",
-                               __func__, addr, data);
-                       return -EINVAL;
-               }
-       }
-
-do_toggle:
-       do_rtk_phy_toggle(rtk_phy, index, false);
-
-       return 0;
-}
-
-static int rtk_phy_init(struct phy *phy)
-{
-       struct rtk_phy *rtk_phy = phy_get_drvdata(phy);
-       unsigned long phy_init_time = jiffies;
-       int i, ret = 0;
-
-       if (!rtk_phy)
-               return -EINVAL;
-
-       for (i = 0; i < rtk_phy->num_phy; i++)
-               ret = do_rtk_phy_init(rtk_phy, i);
-
-       dev_dbg(rtk_phy->dev, "Initialized RTK USB 2.0 PHY (take %dms)\n",
-               jiffies_to_msecs(jiffies - phy_init_time));
-       return ret;
-}
-
-static int rtk_phy_exit(struct phy *phy)
-{
-       return 0;
-}
-
-static const struct phy_ops ops = {
-       .init           = rtk_phy_init,
-       .exit           = rtk_phy_exit,
-       .owner          = THIS_MODULE,
-};
-
-static void rtk_phy_toggle(struct usb_phy *usb2_phy, bool connect, int port)
-{
-       int index = port;
-       struct rtk_phy *rtk_phy = NULL;
-
-       rtk_phy = dev_get_drvdata(usb2_phy->dev);
-
-       if (index > rtk_phy->num_phy) {
-               dev_err(rtk_phy->dev, "%s: The port=%d is not in usb phy (num_phy=%d)\n",
-                       __func__, index, rtk_phy->num_phy);
-               return;
-       }
-
-       do_rtk_phy_toggle(rtk_phy, index, connect);
-}
-
-static int rtk_phy_notify_port_status(struct usb_phy *x, int port,
-                                     u16 portstatus, u16 portchange)
-{
-       bool connect = false;
-
-       pr_debug("%s port=%d portstatus=0x%x portchange=0x%x\n",
-                __func__, port, (int)portstatus, (int)portchange);
-       if (portstatus & USB_PORT_STAT_CONNECTION)
-               connect = true;
-
-       if (portchange & USB_PORT_STAT_C_CONNECTION)
-               rtk_phy_toggle(x, connect, port);
-
-       return 0;
-}
-
-#ifdef CONFIG_DEBUG_FS
-static struct dentry *create_phy_debug_root(void)
-{
-       struct dentry *phy_debug_root;
-
-       phy_debug_root = debugfs_lookup("phy", usb_debug_root);
-       if (!phy_debug_root)
-               phy_debug_root = debugfs_create_dir("phy", usb_debug_root);
-
-       return phy_debug_root;
-}
-
-static int rtk_usb2_parameter_show(struct seq_file *s, void *unused)
-{
-       struct rtk_phy *rtk_phy = s->private;
-       struct phy_cfg *phy_cfg;
-       int i, index;
-
-       phy_cfg = rtk_phy->phy_cfg;
-
-       seq_puts(s, "Property:\n");
-       seq_printf(s, "  check_efuse: %s\n",
-                  phy_cfg->check_efuse ? "Enable" : "Disable");
-       seq_printf(s, "  check_efuse_version: %d\n",
-                  phy_cfg->check_efuse_version);
-       seq_printf(s, "  efuse_dc_driving_rate: %d\n",
-                  phy_cfg->efuse_dc_driving_rate);
-       seq_printf(s, "  dc_driving_mask: 0x%x\n",
-                  phy_cfg->dc_driving_mask);
-       seq_printf(s, "  efuse_dc_disconnect_rate: %d\n",
-                  phy_cfg->efuse_dc_disconnect_rate);
-       seq_printf(s, "  dc_disconnect_mask: 0x%x\n",
-                  phy_cfg->dc_disconnect_mask);
-       seq_printf(s, "  usb_dc_disconnect_at_page0: %s\n",
-                  phy_cfg->usb_dc_disconnect_at_page0 ? "true" : "false");
-       seq_printf(s, "  do_toggle: %s\n",
-                  phy_cfg->do_toggle ? "Enable" : "Disable");
-       seq_printf(s, "  do_toggle_driving: %s\n",
-                  phy_cfg->do_toggle_driving ? "Enable" : "Disable");
-       seq_printf(s, "  driving_updated_for_dev_dis: 0x%x\n",
-                  phy_cfg->driving_updated_for_dev_dis);
-       seq_printf(s, "  use_default_parameter: %s\n",
-                  phy_cfg->use_default_parameter ? "Enable" : "Disable");
-       seq_printf(s, "  is_double_sensitivity_mode: %s\n",
-                  phy_cfg->is_double_sensitivity_mode ? "Enable" : "Disable");
-
-       for (index = 0; index < rtk_phy->num_phy; index++) {
-               struct phy_parameter *phy_parameter;
-               struct phy_reg *phy_reg;
-               struct phy_data *phy_data_page;
-
-               phy_parameter =  &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-               phy_reg = &phy_parameter->phy_reg;
-
-               seq_printf(s, "PHY %d:\n", index);
-
-               seq_puts(s, "Page 0:\n");
-               /* Set page 0 */
-               phy_data_page = phy_cfg->page0;
-               rtk_phy_set_page(phy_reg, 0);
-
-               for (i = 0; i < phy_cfg->page0_size; i++) {
-                       struct phy_data *phy_data = phy_data_page + i;
-                       u8 addr = array_index_to_page_addr(i);
-                       u8 data = phy_data->data;
-                       u8 value = rtk_phy_read(phy_reg, addr);
-
-                       if (phy_data->addr)
-                               seq_printf(s, "  Page 0: addr=0x%x data=0x%02x ==> read value=0x%02x\n",
-                                          addr, data, value);
-                       else
-                               seq_printf(s, "  Page 0: addr=0x%x data=none ==> read value=0x%02x\n",
-                                          addr, value);
-               }
-
-               seq_puts(s, "Page 1:\n");
-               /* Set page 1 */
-               phy_data_page = phy_cfg->page1;
-               rtk_phy_set_page(phy_reg, 1);
-
-               for (i = 0; i < phy_cfg->page1_size; i++) {
-                       struct phy_data *phy_data = phy_data_page + i;
-                       u8 addr = array_index_to_page_addr(i);
-                       u8 data = phy_data->data;
-                       u8 value = rtk_phy_read(phy_reg, addr);
-
-                       if (phy_data->addr)
-                               seq_printf(s, "  Page 1: addr=0x%x data=0x%02x ==> read value=0x%02x\n",
-                                          addr, data, value);
-                       else
-                               seq_printf(s, "  Page 1: addr=0x%x data=none ==> read value=0x%02x\n",
-                                          addr, value);
-               }
-
-               if (phy_cfg->page2_size == 0)
-                       goto out;
-
-               seq_puts(s, "Page 2:\n");
-               /* Set page 2 */
-               phy_data_page = phy_cfg->page2;
-               rtk_phy_set_page(phy_reg, 2);
-
-               for (i = 0; i < phy_cfg->page2_size; i++) {
-                       struct phy_data *phy_data = phy_data_page + i;
-                       u8 addr = array_index_to_page_addr(i);
-                       u8 data = phy_data->data;
-                       u8 value = rtk_phy_read(phy_reg, addr);
-
-                       if (phy_data->addr)
-                               seq_printf(s, "  Page 2: addr=0x%x data=0x%02x ==> read value=0x%02x\n",
-                                          addr, data, value);
-                       else
-                               seq_printf(s, "  Page 2: addr=0x%x data=none ==> read value=0x%02x\n",
-                                          addr, value);
-               }
-
-out:
-               seq_puts(s, "PHY Property:\n");
-               seq_printf(s, "  efuse_usb_dc_cal: %d\n",
-                          (int)phy_parameter->efuse_usb_dc_cal);
-               seq_printf(s, "  efuse_usb_dc_dis: %d\n",
-                          (int)phy_parameter->efuse_usb_dc_dis);
-               seq_printf(s, "  inverse_hstx_sync_clock: %s\n",
-                          phy_parameter->inverse_hstx_sync_clock ? "Enable" : "Disable");
-               seq_printf(s, "  driving_level: %d\n",
-                          phy_parameter->driving_level);
-               seq_printf(s, "  driving_level_compensate: %d\n",
-                          phy_parameter->driving_level_compensate);
-               seq_printf(s, "  disconnection_compensate: %d\n",
-                          phy_parameter->disconnection_compensate);
-       }
-
-       return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(rtk_usb2_parameter);
-
-static inline void create_debug_files(struct rtk_phy *rtk_phy)
-{
-       struct dentry *phy_debug_root = NULL;
-
-       phy_debug_root = create_phy_debug_root();
-       if (!phy_debug_root)
-               return;
-
-       rtk_phy->debug_dir = debugfs_create_dir(dev_name(rtk_phy->dev),
-                                               phy_debug_root);
-
-       debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy,
-                           &rtk_usb2_parameter_fops);
-
-       return;
-}
-
-static inline void remove_debug_files(struct rtk_phy *rtk_phy)
-{
-       debugfs_remove_recursive(rtk_phy->debug_dir);
-}
-#else
-static inline void create_debug_files(struct rtk_phy *rtk_phy) { }
-static inline void remove_debug_files(struct rtk_phy *rtk_phy) { }
-#endif /* CONFIG_DEBUG_FS */
-
-static int get_phy_data_by_efuse(struct rtk_phy *rtk_phy,
-                                struct phy_parameter *phy_parameter, int index)
-{
-       struct phy_cfg *phy_cfg = rtk_phy->phy_cfg;
-       u8 value = 0;
-       struct nvmem_cell *cell;
-       struct soc_device_attribute rtk_soc_groot[] = {
-                   { .family = "Realtek Groot",},
-                   { /* empty */ } };
-
-       if (!phy_cfg->check_efuse)
-               goto out;
-
-       /* Read efuse for usb dc cal */
-       cell = nvmem_cell_get(rtk_phy->dev, "usb-dc-cal");
-       if (IS_ERR(cell)) {
-               dev_dbg(rtk_phy->dev, "%s no usb-dc-cal: %ld\n",
-                       __func__, PTR_ERR(cell));
-       } else {
-               unsigned char *buf;
-               size_t buf_size;
-
-               buf = nvmem_cell_read(cell, &buf_size);
-               if (!IS_ERR(buf)) {
-                       value = buf[0] & phy_cfg->dc_driving_mask;
-                       kfree(buf);
-               }
-               nvmem_cell_put(cell);
-       }
-
-       if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) {
-               int rate = phy_cfg->efuse_dc_driving_rate;
-
-               if (value <= EFUS_USB_DC_CAL_MAX)
-                       phy_parameter->efuse_usb_dc_cal = (int8_t)(value * rate);
-               else
-                       phy_parameter->efuse_usb_dc_cal = -(int8_t)
-                                   ((EFUS_USB_DC_CAL_MAX & value) * rate);
-
-               if (soc_device_match(rtk_soc_groot)) {
-                       dev_dbg(rtk_phy->dev, "For groot IC we need a workaround to adjust efuse_usb_dc_cal\n");
-
-                       /* We don't multiple dc_cal_rate=2 for positive dc cal compensate */
-                       if (value <= EFUS_USB_DC_CAL_MAX)
-                               phy_parameter->efuse_usb_dc_cal = (int8_t)(value);
-
-                       /* We set max dc cal compensate is 0x8 if otp is 0x7 */
-                       if (value == 0x7)
-                               phy_parameter->efuse_usb_dc_cal = (int8_t)(value + 1);
-               }
-       } else { /* for CHECK_EFUSE_V2 */
-               phy_parameter->efuse_usb_dc_cal = value & phy_cfg->dc_driving_mask;
-       }
-
-       /* Read efuse for usb dc disconnect level */
-       value = 0;
-       cell = nvmem_cell_get(rtk_phy->dev, "usb-dc-dis");
-       if (IS_ERR(cell)) {
-               dev_dbg(rtk_phy->dev, "%s no usb-dc-dis: %ld\n",
-                       __func__, PTR_ERR(cell));
-       } else {
-               unsigned char *buf;
-               size_t buf_size;
-
-               buf = nvmem_cell_read(cell, &buf_size);
-               if (!IS_ERR(buf)) {
-                       value = buf[0] & phy_cfg->dc_disconnect_mask;
-                       kfree(buf);
-               }
-               nvmem_cell_put(cell);
-       }
-
-       if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) {
-               int rate = phy_cfg->efuse_dc_disconnect_rate;
-
-               if (value <= EFUS_USB_DC_DIS_MAX)
-                       phy_parameter->efuse_usb_dc_dis = (int8_t)(value * rate);
-               else
-                       phy_parameter->efuse_usb_dc_dis = -(int8_t)
-                                   ((EFUS_USB_DC_DIS_MAX & value) * rate);
-       } else { /* for CHECK_EFUSE_V2 */
-               phy_parameter->efuse_usb_dc_dis = value & phy_cfg->dc_disconnect_mask;
-       }
-
-out:
-       return 0;
-}
-
-static int parse_phy_data(struct rtk_phy *rtk_phy)
-{
-       struct device *dev = rtk_phy->dev;
-       struct device_node *np = dev->of_node;
-       struct phy_parameter *phy_parameter;
-       int ret = 0;
-       int index;
-
-       rtk_phy->phy_parameter = devm_kzalloc(dev, sizeof(struct phy_parameter) *
-                                               rtk_phy->num_phy, GFP_KERNEL);
-       if (!rtk_phy->phy_parameter)
-               return -ENOMEM;
-
-       for (index = 0; index < rtk_phy->num_phy; index++) {
-               phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-
-               phy_parameter->phy_reg.reg_wrap_vstatus = of_iomap(np, 0);
-               phy_parameter->phy_reg.reg_gusb2phyacc0 = of_iomap(np, 1) + index;
-               phy_parameter->phy_reg.vstatus_index = index;
-
-               if (of_property_read_bool(np, "realtek,inverse-hstx-sync-clock"))
-                       phy_parameter->inverse_hstx_sync_clock = true;
-               else
-                       phy_parameter->inverse_hstx_sync_clock = false;
-
-               if (of_property_read_u32_index(np, "realtek,driving-level",
-                                              index, &phy_parameter->driving_level))
-                       phy_parameter->driving_level = DEFAULT_DC_DRIVING_VALUE;
-
-               if (of_property_read_u32_index(np, "realtek,driving-level-compensate",
-                                              index, &phy_parameter->driving_level_compensate))
-                       phy_parameter->driving_level_compensate = 0;
-
-               if (of_property_read_u32_index(np, "realtek,disconnection-compensate",
-                                              index, &phy_parameter->disconnection_compensate))
-                       phy_parameter->disconnection_compensate = 0;
-
-               get_phy_data_by_efuse(rtk_phy, phy_parameter, index);
-
-               update_dc_driving_level(rtk_phy, phy_parameter);
-
-               update_hs_clk_select(rtk_phy, phy_parameter);
-       }
-
-       return ret;
-}
-
-static int rtk_usb2phy_probe(struct platform_device *pdev)
-{
-       struct rtk_phy *rtk_phy;
-       struct device *dev = &pdev->dev;
-       struct phy *generic_phy;
-       struct phy_provider *phy_provider;
-       const struct phy_cfg *phy_cfg;
-       int ret = 0;
-
-       phy_cfg = of_device_get_match_data(dev);
-       if (!phy_cfg) {
-               dev_err(dev, "phy config are not assigned!\n");
-               return -EINVAL;
-       }
-
-       rtk_phy = devm_kzalloc(dev, sizeof(*rtk_phy), GFP_KERNEL);
-       if (!rtk_phy)
-               return -ENOMEM;
-
-       rtk_phy->dev                    = &pdev->dev;
-       rtk_phy->phy.dev                = rtk_phy->dev;
-       rtk_phy->phy.label              = "rtk-usb2phy";
-       rtk_phy->phy.notify_port_status = rtk_phy_notify_port_status;
-
-       rtk_phy->phy_cfg = devm_kzalloc(dev, sizeof(*phy_cfg), GFP_KERNEL);
-
-       memcpy(rtk_phy->phy_cfg, phy_cfg, sizeof(*phy_cfg));
-
-       rtk_phy->num_phy = phy_cfg->num_phy;
-
-       ret = parse_phy_data(rtk_phy);
-       if (ret)
-               goto err;
-
-       platform_set_drvdata(pdev, rtk_phy);
-
-       generic_phy = devm_phy_create(rtk_phy->dev, NULL, &ops);
-       if (IS_ERR(generic_phy))
-               return PTR_ERR(generic_phy);
-
-       phy_set_drvdata(generic_phy, rtk_phy);
-
-       phy_provider = devm_of_phy_provider_register(rtk_phy->dev,
-                                                    of_phy_simple_xlate);
-       if (IS_ERR(phy_provider))
-               return PTR_ERR(phy_provider);
-
-       ret = usb_add_phy_dev(&rtk_phy->phy);
-       if (ret)
-               goto err;
-
-       create_debug_files(rtk_phy);
-
-err:
-       return ret;
-}
-
-static void rtk_usb2phy_remove(struct platform_device *pdev)
-{
-       struct rtk_phy *rtk_phy = platform_get_drvdata(pdev);
-
-       remove_debug_files(rtk_phy);
-
-       usb_remove_phy(&rtk_phy->phy);
-}
-
-static const struct phy_cfg rtd1295_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [0] = {0xe0, 0x90},
-                  [3] = {0xe3, 0x3a},
-                  [4] = {0xe4, 0x68},
-                  [6] = {0xe6, 0x91},
-                 [13] = {0xf5, 0x81},
-                 [15] = {0xf7, 0x02}, },
-       .page1_size = 8,
-       .page1 = { /* default parameter */ },
-       .page2_size = 0,
-       .page2 = { /* no parameter */ },
-       .num_phy = 1,
-       .check_efuse = false,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = 1,
-       .dc_driving_mask = 0xf,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = true,
-       .do_toggle = true,
-       .do_toggle_driving = false,
-       .driving_updated_for_dev_dis = 0xf,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = false,
-};
-
-static const struct phy_cfg rtd1395_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [4] = {0xe4, 0xac},
-                 [13] = {0xf5, 0x00},
-                 [15] = {0xf7, 0x02}, },
-       .page1_size = 8,
-       .page1 = { /* default parameter */ },
-       .page2_size = 0,
-       .page2 = { /* no parameter */ },
-       .num_phy = 1,
-       .check_efuse = false,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = 1,
-       .dc_driving_mask = 0xf,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = true,
-       .do_toggle = true,
-       .do_toggle_driving = false,
-       .driving_updated_for_dev_dis = 0xf,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = false,
-};
-
-static const struct phy_cfg rtd1395_phy_cfg_2port = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [4] = {0xe4, 0xac},
-                 [13] = {0xf5, 0x00},
-                 [15] = {0xf7, 0x02}, },
-       .page1_size = 8,
-       .page1 = { /* default parameter */ },
-       .page2_size = 0,
-       .page2 = { /* no parameter */ },
-       .num_phy = 2,
-       .check_efuse = false,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = 1,
-       .dc_driving_mask = 0xf,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = true,
-       .do_toggle = true,
-       .do_toggle_driving = false,
-       .driving_updated_for_dev_dis = 0xf,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = false,
-};
-
-static const struct phy_cfg rtd1619_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [4] = {0xe4, 0x68}, },
-       .page1_size = 8,
-       .page1 = { /* default parameter */ },
-       .page2_size = 0,
-       .page2 = { /* no parameter */ },
-       .num_phy = 1,
-       .check_efuse = true,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = 1,
-       .dc_driving_mask = 0xf,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = true,
-       .do_toggle = true,
-       .do_toggle_driving = false,
-       .driving_updated_for_dev_dis = 0xf,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = false,
-};
-
-static const struct phy_cfg rtd1319_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [0] = {0xe0, 0x18},
-                  [4] = {0xe4, 0x6a},
-                  [7] = {0xe7, 0x71},
-                 [13] = {0xf5, 0x15},
-                 [15] = {0xf7, 0x32}, },
-       .page1_size = 8,
-       .page1 = { [3] = {0xe3, 0x44}, },
-       .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE,
-       .page2 = { [0] = {0xe0, 0x01}, },
-       .num_phy = 1,
-       .check_efuse = true,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = 1,
-       .dc_driving_mask = 0xf,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = true,
-       .do_toggle = true,
-       .do_toggle_driving = true,
-       .driving_updated_for_dev_dis = 0xf,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = true,
-};
-
-static const struct phy_cfg rtd1312c_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [0] = {0xe0, 0x14},
-                  [4] = {0xe4, 0x67},
-                  [5] = {0xe5, 0x55}, },
-       .page1_size = 8,
-       .page1 = { [3] = {0xe3, 0x23},
-                  [6] = {0xe6, 0x58}, },
-       .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE,
-       .page2 = { /* default parameter */ },
-       .num_phy = 1,
-       .check_efuse = true,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = 1,
-       .dc_driving_mask = 0xf,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = true,
-       .do_toggle = true,
-       .do_toggle_driving = true,
-       .driving_updated_for_dev_dis = 0xf,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = true,
-};
-
-static const struct phy_cfg rtd1619b_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [0] = {0xe0, 0xa3},
-                  [4] = {0xe4, 0x88},
-                  [5] = {0xe5, 0x4f},
-                  [6] = {0xe6, 0x02}, },
-       .page1_size = 8,
-       .page1 = { [3] = {0xe3, 0x64}, },
-       .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE,
-       .page2 = { [7] = {0xe7, 0x45}, },
-       .num_phy = 1,
-       .check_efuse = true,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = EFUS_USB_DC_CAL_RATE,
-       .dc_driving_mask = 0x1f,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = false,
-       .do_toggle = true,
-       .do_toggle_driving = true,
-       .driving_updated_for_dev_dis = 0x8,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = true,
-};
-
-static const struct phy_cfg rtd1319d_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [0] = {0xe0, 0xa3},
-                  [4] = {0xe4, 0x8e},
-                  [5] = {0xe5, 0x4f},
-                  [6] = {0xe6, 0x02}, },
-       .page1_size = MAX_USB_PHY_PAGE1_DATA_SIZE,
-       .page1 = { [14] = {0xf5, 0x1}, },
-       .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE,
-       .page2 = { [7] = {0xe7, 0x44}, },
-       .check_efuse = true,
-       .num_phy = 1,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = EFUS_USB_DC_CAL_RATE,
-       .dc_driving_mask = 0x1f,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = false,
-       .do_toggle = true,
-       .do_toggle_driving = false,
-       .driving_updated_for_dev_dis = 0x8,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = true,
-};
-
-static const struct phy_cfg rtd1315e_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [0] = {0xe0, 0xa3},
-                  [4] = {0xe4, 0x8c},
-                  [5] = {0xe5, 0x4f},
-                  [6] = {0xe6, 0x02}, },
-       .page1_size = MAX_USB_PHY_PAGE1_DATA_SIZE,
-       .page1 = { [3] = {0xe3, 0x7f},
-                 [14] = {0xf5, 0x01}, },
-       .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE,
-       .page2 = { [7] = {0xe7, 0x44}, },
-       .num_phy = 1,
-       .check_efuse = true,
-       .check_efuse_version = CHECK_EFUSE_V2,
-       .efuse_dc_driving_rate = EFUS_USB_DC_CAL_RATE,
-       .dc_driving_mask = 0x1f,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = false,
-       .do_toggle = true,
-       .do_toggle_driving = false,
-       .driving_updated_for_dev_dis = 0x8,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = true,
-};
-
-static const struct of_device_id usbphy_rtk_dt_match[] = {
-       { .compatible = "realtek,rtd1295-usb2phy", .data = &rtd1295_phy_cfg },
-       { .compatible = "realtek,rtd1312c-usb2phy", .data = &rtd1312c_phy_cfg },
-       { .compatible = "realtek,rtd1315e-usb2phy", .data = &rtd1315e_phy_cfg },
-       { .compatible = "realtek,rtd1319-usb2phy", .data = &rtd1319_phy_cfg },
-       { .compatible = "realtek,rtd1319d-usb2phy", .data = &rtd1319d_phy_cfg },
-       { .compatible = "realtek,rtd1395-usb2phy", .data = &rtd1395_phy_cfg },
-       { .compatible = "realtek,rtd1395-usb2phy-2port", .data = &rtd1395_phy_cfg_2port },
-       { .compatible = "realtek,rtd1619-usb2phy", .data = &rtd1619_phy_cfg },
-       { .compatible = "realtek,rtd1619b-usb2phy", .data = &rtd1619b_phy_cfg },
-       {},
-};
-MODULE_DEVICE_TABLE(of, usbphy_rtk_dt_match);
-
-static struct platform_driver rtk_usb2phy_driver = {
-       .probe          = rtk_usb2phy_probe,
-       .remove_new     = rtk_usb2phy_remove,
-       .driver         = {
-               .name   = "rtk-usb2phy",
-               .of_match_table = usbphy_rtk_dt_match,
-       },
-};
-
-module_platform_driver(rtk_usb2phy_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform: rtk-usb2phy");
-MODULE_AUTHOR("Stanley Chang <stanley_chang@realtek.com>");
-MODULE_DESCRIPTION("Realtek usb 2.0 phy driver");
diff --git a/drivers/phy/realtek/phy-rtk-usb3.c b/drivers/phy/realtek/phy-rtk-usb3.c
deleted file mode 100644 (file)
index 67446a8..0000000
+++ /dev/null
@@ -1,761 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  phy-rtk-usb3.c RTK usb3.0 phy driver
- *
- * copyright (c) 2023 realtek semiconductor corporation
- *
- */
-
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/platform_device.h>
-#include <linux/uaccess.h>
-#include <linux/debugfs.h>
-#include <linux/nvmem-consumer.h>
-#include <linux/regmap.h>
-#include <linux/sys_soc.h>
-#include <linux/mfd/syscon.h>
-#include <linux/phy/phy.h>
-#include <linux/usb.h>
-#include <linux/usb/hcd.h>
-#include <linux/usb/phy.h>
-
-#define USB_MDIO_CTRL_PHY_BUSY BIT(7)
-#define USB_MDIO_CTRL_PHY_WRITE BIT(0)
-#define USB_MDIO_CTRL_PHY_ADDR_SHIFT 8
-#define USB_MDIO_CTRL_PHY_DATA_SHIFT 16
-
-#define MAX_USB_PHY_DATA_SIZE 0x30
-#define PHY_ADDR_0X09 0x09
-#define PHY_ADDR_0X0B 0x0b
-#define PHY_ADDR_0X0D 0x0d
-#define PHY_ADDR_0X10 0x10
-#define PHY_ADDR_0X1F 0x1f
-#define PHY_ADDR_0X20 0x20
-#define PHY_ADDR_0X21 0x21
-#define PHY_ADDR_0X30 0x30
-
-#define REG_0X09_FORCE_CALIBRATION BIT(9)
-#define REG_0X0B_RX_OFFSET_RANGE_MASK 0xc
-#define REG_0X0D_RX_DEBUG_TEST_EN BIT(6)
-#define REG_0X10_DEBUG_MODE_SETTING 0x3c0
-#define REG_0X10_DEBUG_MODE_SETTING_MASK 0x3f8
-#define REG_0X1F_RX_OFFSET_CODE_MASK 0x1e
-
-#define USB_U3_TX_LFPS_SWING_TRIM_SHIFT 4
-#define USB_U3_TX_LFPS_SWING_TRIM_MASK 0xf
-#define AMPLITUDE_CONTROL_COARSE_MASK 0xff
-#define AMPLITUDE_CONTROL_FINE_MASK 0xffff
-#define AMPLITUDE_CONTROL_COARSE_DEFAULT 0xff
-#define AMPLITUDE_CONTROL_FINE_DEFAULT 0xffff
-
-#define PHY_ADDR_MAP_ARRAY_INDEX(addr) (addr)
-#define ARRAY_INDEX_MAP_PHY_ADDR(index) (index)
-
-struct phy_reg {
-       void __iomem *reg_mdio_ctl;
-};
-
-struct phy_data {
-       u8 addr;
-       u16 data;
-};
-
-struct phy_cfg {
-       int param_size;
-       struct phy_data param[MAX_USB_PHY_DATA_SIZE];
-
-       bool check_efuse;
-       bool do_toggle;
-       bool do_toggle_once;
-       bool use_default_parameter;
-       bool check_rx_front_end_offset;
-};
-
-struct phy_parameter {
-       struct phy_reg phy_reg;
-
-       /* Get from efuse */
-       u8 efuse_usb_u3_tx_lfps_swing_trim;
-
-       /* Get from dts */
-       u32 amplitude_control_coarse;
-       u32 amplitude_control_fine;
-};
-
-struct rtk_phy {
-       struct usb_phy phy;
-       struct device *dev;
-
-       struct phy_cfg *phy_cfg;
-       int num_phy;
-       struct phy_parameter *phy_parameter;
-
-       struct dentry *debug_dir;
-};
-
-#define PHY_IO_TIMEOUT_USEC            (50000)
-#define PHY_IO_DELAY_US                        (100)
-
-static inline int utmi_wait_register(void __iomem *reg, u32 mask, u32 result)
-{
-       int ret;
-       unsigned int val;
-
-       ret = read_poll_timeout(readl, val, ((val & mask) == result),
-                               PHY_IO_DELAY_US, PHY_IO_TIMEOUT_USEC, false, reg);
-       if (ret) {
-               pr_err("%s can't program USB phy\n", __func__);
-               return -ETIMEDOUT;
-       }
-
-       return 0;
-}
-
-static int rtk_phy3_wait_vbusy(struct phy_reg *phy_reg)
-{
-       return utmi_wait_register(phy_reg->reg_mdio_ctl, USB_MDIO_CTRL_PHY_BUSY, 0);
-}
-
-static u16 rtk_phy_read(struct phy_reg *phy_reg, char addr)
-{
-       unsigned int tmp;
-       u32 value;
-
-       tmp = (addr << USB_MDIO_CTRL_PHY_ADDR_SHIFT);
-
-       writel(tmp, phy_reg->reg_mdio_ctl);
-
-       rtk_phy3_wait_vbusy(phy_reg);
-
-       value = readl(phy_reg->reg_mdio_ctl);
-       value = value >> USB_MDIO_CTRL_PHY_DATA_SHIFT;
-
-       return (u16)value;
-}
-
-static int rtk_phy_write(struct phy_reg *phy_reg, char addr, u16 data)
-{
-       unsigned int val;
-
-       val = USB_MDIO_CTRL_PHY_WRITE |
-                   (addr << USB_MDIO_CTRL_PHY_ADDR_SHIFT) |
-                   (data << USB_MDIO_CTRL_PHY_DATA_SHIFT);
-
-       writel(val, phy_reg->reg_mdio_ctl);
-
-       rtk_phy3_wait_vbusy(phy_reg);
-
-       return 0;
-}
-
-static void do_rtk_usb3_phy_toggle(struct rtk_phy *rtk_phy, int index, bool connect)
-{
-       struct phy_cfg *phy_cfg = rtk_phy->phy_cfg;
-       struct phy_reg *phy_reg;
-       struct phy_parameter *phy_parameter;
-       struct phy_data *phy_data;
-       u8 addr;
-       u16 data;
-       int i;
-
-       phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-       phy_reg = &phy_parameter->phy_reg;
-
-       if (!phy_cfg->do_toggle)
-               return;
-
-       i = PHY_ADDR_MAP_ARRAY_INDEX(PHY_ADDR_0X09);
-       phy_data = phy_cfg->param + i;
-       addr = phy_data->addr;
-       data = phy_data->data;
-
-       if (!addr && !data) {
-               addr = PHY_ADDR_0X09;
-               data = rtk_phy_read(phy_reg, addr);
-               phy_data->addr = addr;
-               phy_data->data = data;
-       }
-
-       rtk_phy_write(phy_reg, addr, data & (~REG_0X09_FORCE_CALIBRATION));
-       mdelay(1);
-       rtk_phy_write(phy_reg, addr, data | REG_0X09_FORCE_CALIBRATION);
-}
-
-static int do_rtk_phy_init(struct rtk_phy *rtk_phy, int index)
-{
-       struct phy_cfg *phy_cfg;
-       struct phy_reg *phy_reg;
-       struct phy_parameter *phy_parameter;
-       int i = 0;
-
-       phy_cfg = rtk_phy->phy_cfg;
-       phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-       phy_reg = &phy_parameter->phy_reg;
-
-       if (phy_cfg->use_default_parameter)
-               goto do_toggle;
-
-       for (i = 0; i < phy_cfg->param_size; i++) {
-               struct phy_data *phy_data = phy_cfg->param + i;
-               u8 addr = phy_data->addr;
-               u16 data = phy_data->data;
-
-               if (!addr && !data)
-                       continue;
-
-               rtk_phy_write(phy_reg, addr, data);
-       }
-
-do_toggle:
-       if (phy_cfg->do_toggle_once)
-               phy_cfg->do_toggle = true;
-
-       do_rtk_usb3_phy_toggle(rtk_phy, index, false);
-
-       if (phy_cfg->do_toggle_once) {
-               u16 check_value = 0;
-               int count = 10;
-               u16 value_0x0d, value_0x10;
-
-               /* Enable Debug mode by set 0x0D and 0x10 */
-               value_0x0d = rtk_phy_read(phy_reg, PHY_ADDR_0X0D);
-               value_0x10 = rtk_phy_read(phy_reg, PHY_ADDR_0X10);
-
-               rtk_phy_write(phy_reg, PHY_ADDR_0X0D,
-                             value_0x0d | REG_0X0D_RX_DEBUG_TEST_EN);
-               rtk_phy_write(phy_reg, PHY_ADDR_0X10,
-                             (value_0x10 & ~REG_0X10_DEBUG_MODE_SETTING_MASK) |
-                             REG_0X10_DEBUG_MODE_SETTING);
-
-               check_value = rtk_phy_read(phy_reg, PHY_ADDR_0X30);
-
-               while (!(check_value & BIT(15))) {
-                       check_value = rtk_phy_read(phy_reg, PHY_ADDR_0X30);
-                       mdelay(1);
-                       if (count-- < 0)
-                               break;
-               }
-
-               if (!(check_value & BIT(15)))
-                       dev_info(rtk_phy->dev, "toggle fail addr=0x%02x, data=0x%04x\n",
-                                PHY_ADDR_0X30, check_value);
-
-               /* Disable Debug mode by set 0x0D and 0x10 to default*/
-               rtk_phy_write(phy_reg, PHY_ADDR_0X0D, value_0x0d);
-               rtk_phy_write(phy_reg, PHY_ADDR_0X10, value_0x10);
-
-               phy_cfg->do_toggle = false;
-       }
-
-       if (phy_cfg->check_rx_front_end_offset) {
-               u16 rx_offset_code, rx_offset_range;
-               u16 code_mask = REG_0X1F_RX_OFFSET_CODE_MASK;
-               u16 range_mask = REG_0X0B_RX_OFFSET_RANGE_MASK;
-               bool do_update = false;
-
-               rx_offset_code = rtk_phy_read(phy_reg, PHY_ADDR_0X1F);
-               if (((rx_offset_code & code_mask) == 0x0) ||
-                   ((rx_offset_code & code_mask) == code_mask))
-                       do_update = true;
-
-               rx_offset_range = rtk_phy_read(phy_reg, PHY_ADDR_0X0B);
-               if (((rx_offset_range & range_mask) == range_mask) && do_update) {
-                       dev_warn(rtk_phy->dev, "Don't update rx_offset_range (rx_offset_code=0x%x, rx_offset_range=0x%x)\n",
-                                rx_offset_code, rx_offset_range);
-                       do_update = false;
-               }
-
-               if (do_update) {
-                       u16 tmp1, tmp2;
-
-                       tmp1 = rx_offset_range & (~range_mask);
-                       tmp2 = rx_offset_range & range_mask;
-                       tmp2 += (1 << 2);
-                       rx_offset_range = tmp1 | (tmp2 & range_mask);
-                       rtk_phy_write(phy_reg, PHY_ADDR_0X0B, rx_offset_range);
-                       goto do_toggle;
-               }
-       }
-
-       return 0;
-}
-
-static int rtk_phy_init(struct phy *phy)
-{
-       struct rtk_phy *rtk_phy = phy_get_drvdata(phy);
-       int ret = 0;
-       int i;
-       unsigned long phy_init_time = jiffies;
-
-       for (i = 0; i < rtk_phy->num_phy; i++)
-               ret = do_rtk_phy_init(rtk_phy, i);
-
-       dev_dbg(rtk_phy->dev, "Initialized RTK USB 3.0 PHY (take %dms)\n",
-               jiffies_to_msecs(jiffies - phy_init_time));
-
-       return ret;
-}
-
-static int rtk_phy_exit(struct phy *phy)
-{
-       return 0;
-}
-
-static const struct phy_ops ops = {
-       .init           = rtk_phy_init,
-       .exit           = rtk_phy_exit,
-       .owner          = THIS_MODULE,
-};
-
-static void rtk_phy_toggle(struct usb_phy *usb3_phy, bool connect, int port)
-{
-       int index = port;
-       struct rtk_phy *rtk_phy = NULL;
-
-       rtk_phy = dev_get_drvdata(usb3_phy->dev);
-
-       if (index > rtk_phy->num_phy) {
-               dev_err(rtk_phy->dev, "%s: The port=%d is not in usb phy (num_phy=%d)\n",
-                       __func__, index, rtk_phy->num_phy);
-               return;
-       }
-
-       do_rtk_usb3_phy_toggle(rtk_phy, index, connect);
-}
-
-static int rtk_phy_notify_port_status(struct usb_phy *x, int port,
-                                     u16 portstatus, u16 portchange)
-{
-       bool connect = false;
-
-       pr_debug("%s port=%d portstatus=0x%x portchange=0x%x\n",
-                __func__, port, (int)portstatus, (int)portchange);
-       if (portstatus & USB_PORT_STAT_CONNECTION)
-               connect = true;
-
-       if (portchange & USB_PORT_STAT_C_CONNECTION)
-               rtk_phy_toggle(x, connect, port);
-
-       return 0;
-}
-
-#ifdef CONFIG_DEBUG_FS
-static struct dentry *create_phy_debug_root(void)
-{
-       struct dentry *phy_debug_root;
-
-       phy_debug_root = debugfs_lookup("phy", usb_debug_root);
-       if (!phy_debug_root)
-               phy_debug_root = debugfs_create_dir("phy", usb_debug_root);
-
-       return phy_debug_root;
-}
-
-static int rtk_usb3_parameter_show(struct seq_file *s, void *unused)
-{
-       struct rtk_phy *rtk_phy = s->private;
-       struct phy_cfg *phy_cfg;
-       int i, index;
-
-       phy_cfg = rtk_phy->phy_cfg;
-
-       seq_puts(s, "Property:\n");
-       seq_printf(s, "  check_efuse: %s\n",
-                  phy_cfg->check_efuse ? "Enable" : "Disable");
-       seq_printf(s, "  do_toggle: %s\n",
-                  phy_cfg->do_toggle ? "Enable" : "Disable");
-       seq_printf(s, "  do_toggle_once: %s\n",
-                  phy_cfg->do_toggle_once ? "Enable" : "Disable");
-       seq_printf(s, "  use_default_parameter: %s\n",
-                  phy_cfg->use_default_parameter ? "Enable" : "Disable");
-
-       for (index = 0; index < rtk_phy->num_phy; index++) {
-               struct phy_reg *phy_reg;
-               struct phy_parameter *phy_parameter;
-
-               phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-               phy_reg = &phy_parameter->phy_reg;
-
-               seq_printf(s, "PHY %d:\n", index);
-
-               for (i = 0; i < phy_cfg->param_size; i++) {
-                       struct phy_data *phy_data = phy_cfg->param + i;
-                       u8 addr = ARRAY_INDEX_MAP_PHY_ADDR(i);
-                       u16 data = phy_data->data;
-
-                       if (!phy_data->addr && !data)
-                               seq_printf(s, "  addr = 0x%02x, data = none   ==> read value = 0x%04x\n",
-                                          addr, rtk_phy_read(phy_reg, addr));
-                       else
-                               seq_printf(s, "  addr = 0x%02x, data = 0x%04x ==> read value = 0x%04x\n",
-                                          addr, data, rtk_phy_read(phy_reg, addr));
-               }
-
-               seq_puts(s, "PHY Property:\n");
-               seq_printf(s, "  efuse_usb_u3_tx_lfps_swing_trim: 0x%x\n",
-                          (int)phy_parameter->efuse_usb_u3_tx_lfps_swing_trim);
-               seq_printf(s, "  amplitude_control_coarse: 0x%x\n",
-                          (int)phy_parameter->amplitude_control_coarse);
-               seq_printf(s, "  amplitude_control_fine: 0x%x\n",
-                          (int)phy_parameter->amplitude_control_fine);
-       }
-
-       return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(rtk_usb3_parameter);
-
-static inline void create_debug_files(struct rtk_phy *rtk_phy)
-{
-       struct dentry *phy_debug_root = NULL;
-
-       phy_debug_root = create_phy_debug_root();
-
-       if (!phy_debug_root)
-               return;
-
-       rtk_phy->debug_dir = debugfs_create_dir(dev_name(rtk_phy->dev), phy_debug_root);
-
-       debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy,
-                           &rtk_usb3_parameter_fops);
-
-       return;
-}
-
-static inline void remove_debug_files(struct rtk_phy *rtk_phy)
-{
-       debugfs_remove_recursive(rtk_phy->debug_dir);
-}
-#else
-static inline void create_debug_files(struct rtk_phy *rtk_phy) { }
-static inline void remove_debug_files(struct rtk_phy *rtk_phy) { }
-#endif /* CONFIG_DEBUG_FS */
-
-static int get_phy_data_by_efuse(struct rtk_phy *rtk_phy,
-                                struct phy_parameter *phy_parameter, int index)
-{
-       struct phy_cfg *phy_cfg = rtk_phy->phy_cfg;
-       u8 value = 0;
-       struct nvmem_cell *cell;
-
-       if (!phy_cfg->check_efuse)
-               goto out;
-
-       cell = nvmem_cell_get(rtk_phy->dev, "usb_u3_tx_lfps_swing_trim");
-       if (IS_ERR(cell)) {
-               dev_dbg(rtk_phy->dev, "%s no usb_u3_tx_lfps_swing_trim: %ld\n",
-                       __func__, PTR_ERR(cell));
-       } else {
-               unsigned char *buf;
-               size_t buf_size;
-
-               buf = nvmem_cell_read(cell, &buf_size);
-               if (!IS_ERR(buf)) {
-                       value = buf[0] & USB_U3_TX_LFPS_SWING_TRIM_MASK;
-                       kfree(buf);
-               }
-               nvmem_cell_put(cell);
-       }
-
-       if (value > 0 && value < 0x8)
-               phy_parameter->efuse_usb_u3_tx_lfps_swing_trim = 0x8;
-       else
-               phy_parameter->efuse_usb_u3_tx_lfps_swing_trim = (u8)value;
-
-out:
-       return 0;
-}
-
-static void update_amplitude_control_value(struct rtk_phy *rtk_phy,
-                                          struct phy_parameter *phy_parameter)
-{
-       struct phy_cfg *phy_cfg;
-       struct phy_reg *phy_reg;
-
-       phy_reg = &phy_parameter->phy_reg;
-       phy_cfg = rtk_phy->phy_cfg;
-
-       if (phy_parameter->amplitude_control_coarse != AMPLITUDE_CONTROL_COARSE_DEFAULT) {
-               u16 val_mask = AMPLITUDE_CONTROL_COARSE_MASK;
-               u16 data;
-
-               if (!phy_cfg->param[PHY_ADDR_0X20].addr && !phy_cfg->param[PHY_ADDR_0X20].data) {
-                       phy_cfg->param[PHY_ADDR_0X20].addr = PHY_ADDR_0X20;
-                       data = rtk_phy_read(phy_reg, PHY_ADDR_0X20);
-               } else {
-                       data = phy_cfg->param[PHY_ADDR_0X20].data;
-               }
-
-               data &= (~val_mask);
-               data |= (phy_parameter->amplitude_control_coarse & val_mask);
-
-               phy_cfg->param[PHY_ADDR_0X20].data = data;
-       }
-
-       if (phy_parameter->efuse_usb_u3_tx_lfps_swing_trim) {
-               u8 efuse_val = phy_parameter->efuse_usb_u3_tx_lfps_swing_trim;
-               u16 val_mask = USB_U3_TX_LFPS_SWING_TRIM_MASK;
-               int val_shift = USB_U3_TX_LFPS_SWING_TRIM_SHIFT;
-               u16 data;
-
-               if (!phy_cfg->param[PHY_ADDR_0X20].addr && !phy_cfg->param[PHY_ADDR_0X20].data) {
-                       phy_cfg->param[PHY_ADDR_0X20].addr = PHY_ADDR_0X20;
-                       data = rtk_phy_read(phy_reg, PHY_ADDR_0X20);
-               } else {
-                       data = phy_cfg->param[PHY_ADDR_0X20].data;
-               }
-
-               data &= ~(val_mask << val_shift);
-               data |= ((efuse_val & val_mask) << val_shift);
-
-               phy_cfg->param[PHY_ADDR_0X20].data = data;
-       }
-
-       if (phy_parameter->amplitude_control_fine != AMPLITUDE_CONTROL_FINE_DEFAULT) {
-               u16 val_mask = AMPLITUDE_CONTROL_FINE_MASK;
-
-               if (!phy_cfg->param[PHY_ADDR_0X21].addr && !phy_cfg->param[PHY_ADDR_0X21].data)
-                       phy_cfg->param[PHY_ADDR_0X21].addr = PHY_ADDR_0X21;
-
-               phy_cfg->param[PHY_ADDR_0X21].data =
-                           phy_parameter->amplitude_control_fine & val_mask;
-       }
-}
-
-static int parse_phy_data(struct rtk_phy *rtk_phy)
-{
-       struct device *dev = rtk_phy->dev;
-       struct phy_parameter *phy_parameter;
-       int ret = 0;
-       int index;
-
-       rtk_phy->phy_parameter = devm_kzalloc(dev, sizeof(struct phy_parameter) *
-                                             rtk_phy->num_phy, GFP_KERNEL);
-       if (!rtk_phy->phy_parameter)
-               return -ENOMEM;
-
-       for (index = 0; index < rtk_phy->num_phy; index++) {
-               phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-
-               phy_parameter->phy_reg.reg_mdio_ctl = of_iomap(dev->of_node, 0) + index;
-
-               /* Amplitude control address 0x20 bit 0 to bit 7 */
-               if (of_property_read_u32(dev->of_node, "realtek,amplitude-control-coarse-tuning",
-                                        &phy_parameter->amplitude_control_coarse))
-                       phy_parameter->amplitude_control_coarse = AMPLITUDE_CONTROL_COARSE_DEFAULT;
-
-               /* Amplitude control address 0x21 bit 0 to bit 16 */
-               if (of_property_read_u32(dev->of_node, "realtek,amplitude-control-fine-tuning",
-                                        &phy_parameter->amplitude_control_fine))
-                       phy_parameter->amplitude_control_fine = AMPLITUDE_CONTROL_FINE_DEFAULT;
-
-               get_phy_data_by_efuse(rtk_phy, phy_parameter, index);
-
-               update_amplitude_control_value(rtk_phy, phy_parameter);
-       }
-
-       return ret;
-}
-
-static int rtk_usb3phy_probe(struct platform_device *pdev)
-{
-       struct rtk_phy *rtk_phy;
-       struct device *dev = &pdev->dev;
-       struct phy *generic_phy;
-       struct phy_provider *phy_provider;
-       const struct phy_cfg *phy_cfg;
-       int ret;
-
-       phy_cfg = of_device_get_match_data(dev);
-       if (!phy_cfg) {
-               dev_err(dev, "phy config are not assigned!\n");
-               return -EINVAL;
-       }
-
-       rtk_phy = devm_kzalloc(dev, sizeof(*rtk_phy), GFP_KERNEL);
-       if (!rtk_phy)
-               return -ENOMEM;
-
-       rtk_phy->dev                    = &pdev->dev;
-       rtk_phy->phy.dev                = rtk_phy->dev;
-       rtk_phy->phy.label              = "rtk-usb3phy";
-       rtk_phy->phy.notify_port_status = rtk_phy_notify_port_status;
-
-       rtk_phy->phy_cfg = devm_kzalloc(dev, sizeof(*phy_cfg), GFP_KERNEL);
-
-       memcpy(rtk_phy->phy_cfg, phy_cfg, sizeof(*phy_cfg));
-
-       rtk_phy->num_phy = 1;
-
-       ret = parse_phy_data(rtk_phy);
-       if (ret)
-               goto err;
-
-       platform_set_drvdata(pdev, rtk_phy);
-
-       generic_phy = devm_phy_create(rtk_phy->dev, NULL, &ops);
-       if (IS_ERR(generic_phy))
-               return PTR_ERR(generic_phy);
-
-       phy_set_drvdata(generic_phy, rtk_phy);
-
-       phy_provider = devm_of_phy_provider_register(rtk_phy->dev, of_phy_simple_xlate);
-       if (IS_ERR(phy_provider))
-               return PTR_ERR(phy_provider);
-
-       ret = usb_add_phy_dev(&rtk_phy->phy);
-       if (ret)
-               goto err;
-
-       create_debug_files(rtk_phy);
-
-err:
-       return ret;
-}
-
-static void rtk_usb3phy_remove(struct platform_device *pdev)
-{
-       struct rtk_phy *rtk_phy = platform_get_drvdata(pdev);
-
-       remove_debug_files(rtk_phy);
-
-       usb_remove_phy(&rtk_phy->phy);
-}
-
-static const struct phy_cfg rtd1295_phy_cfg = {
-       .param_size = MAX_USB_PHY_DATA_SIZE,
-       .param = {  [0] = {0x01, 0x4008},  [1] = {0x01, 0xe046},
-                   [2] = {0x02, 0x6046},  [3] = {0x03, 0x2779},
-                   [4] = {0x04, 0x72f5},  [5] = {0x05, 0x2ad3},
-                   [6] = {0x06, 0x000e},  [7] = {0x07, 0x2e00},
-                   [8] = {0x08, 0x3591},  [9] = {0x09, 0x525c},
-                  [10] = {0x0a, 0xa600}, [11] = {0x0b, 0xa904},
-                  [12] = {0x0c, 0xc000}, [13] = {0x0d, 0xef1c},
-                  [14] = {0x0e, 0x2000}, [15] = {0x0f, 0x0000},
-                  [16] = {0x10, 0x000c}, [17] = {0x11, 0x4c00},
-                  [18] = {0x12, 0xfc00}, [19] = {0x13, 0x0c81},
-                  [20] = {0x14, 0xde01}, [21] = {0x15, 0x0000},
-                  [22] = {0x16, 0x0000}, [23] = {0x17, 0x0000},
-                  [24] = {0x18, 0x0000}, [25] = {0x19, 0x4004},
-                  [26] = {0x1a, 0x1260}, [27] = {0x1b, 0xff00},
-                  [28] = {0x1c, 0xcb00}, [29] = {0x1d, 0xa03f},
-                  [30] = {0x1e, 0xc2e0}, [31] = {0x1f, 0x2807},
-                  [32] = {0x20, 0x947a}, [33] = {0x21, 0x88aa},
-                  [34] = {0x22, 0x0057}, [35] = {0x23, 0xab66},
-                  [36] = {0x24, 0x0800}, [37] = {0x25, 0x0000},
-                  [38] = {0x26, 0x040a}, [39] = {0x27, 0x01d6},
-                  [40] = {0x28, 0xf8c2}, [41] = {0x29, 0x3080},
-                  [42] = {0x2a, 0x3082}, [43] = {0x2b, 0x2078},
-                  [44] = {0x2c, 0xffff}, [45] = {0x2d, 0xffff},
-                  [46] = {0x2e, 0x0000}, [47] = {0x2f, 0x0040}, },
-       .check_efuse = false,
-       .do_toggle = true,
-       .do_toggle_once = false,
-       .use_default_parameter = false,
-       .check_rx_front_end_offset = false,
-};
-
-static const struct phy_cfg rtd1619_phy_cfg = {
-       .param_size = MAX_USB_PHY_DATA_SIZE,
-       .param = {  [8] = {0x08, 0x3591},
-                  [38] = {0x26, 0x840b},
-                  [40] = {0x28, 0xf842}, },
-       .check_efuse = false,
-       .do_toggle = true,
-       .do_toggle_once = false,
-       .use_default_parameter = false,
-       .check_rx_front_end_offset = false,
-};
-
-static const struct phy_cfg rtd1319_phy_cfg = {
-       .param_size = MAX_USB_PHY_DATA_SIZE,
-       .param = {  [1] = {0x01, 0xac86},
-                   [6] = {0x06, 0x0003},
-                   [9] = {0x09, 0x924c},
-                  [10] = {0x0a, 0xa608},
-                  [11] = {0x0b, 0xb905},
-                  [14] = {0x0e, 0x2010},
-                  [32] = {0x20, 0x705a},
-                  [33] = {0x21, 0xf645},
-                  [34] = {0x22, 0x0013},
-                  [35] = {0x23, 0xcb66},
-                  [41] = {0x29, 0xff00}, },
-       .check_efuse = true,
-       .do_toggle = true,
-       .do_toggle_once = false,
-       .use_default_parameter = false,
-       .check_rx_front_end_offset = false,
-};
-
-static const struct phy_cfg rtd1619b_phy_cfg = {
-       .param_size = MAX_USB_PHY_DATA_SIZE,
-       .param = {  [1] = {0x01, 0xac8c},
-                   [6] = {0x06, 0x0017},
-                   [9] = {0x09, 0x724c},
-                  [10] = {0x0a, 0xb610},
-                  [11] = {0x0b, 0xb90d},
-                  [13] = {0x0d, 0xef2a},
-                  [15] = {0x0f, 0x9050},
-                  [16] = {0x10, 0x000c},
-                  [32] = {0x20, 0x70ff},
-                  [34] = {0x22, 0x0013},
-                  [35] = {0x23, 0xdb66},
-                  [38] = {0x26, 0x8609},
-                  [41] = {0x29, 0xff13},
-                  [42] = {0x2a, 0x3070}, },
-       .check_efuse = true,
-       .do_toggle = false,
-       .do_toggle_once = true,
-       .use_default_parameter = false,
-       .check_rx_front_end_offset = false,
-};
-
-static const  struct phy_cfg rtd1319d_phy_cfg = {
-       .param_size = MAX_USB_PHY_DATA_SIZE,
-       .param = {  [1] = {0x01, 0xac89},
-                   [4] = {0x04, 0xf2f5},
-                   [6] = {0x06, 0x0017},
-                   [9] = {0x09, 0x424c},
-                  [10] = {0x0a, 0x9610},
-                  [11] = {0x0b, 0x9901},
-                  [12] = {0x0c, 0xf000},
-                  [13] = {0x0d, 0xef2a},
-                  [14] = {0x0e, 0x1000},
-                  [15] = {0x0f, 0x9050},
-                  [32] = {0x20, 0x7077},
-                  [35] = {0x23, 0x0b62},
-                  [37] = {0x25, 0x10ec},
-                  [42] = {0x2a, 0x3070}, },
-       .check_efuse = true,
-       .do_toggle = false,
-       .do_toggle_once = true,
-       .use_default_parameter = false,
-       .check_rx_front_end_offset = true,
-};
-
-static const struct of_device_id usbphy_rtk_dt_match[] = {
-       { .compatible = "realtek,rtd1295-usb3phy", .data = &rtd1295_phy_cfg },
-       { .compatible = "realtek,rtd1319-usb3phy", .data = &rtd1319_phy_cfg },
-       { .compatible = "realtek,rtd1319d-usb3phy", .data = &rtd1319d_phy_cfg },
-       { .compatible = "realtek,rtd1619-usb3phy", .data = &rtd1619_phy_cfg },
-       { .compatible = "realtek,rtd1619b-usb3phy", .data = &rtd1619b_phy_cfg },
-       {},
-};
-MODULE_DEVICE_TABLE(of, usbphy_rtk_dt_match);
-
-static struct platform_driver rtk_usb3phy_driver = {
-       .probe          = rtk_usb3phy_probe,
-       .remove_new     = rtk_usb3phy_remove,
-       .driver         = {
-               .name   = "rtk-usb3phy",
-               .of_match_table = usbphy_rtk_dt_match,
-       },
-};
-
-module_platform_driver(rtk_usb3phy_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform: rtk-usb3phy");
-MODULE_AUTHOR("Stanley Chang <stanley_chang@realtek.com>");
-MODULE_DESCRIPTION("Realtek usb 3.0 phy driver");
index d6318cb57aff2ab336e21b9209db9336225119d0..e7e827a8877a0e83f6d05ff4327dcbc0da0c2132 100644 (file)
@@ -12,7 +12,8 @@ config PINCTRL_CS42L43
 
 config PINCTRL_LOCHNAGAR
        tristate "Cirrus Logic Lochnagar pinctrl driver"
-       depends on MFD_LOCHNAGAR
+       # Avoid clash caused by MIPS defining RST, which is used in the driver
+       depends on MFD_LOCHNAGAR && !MIPS
        select GPIOLIB
        select PINMUX
        select PINCONF
index 1fa89be29b8f98b22895aed0c65f1fa68df9d7bb..f2977eb65522e26192d456752f3e3e2b7010b586 100644 (file)
@@ -1262,17 +1262,17 @@ static void pinctrl_link_add(struct pinctrl_dev *pctldev,
 static int pinctrl_commit_state(struct pinctrl *p, struct pinctrl_state *state)
 {
        struct pinctrl_setting *setting, *setting2;
-       struct pinctrl_state *old_state = p->state;
+       struct pinctrl_state *old_state = READ_ONCE(p->state);
        int ret;
 
-       if (p->state) {
+       if (old_state) {
                /*
                 * For each pinmux setting in the old state, forget SW's record
                 * of mux owner for that pingroup. Any pingroups which are
                 * still owned by the new state will be re-acquired by the call
                 * to pinmux_enable_setting() in the loop below.
                 */
-               list_for_each_entry(setting, &p->state->settings, node) {
+               list_for_each_entry(setting, &old_state->settings, node) {
                        if (setting->type != PIN_MAP_TYPE_MUX_GROUP)
                                continue;
                        pinmux_disable_setting(setting);
index 7daff9f186cd8647ff2255554d986ce206cd3e06..f0cad2c501f766d0f2986da7d7eb0ae87b180380 100644 (file)
@@ -843,8 +843,8 @@ static int s32_pinctrl_probe_dt(struct platform_device *pdev,
        if (!np)
                return -ENODEV;
 
-       if (mem_regions == 0) {
-               dev_err(&pdev->dev, "mem_regions is 0\n");
+       if (mem_regions == 0 || mem_regions >= 10000) {
+               dev_err(&pdev->dev, "mem_regions is invalid: %u\n", mem_regions);
                return -EINVAL;
        }
 
index 04285c930e945ee6701ccfdf152b825d91214137..4ccfa99ed93a100dab52278c1021657b03d28134 100644 (file)
@@ -143,6 +143,7 @@ static const struct dmi_system_id cy8c95x0_dmi_acpi_irq_info[] = {
  * @pinctrl_desc:   pin controller description
  * @name:           Chip controller name
  * @tpin:           Total number of pins
+ * @gpio_reset:     GPIO line handler that can reset the IC
  */
 struct cy8c95x0_pinctrl {
        struct regmap *regmap;
index 9c7a1af4ba692a965eef68a70f3dcd67e830da74..208896593b61ecdf584a13debc800686a6056328 100644 (file)
@@ -146,7 +146,7 @@ static int rtd_pinctrl_get_function_groups(struct pinctrl_dev *pcdev,
 
 static const struct rtd_pin_desc *rtd_pinctrl_find_mux(struct rtd_pinctrl *data, unsigned int pin)
 {
-       if (!data->info->muxes[pin].name)
+       if (data->info->muxes[pin].name)
                return &data->info->muxes[pin];
 
        return NULL;
@@ -249,7 +249,7 @@ static const struct pinctrl_pin_desc
 static const struct rtd_pin_config_desc
        *rtd_pinctrl_find_config(struct rtd_pinctrl *data, unsigned int pin)
 {
-       if (!data->info->configs[pin].name)
+       if (data->info->configs[pin].name)
                return &data->info->configs[pin];
 
        return NULL;
index 64e8201c7eacd347ffa85c5139b937e0725016ec..603f900e88c18a50ea01c33b8e52ef8154327880 100644 (file)
@@ -1273,9 +1273,11 @@ static struct stm32_desc_pin *stm32_pctrl_get_desc_pin_from_gpio(struct stm32_pi
        int i;
 
        /* With few exceptions (e.g. bank 'Z'), pin number matches with pin index in array */
-       pin_desc = pctl->pins + stm32_pin_nb;
-       if (pin_desc->pin.number == stm32_pin_nb)
-               return pin_desc;
+       if (stm32_pin_nb < pctl->npins) {
+               pin_desc = pctl->pins + stm32_pin_nb;
+               if (pin_desc->pin.number == stm32_pin_nb)
+                       return pin_desc;
+       }
 
        /* Otherwise, loop all array to find the pin with the right number */
        for (i = 0; i < pctl->npins; i++) {
@@ -1368,6 +1370,11 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, struct fwnode
        }
 
        names = devm_kcalloc(dev, npins, sizeof(char *), GFP_KERNEL);
+       if (!names) {
+               err = -ENOMEM;
+               goto err_clk;
+       }
+
        for (i = 0; i < npins; i++) {
                stm32_pin = stm32_pctrl_get_desc_pin_from_gpio(pctl, bank, i);
                if (stm32_pin && stm32_pin->pin.name)
index cd6ac04c14680da86a2cf3b2f7b3b64830685a63..c3104714b48027956e7004899961e770aaa0eccf 100644 (file)
@@ -964,33 +964,6 @@ static const struct pci_device_id pmc_pci_ids[] = {
        { }
 };
 
-static int amd_pmc_get_dram_size(struct amd_pmc_dev *dev)
-{
-       int ret;
-
-       switch (dev->cpu_id) {
-       case AMD_CPU_ID_YC:
-               if (!(dev->major > 90 || (dev->major == 90 && dev->minor > 39))) {
-                       ret = -EINVAL;
-                       goto err_dram_size;
-               }
-               break;
-       default:
-               ret = -EINVAL;
-               goto err_dram_size;
-       }
-
-       ret = amd_pmc_send_cmd(dev, S2D_DRAM_SIZE, &dev->dram_size, dev->s2d_msg_id, true);
-       if (ret || !dev->dram_size)
-               goto err_dram_size;
-
-       return 0;
-
-err_dram_size:
-       dev_err(dev->dev, "DRAM size command not supported for this platform\n");
-       return ret;
-}
-
 static int amd_pmc_s2d_init(struct amd_pmc_dev *dev)
 {
        u32 phys_addr_low, phys_addr_hi;
@@ -1009,8 +982,8 @@ static int amd_pmc_s2d_init(struct amd_pmc_dev *dev)
                return -EIO;
 
        /* Get DRAM size */
-       ret = amd_pmc_get_dram_size(dev);
-       if (ret)
+       ret = amd_pmc_send_cmd(dev, S2D_DRAM_SIZE, &dev->dram_size, dev->s2d_msg_id, true);
+       if (ret || !dev->dram_size)
                dev->dram_size = S2D_TELEMETRY_DRAMBYTES_MAX;
 
        /* Get STB DRAM address */
index 5798b49ddaba901db74a072b123b7cfbb1e2ef0b..8c9f4f3227fc6dcf244210edd932301f491efb15 100644 (file)
@@ -588,17 +588,14 @@ static void release_attributes_data(void)
 static int hp_add_other_attributes(int attr_type)
 {
        struct kobject *attr_name_kobj;
-       union acpi_object *obj = NULL;
        int ret;
        char *attr_name;
 
-       mutex_lock(&bioscfg_drv.mutex);
-
        attr_name_kobj = kzalloc(sizeof(*attr_name_kobj), GFP_KERNEL);
-       if (!attr_name_kobj) {
-               ret = -ENOMEM;
-               goto err_other_attr_init;
-       }
+       if (!attr_name_kobj)
+               return -ENOMEM;
+
+       mutex_lock(&bioscfg_drv.mutex);
 
        /* Check if attribute type is supported */
        switch (attr_type) {
@@ -615,14 +612,14 @@ static int hp_add_other_attributes(int attr_type)
        default:
                pr_err("Error: Unknown attr_type: %d\n", attr_type);
                ret = -EINVAL;
-               goto err_other_attr_init;
+               kfree(attr_name_kobj);
+               goto unlock_drv_mutex;
        }
 
        ret = kobject_init_and_add(attr_name_kobj, &attr_name_ktype,
                                   NULL, "%s", attr_name);
        if (ret) {
                pr_err("Error encountered [%d]\n", ret);
-               kobject_put(attr_name_kobj);
                goto err_other_attr_init;
        }
 
@@ -630,27 +627,26 @@ static int hp_add_other_attributes(int attr_type)
        switch (attr_type) {
        case HPWMI_SECURE_PLATFORM_TYPE:
                ret = hp_populate_secure_platform_data(attr_name_kobj);
-               if (ret)
-                       goto err_other_attr_init;
                break;
 
        case HPWMI_SURE_START_TYPE:
                ret = hp_populate_sure_start_data(attr_name_kobj);
-               if (ret)
-                       goto err_other_attr_init;
                break;
 
        default:
                ret = -EINVAL;
-               goto err_other_attr_init;
        }
 
+       if (ret)
+               goto err_other_attr_init;
+
        mutex_unlock(&bioscfg_drv.mutex);
        return 0;
 
 err_other_attr_init:
+       kobject_put(attr_name_kobj);
+unlock_drv_mutex:
        mutex_unlock(&bioscfg_drv.mutex);
-       kfree(obj);
        return ret;
 }
 
index ac037540acfc602d45706c3a39fd87ce1e80620f..88eefccb6ed276a60a3c6d02b03f4716a48aea8f 100644 (file)
@@ -1425,18 +1425,17 @@ static int ideapad_kbd_bl_init(struct ideapad_private *priv)
        if (WARN_ON(priv->kbd_bl.initialized))
                return -EEXIST;
 
-       brightness = ideapad_kbd_bl_brightness_get(priv);
-       if (brightness < 0)
-               return brightness;
-
-       priv->kbd_bl.last_brightness = brightness;
-
        if (ideapad_kbd_bl_check_tristate(priv->kbd_bl.type)) {
                priv->kbd_bl.led.max_brightness = 2;
        } else {
                priv->kbd_bl.led.max_brightness = 1;
        }
 
+       brightness = ideapad_kbd_bl_brightness_get(priv);
+       if (brightness < 0)
+               return brightness;
+
+       priv->kbd_bl.last_brightness = brightness;
        priv->kbd_bl.led.name                    = "platform::" LED_FUNCTION_KBD_BACKLIGHT;
        priv->kbd_bl.led.brightness_get          = ideapad_kbd_bl_led_cdev_brightness_get;
        priv->kbd_bl.led.brightness_set_blocking = ideapad_kbd_bl_led_cdev_brightness_set;
index fdf55b5d69480e4a12765a15b42bb8ac6dee287b..e4be40f73eebfc3db68defdc11647e01ac5b604b 100644 (file)
@@ -102,7 +102,7 @@ static const struct telemetry_core_ops telm_defpltops = {
 /**
  * telemetry_update_events() - Update telemetry Configuration
  * @pss_evtconfig: PSS related config. No change if num_evts = 0.
- * @pss_evtconfig: IOSS related config. No change if num_evts = 0.
+ * @ioss_evtconfig: IOSS related config. No change if num_evts = 0.
  *
  * This API updates the IOSS & PSS Telemetry configuration. Old config
  * is overwritten. Call telemetry_reset_events when logging is over
@@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(telemetry_reset_events);
 /**
  * telemetry_get_eventconfig() - Returns the pss and ioss events enabled
  * @pss_evtconfig: Pointer to PSS related configuration.
- * @pss_evtconfig: Pointer to IOSS related configuration.
+ * @ioss_evtconfig: Pointer to IOSS related configuration.
  * @pss_len:      Number of u32 elements allocated for pss_evtconfig array
  * @ioss_len:     Number of u32 elements allocated for ioss_evtconfig array
  *
index bc3f78abb6dac85720020bdef687a97a786b9f97..709bbc448fad431d894479146982664002578584 100644 (file)
@@ -35,7 +35,7 @@ scmi_pd_set_perf_state(struct generic_pm_domain *genpd, unsigned int state)
        if (!state)
                return -EINVAL;
 
-       ret = pd->perf_ops->level_set(pd->ph, pd->domain_id, state, true);
+       ret = pd->perf_ops->level_set(pd->ph, pd->domain_id, state, false);
        if (ret)
                dev_warn(&genpd->dev, "Failed with %d when trying to set %d perf level",
                         ret, state);
index 3f7a7478880240a2d256caf624b61dcc8e7054af..7513018c9f9ac72d5c1b0055b55ae9ff36e710b0 100644 (file)
@@ -572,7 +572,8 @@ ssize_t ptp_read(struct posix_clock_context *pccontext, uint rdflags,
 
        for (i = 0; i < cnt; i++) {
                event[i] = queue->buf[queue->head];
-               queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+               /* Paired with READ_ONCE() in queue_cnt() */
+               WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS);
        }
 
        spin_unlock_irqrestore(&queue->lock, flags);
index 3134568af622d396f6ab15049cd1a3ace3243269..15b804ba48685ee11a34b88df1ae738a136d17a1 100644 (file)
@@ -57,10 +57,11 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
        dst->t.sec = seconds;
        dst->t.nsec = remainder;
 
+       /* Both WRITE_ONCE() are paired with READ_ONCE() in queue_cnt() */
        if (!queue_free(queue))
-               queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+               WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS);
 
-       queue->tail = (queue->tail + 1) % PTP_MAX_TIMESTAMPS;
+       WRITE_ONCE(queue->tail, (queue->tail + 1) % PTP_MAX_TIMESTAMPS);
 
        spin_unlock_irqrestore(&queue->lock, flags);
 }
index 35fde0a0574606a04d6bdf0ab42a204da5fa6532..45f9002a5dcaea2c588c001fa83317fc318500ee 100644 (file)
@@ -85,9 +85,13 @@ struct ptp_vclock {
  * that a writer might concurrently increment the tail does not
  * matter, since the queue remains nonempty nonetheless.
  */
-static inline int queue_cnt(struct timestamp_event_queue *q)
+static inline int queue_cnt(const struct timestamp_event_queue *q)
 {
-       int cnt = q->tail - q->head;
+       /*
+        * Paired with WRITE_ONCE() in enqueue_external_timestamp(),
+        * ptp_read(), extts_fifo_show().
+        */
+       int cnt = READ_ONCE(q->tail) - READ_ONCE(q->head);
        return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt;
 }
 
index 7d023d9d0acbfb3d128be09578753588fa59e84d..f7a499a1bd39ec22edf6c77407a48736e137f277 100644 (file)
@@ -94,7 +94,8 @@ static ssize_t extts_fifo_show(struct device *dev,
        qcnt = queue_cnt(queue);
        if (qcnt) {
                event = queue->buf[queue->head];
-               queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+               /* Paired with READ_ONCE() in queue_cnt() */
+               WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS);
        }
        spin_unlock_irqrestore(&queue->lock, flags);
 
index 4902d45e929ce2ad009cb3cc9fc00fac2f45483c..c61e6427384c34b44473af0b6965232c254e1710 100644 (file)
@@ -103,10 +103,11 @@ config CCWGROUP
 config ISM
        tristate "Support for ISM vPCI Adapter"
        depends on PCI
+       imply SMC
        default n
        help
          Select this option if you want to use the Internal Shared Memory
-         vPCI Adapter.
+         vPCI Adapter. The adapter can be used with the SMC network protocol.
 
          To compile as a module choose M. The module name is ism.
          If unsure, choose N.
index 6df7f377d2f90cabb7aeb76d7f6fa680f1aa5104..81aabbfbbe2ca4240533bc3ca3485ffb3af3bd20 100644 (file)
@@ -30,7 +30,6 @@ static const struct pci_device_id ism_device_table[] = {
 MODULE_DEVICE_TABLE(pci, ism_device_table);
 
 static debug_info_t *ism_debug_info;
-static const struct smcd_ops ism_ops;
 
 #define NO_CLIENT              0xff            /* must be >= MAX_CLIENTS */
 static struct ism_client *clients[MAX_CLIENTS];        /* use an array rather than */
@@ -289,22 +288,6 @@ out:
        return ret;
 }
 
-static int ism_query_rgid(struct ism_dev *ism, u64 rgid, u32 vid_valid,
-                         u32 vid)
-{
-       union ism_query_rgid cmd;
-
-       memset(&cmd, 0, sizeof(cmd));
-       cmd.request.hdr.cmd = ISM_QUERY_RGID;
-       cmd.request.hdr.len = sizeof(cmd.request);
-
-       cmd.request.rgid = rgid;
-       cmd.request.vlan_valid = vid_valid;
-       cmd.request.vlan_id = vid;
-
-       return ism_cmd(ism, &cmd);
-}
-
 static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
 {
        clear_bit(dmb->sba_idx, ism->sba_bitmap);
@@ -429,23 +412,6 @@ static int ism_del_vlan_id(struct ism_dev *ism, u64 vlan_id)
        return ism_cmd(ism, &cmd);
 }
 
-static int ism_signal_ieq(struct ism_dev *ism, u64 rgid, u32 trigger_irq,
-                         u32 event_code, u64 info)
-{
-       union ism_sig_ieq cmd;
-
-       memset(&cmd, 0, sizeof(cmd));
-       cmd.request.hdr.cmd = ISM_SIGNAL_IEQ;
-       cmd.request.hdr.len = sizeof(cmd.request);
-
-       cmd.request.rgid = rgid;
-       cmd.request.trigger_irq = trigger_irq;
-       cmd.request.event_code = event_code;
-       cmd.request.info = info;
-
-       return ism_cmd(ism, &cmd);
-}
-
 static unsigned int max_bytes(unsigned int start, unsigned int len,
                              unsigned int boundary)
 {
@@ -503,14 +469,6 @@ u8 *ism_get_seid(void)
 }
 EXPORT_SYMBOL_GPL(ism_get_seid);
 
-static u16 ism_get_chid(struct ism_dev *ism)
-{
-       if (!ism || !ism->pdev)
-               return 0;
-
-       return to_zpci(ism->pdev)->pchid;
-}
-
 static void ism_handle_event(struct ism_dev *ism)
 {
        struct ism_event *entry;
@@ -569,11 +527,6 @@ static irqreturn_t ism_handle_irq(int irq, void *data)
        return IRQ_HANDLED;
 }
 
-static u64 ism_get_local_gid(struct ism_dev *ism)
-{
-       return ism->local_gid;
-}
-
 static int ism_dev_init(struct ism_dev *ism)
 {
        struct pci_dev *pdev = ism->pdev;
@@ -774,6 +727,22 @@ module_exit(ism_exit);
 /*************************** SMC-D Implementation *****************************/
 
 #if IS_ENABLED(CONFIG_SMC)
+static int ism_query_rgid(struct ism_dev *ism, u64 rgid, u32 vid_valid,
+                         u32 vid)
+{
+       union ism_query_rgid cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = ISM_QUERY_RGID;
+       cmd.request.hdr.len = sizeof(cmd.request);
+
+       cmd.request.rgid = rgid;
+       cmd.request.vlan_valid = vid_valid;
+       cmd.request.vlan_id = vid;
+
+       return ism_cmd(ism, &cmd);
+}
+
 static int smcd_query_rgid(struct smcd_dev *smcd, u64 rgid, u32 vid_valid,
                           u32 vid)
 {
@@ -811,6 +780,23 @@ static int smcd_reset_vlan_required(struct smcd_dev *smcd)
        return ism_cmd_simple(smcd->priv, ISM_RESET_VLAN);
 }
 
+static int ism_signal_ieq(struct ism_dev *ism, u64 rgid, u32 trigger_irq,
+                         u32 event_code, u64 info)
+{
+       union ism_sig_ieq cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = ISM_SIGNAL_IEQ;
+       cmd.request.hdr.len = sizeof(cmd.request);
+
+       cmd.request.rgid = rgid;
+       cmd.request.trigger_irq = trigger_irq;
+       cmd.request.event_code = event_code;
+       cmd.request.info = info;
+
+       return ism_cmd(ism, &cmd);
+}
+
 static int smcd_signal_ieq(struct smcd_dev *smcd, u64 rgid, u32 trigger_irq,
                           u32 event_code, u64 info)
 {
@@ -830,11 +816,24 @@ static int smcd_supports_v2(void)
                SYSTEM_EID.type[0] != '0';
 }
 
+static u64 ism_get_local_gid(struct ism_dev *ism)
+{
+       return ism->local_gid;
+}
+
 static u64 smcd_get_local_gid(struct smcd_dev *smcd)
 {
        return ism_get_local_gid(smcd->priv);
 }
 
+static u16 ism_get_chid(struct ism_dev *ism)
+{
+       if (!ism || !ism->pdev)
+               return 0;
+
+       return to_zpci(ism->pdev)->pchid;
+}
+
 static u16 smcd_get_chid(struct smcd_dev *smcd)
 {
        return ism_get_chid(smcd->priv);
index 32d1e73e46eecdcfdb4b6d30eef826202e5a0c3a..03348f605c2e9a5289082fbd2c1694c90229cf16 100644 (file)
@@ -1837,8 +1837,16 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
                }
 
                spin_lock_irqsave(qp->qp_lock_ptr, *flags);
-               if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd)))
-                       sp->done(sp, res);
+               switch (sp->type) {
+               case SRB_SCSI_CMD:
+                       if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd)))
+                               sp->done(sp, res);
+                       break;
+               default:
+                       if (ret_cmd)
+                               sp->done(sp, res);
+                       break;
+               }
        } else {
                sp->done(sp, res);
        }
index 67922e2c4c1915cb74dd62b7a7704d371efdb1d9..6d8218a4412264952226a3a2a628e5a6e7d8d2bc 100644 (file)
@@ -1019,7 +1019,7 @@ static ssize_t sdebug_error_write(struct file *file, const char __user *ubuf,
        struct sdebug_err_inject *inject;
        struct scsi_device *sdev = (struct scsi_device *)file->f_inode->i_private;
 
-       buf = kmalloc(count, GFP_KERNEL);
+       buf = kzalloc(count + 1, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;
 
@@ -1132,7 +1132,6 @@ static const struct file_operations sdebug_target_reset_fail_fops = {
 static int sdebug_target_alloc(struct scsi_target *starget)
 {
        struct sdebug_target_info *targetip;
-       struct dentry *dentry;
 
        targetip = kzalloc(sizeof(struct sdebug_target_info), GFP_KERNEL);
        if (!targetip)
@@ -1140,15 +1139,9 @@ static int sdebug_target_alloc(struct scsi_target *starget)
 
        targetip->debugfs_entry = debugfs_create_dir(dev_name(&starget->dev),
                                sdebug_debugfs_root);
-       if (IS_ERR_OR_NULL(targetip->debugfs_entry))
-               pr_info("%s: failed to create debugfs directory for target %s\n",
-                       __func__, dev_name(&starget->dev));
 
        debugfs_create_file("fail_reset", 0600, targetip->debugfs_entry, starget,
                                &sdebug_target_reset_fail_fops);
-       if (IS_ERR_OR_NULL(dentry))
-               pr_info("%s: failed to create fail_reset file for target %s\n",
-                       __func__, dev_name(&starget->dev));
 
        starget->hostdata = targetip;
 
index 530918cbfce2d1840feec8dcca2dcc7479257452..542a4bbb21bce8344fcacb88a4f00f5f2bde4052 100644 (file)
@@ -1643,24 +1643,21 @@ out:
        return disk_changed ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
-static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
+static int sd_sync_cache(struct scsi_disk *sdkp)
 {
        int retries, res;
        struct scsi_device *sdp = sdkp->device;
        const int timeout = sdp->request_queue->rq_timeout
                * SD_FLUSH_TIMEOUT_MULTIPLIER;
-       struct scsi_sense_hdr my_sshdr;
+       struct scsi_sense_hdr sshdr;
        const struct scsi_exec_args exec_args = {
                .req_flags = BLK_MQ_REQ_PM,
-               /* caller might not be interested in sense, but we need it */
-               .sshdr = sshdr ? : &my_sshdr,
+               .sshdr = &sshdr,
        };
 
        if (!scsi_device_online(sdp))
                return -ENODEV;
 
-       sshdr = exec_args.sshdr;
-
        for (retries = 3; retries > 0; --retries) {
                unsigned char cmd[16] = { 0 };
 
@@ -1685,15 +1682,23 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
                        return res;
 
                if (scsi_status_is_check_condition(res) &&
-                   scsi_sense_valid(sshdr)) {
-                       sd_print_sense_hdr(sdkp, sshdr);
+                   scsi_sense_valid(&sshdr)) {
+                       sd_print_sense_hdr(sdkp, &sshdr);
 
                        /* we need to evaluate the error return  */
-                       if (sshdr->asc == 0x3a ||       /* medium not present */
-                           sshdr->asc == 0x20 ||       /* invalid command */
-                           (sshdr->asc == 0x74 && sshdr->ascq == 0x71))        /* drive is password locked */
+                       if (sshdr.asc == 0x3a ||        /* medium not present */
+                           sshdr.asc == 0x20 ||        /* invalid command */
+                           (sshdr.asc == 0x74 && sshdr.ascq == 0x71))  /* drive is password locked */
                                /* this is no error here */
                                return 0;
+                       /*
+                        * This drive doesn't support sync and there's not much
+                        * we can do because this is called during shutdown
+                        * or suspend so just return success so those operations
+                        * can proceed.
+                        */
+                       if (sshdr.sense_key == ILLEGAL_REQUEST)
+                               return 0;
                }
 
                switch (host_byte(res)) {
@@ -3853,7 +3858,7 @@ static void sd_shutdown(struct device *dev)
 
        if (sdkp->WCE && sdkp->media_present) {
                sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-               sd_sync_cache(sdkp, NULL);
+               sd_sync_cache(sdkp);
        }
 
        if ((system_state != SYSTEM_RESTART &&
@@ -3874,7 +3879,6 @@ static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime)
 static int sd_suspend_common(struct device *dev, bool runtime)
 {
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
-       struct scsi_sense_hdr sshdr;
        int ret = 0;
 
        if (!sdkp)      /* E.g.: runtime suspend following sd_remove() */
@@ -3883,24 +3887,13 @@ static int sd_suspend_common(struct device *dev, bool runtime)
        if (sdkp->WCE && sdkp->media_present) {
                if (!sdkp->device->silence_suspend)
                        sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-               ret = sd_sync_cache(sdkp, &sshdr);
-
-               if (ret) {
-                       /* ignore OFFLINE device */
-                       if (ret == -ENODEV)
-                               return 0;
-
-                       if (!scsi_sense_valid(&sshdr) ||
-                           sshdr.sense_key != ILLEGAL_REQUEST)
-                               return ret;
+               ret = sd_sync_cache(sdkp);
+               /* ignore OFFLINE device */
+               if (ret == -ENODEV)
+                       return 0;
 
-                       /*
-                        * sshdr.sense_key == ILLEGAL_REQUEST means this drive
-                        * doesn't support sync. There's not much to do and
-                        * suspend shouldn't fail.
-                        */
-                       ret = 0;
-               }
+               if (ret)
+                       return ret;
        }
 
        if (sd_do_start_stop(sdkp->device, runtime)) {
@@ -3956,8 +3949,15 @@ static int sd_resume(struct device *dev, bool runtime)
 
 static int sd_resume_system(struct device *dev)
 {
-       if (pm_runtime_suspended(dev))
+       if (pm_runtime_suspended(dev)) {
+               struct scsi_disk *sdkp = dev_get_drvdata(dev);
+               struct scsi_device *sdp = sdkp ? sdkp->device : NULL;
+
+               if (sdp && sdp->force_runtime_start_on_system_start)
+                       pm_request_resume(dev);
+
                return 0;
+       }
 
        return sd_resume(dev, false);
 }
index 1e15ffa792955d7382b19546226bdcfa603a5685..44e9b09de47a5a01a0a22d48c9d637bdd7c7bf68 100644 (file)
@@ -1143,7 +1143,7 @@ int tb_port_lane_bonding_enable(struct tb_port *port)
         * Only set bonding if the link was not already bonded. This
         * avoids the lane adapter to re-enter bonding state.
         */
-       if (width == TB_LINK_WIDTH_SINGLE) {
+       if (width == TB_LINK_WIDTH_SINGLE && !tb_is_upstream_port(port)) {
                ret = tb_port_set_lane_bonding(port, true);
                if (ret)
                        goto err_lane1;
@@ -2880,6 +2880,7 @@ static int tb_switch_lane_bonding_disable(struct tb_switch *sw)
        return tb_port_wait_for_link_width(down, TB_LINK_WIDTH_SINGLE, 100);
 }
 
+/* Note updating sw->link_width done in tb_switch_update_link_attributes() */
 static int tb_switch_asym_enable(struct tb_switch *sw, enum tb_link_width width)
 {
        struct tb_port *up, *down, *port;
@@ -2919,10 +2920,10 @@ static int tb_switch_asym_enable(struct tb_switch *sw, enum tb_link_width width)
                        return ret;
        }
 
-       sw->link_width = width;
        return 0;
 }
 
+/* Note updating sw->link_width done in tb_switch_update_link_attributes() */
 static int tb_switch_asym_disable(struct tb_switch *sw)
 {
        struct tb_port *up, *down;
@@ -2957,7 +2958,6 @@ static int tb_switch_asym_disable(struct tb_switch *sw)
                        return ret;
        }
 
-       sw->link_width = TB_LINK_WIDTH_DUAL;
        return 0;
 }
 
index 5acdeb766860da51f912677a37c99c6b0166d922..fd49f86e03532c140557af4bb822eb2d7665ef95 100644 (file)
@@ -213,7 +213,17 @@ static void tb_add_dp_resources(struct tb_switch *sw)
                if (!tb_switch_query_dp_resource(sw, port))
                        continue;
 
-               list_add(&port->list, &tcm->dp_resources);
+               /*
+                * If DP IN on device router exist, position it at the
+                * beginning of the DP resources list, so that it is used
+                * before DP IN of the host router. This way external GPU(s)
+                * will be prioritized when pairing DP IN to a DP OUT.
+                */
+               if (tb_route(sw))
+                       list_add(&port->list, &tcm->dp_resources);
+               else
+                       list_add_tail(&port->list, &tcm->dp_resources);
+
                tb_port_dbg(port, "DP IN resource available\n");
        }
 }
index 2ba8ec254dceeec5bacf2b027bb1b06ed9f54c56..0787456c2b892f773bba5cf66c09ac7918787852 100644 (file)
@@ -436,7 +436,7 @@ int ufshcd_mcq_init(struct ufs_hba *hba)
 
        for (i = 0; i < hba->nr_hw_queues; i++) {
                hwq = &hba->uhq[i];
-               hwq->max_entries = hba->nutrs;
+               hwq->max_entries = hba->nutrs + 1;
                spin_lock_init(&hwq->sq_lock);
                spin_lock_init(&hwq->cq_lock);
                mutex_init(&hwq->sq_mutex);
@@ -630,6 +630,7 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
        int tag = scsi_cmd_to_rq(cmd)->tag;
        struct ufshcd_lrb *lrbp = &hba->lrb[tag];
        struct ufs_hw_queue *hwq;
+       unsigned long flags;
        int err = FAILED;
 
        if (!ufshcd_cmd_inflight(lrbp->cmd)) {
@@ -670,8 +671,10 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
        }
 
        err = SUCCESS;
+       spin_lock_irqsave(&hwq->cq_lock, flags);
        if (ufshcd_cmd_inflight(lrbp->cmd))
                ufshcd_release_scsi_cmd(hba, lrbp);
+       spin_unlock_irqrestore(&hwq->cq_lock, flags);
 
 out:
        return err;
index 8b1031fb0a4400fd6355be140e1ef5e99afabb74..bce0d2a9a7f3a55497572bd3e51e9cf4fc6a0b4e 100644 (file)
@@ -6444,11 +6444,24 @@ static bool ufshcd_abort_one(struct request *rq, void *priv)
        struct scsi_device *sdev = cmd->device;
        struct Scsi_Host *shost = sdev->host;
        struct ufs_hba *hba = shost_priv(shost);
+       struct ufshcd_lrb *lrbp = &hba->lrb[tag];
+       struct ufs_hw_queue *hwq;
+       unsigned long flags;
 
        *ret = ufshcd_try_to_abort_task(hba, tag);
        dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
                hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
                *ret ? "failed" : "succeeded");
+
+       /* Release cmd in MCQ mode if abort succeeds */
+       if (is_mcq_enabled(hba) && (*ret == 0)) {
+               hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
+               spin_lock_irqsave(&hwq->cq_lock, flags);
+               if (ufshcd_cmd_inflight(lrbp->cmd))
+                       ufshcd_release_scsi_cmd(hba, lrbp);
+               spin_unlock_irqrestore(&hwq->cq_lock, flags);
+       }
+
        return *ret == 0;
 }
 
index af981778382df71d6c109e3a2f7a83b0b3fb2d6a..02f297f5637d7562a1228e0e860af608ac6beda5 100644 (file)
@@ -1529,6 +1529,7 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data)
        unsigned long flags;
        int counter = 0;
 
+       local_bh_disable();
        spin_lock_irqsave(&pdev->lock, flags);
 
        if (pdev->cdnsp_state & (CDNSP_STATE_HALTED | CDNSP_STATE_DYING)) {
@@ -1541,6 +1542,7 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data)
                        cdnsp_died(pdev);
 
                spin_unlock_irqrestore(&pdev->lock, flags);
+               local_bh_enable();
                return IRQ_HANDLED;
        }
 
@@ -1557,6 +1559,7 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data)
        cdnsp_update_erst_dequeue(pdev, event_ring_deq, 1);
 
        spin_unlock_irqrestore(&pdev->lock, flags);
+       local_bh_enable();
 
        return IRQ_HANDLED;
 }
index b19e38d5fd10c1a4dfd84240df9e8203e4314f8f..7f8d33f92ddb5f5cfa3dbb0c9b845cd4f2903e78 100644 (file)
@@ -1047,7 +1047,7 @@ int usb_get_bos_descriptor(struct usb_device *dev)
 
                if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) {
                        dev_notice(ddev, "descriptor type invalid, skip\n");
-                       continue;
+                       goto skip_to_next_descriptor;
                }
 
                switch (cap_type) {
@@ -1078,6 +1078,7 @@ int usb_get_bos_descriptor(struct usb_device *dev)
                        break;
                }
 
+skip_to_next_descriptor:
                total_len -= length;
                buffer += length;
        }
index b4584a0cd484562633488d1552f60281d527bd6a..87480a6e6d934893a6096cbf7e58f5d1cc5358f1 100644 (file)
@@ -622,29 +622,6 @@ static int hub_ext_port_status(struct usb_hub *hub, int port1, int type,
                ret = 0;
        }
        mutex_unlock(&hub->status_mutex);
-
-       /*
-        * There is no need to lock status_mutex here, because status_mutex
-        * protects hub->status, and the phy driver only checks the port
-        * status without changing the status.
-        */
-       if (!ret) {
-               struct usb_device *hdev = hub->hdev;
-
-               /*
-                * Only roothub will be notified of port state changes,
-                * since the USB PHY only cares about changes at the next
-                * level.
-                */
-               if (is_root_hub(hdev)) {
-                       struct usb_hcd *hcd = bus_to_hcd(hdev->bus);
-
-                       if (hcd->usb_phy)
-                               usb_phy_notify_port_status(hcd->usb_phy,
-                                                          port1 - 1, *status, *change);
-               }
-       }
-
        return ret;
 }
 
index 0144ca8350c31234956b5aea4bbb38bed7134a87..5c7538d498dd1194d9d328511f583b40c557cd5e 100644 (file)
@@ -2015,15 +2015,17 @@ static void dwc2_hc_n_intr(struct dwc2_hsotg *hsotg, int chnum)
 {
        struct dwc2_qtd *qtd;
        struct dwc2_host_chan *chan;
-       u32 hcint, hcintmsk;
+       u32 hcint, hcintraw, hcintmsk;
 
        chan = hsotg->hc_ptr_array[chnum];
 
-       hcint = dwc2_readl(hsotg, HCINT(chnum));
+       hcintraw = dwc2_readl(hsotg, HCINT(chnum));
        hcintmsk = dwc2_readl(hsotg, HCINTMSK(chnum));
+       hcint = hcintraw & hcintmsk;
+       dwc2_writel(hsotg, hcint, HCINT(chnum));
+
        if (!chan) {
                dev_err(hsotg->dev, "## hc_ptr_array for channel is NULL ##\n");
-               dwc2_writel(hsotg, hcint, HCINT(chnum));
                return;
        }
 
@@ -2032,11 +2034,9 @@ static void dwc2_hc_n_intr(struct dwc2_hsotg *hsotg, int chnum)
                         chnum);
                dev_vdbg(hsotg->dev,
                         "  hcint 0x%08x, hcintmsk 0x%08x, hcint&hcintmsk 0x%08x\n",
-                        hcint, hcintmsk, hcint & hcintmsk);
+                        hcintraw, hcintmsk, hcint);
        }
 
-       dwc2_writel(hsotg, hcint, HCINT(chnum));
-
        /*
         * If we got an interrupt after someone called
         * dwc2_hcd_endpoint_disable() we don't want to crash below
@@ -2046,8 +2046,7 @@ static void dwc2_hc_n_intr(struct dwc2_hsotg *hsotg, int chnum)
                return;
        }
 
-       chan->hcint = hcint;
-       hcint &= hcintmsk;
+       chan->hcint = hcintraw;
 
        /*
         * If the channel was halted due to a dequeue, the qtd list might
index 0328c86ef80613a8e3296c3cab8ee6a94a96604d..b101dbf8c5dcc775797da2ce3ea0f2c34ebd9323 100644 (file)
@@ -2034,6 +2034,8 @@ static int dwc3_probe(struct platform_device *pdev)
 
        pm_runtime_put(dev);
 
+       dma_set_max_seg_size(dev, UINT_MAX);
+
        return 0;
 
 err_exit_debugfs:
index 039bf241769afb789f7f09130e7af91eba62f12d..57ddd2e43022eb8f896acaf9c7e82d307bc145b2 100644 (file)
@@ -505,6 +505,7 @@ static int dwc3_setup_role_switch(struct dwc3 *dwc)
                dwc->role_switch_default_mode = USB_DR_MODE_PERIPHERAL;
                mode = DWC3_GCTL_PRTCAP_DEVICE;
        }
+       dwc3_set_mode(dwc, mode);
 
        dwc3_role_switch.fwnode = dev_fwnode(dwc->dev);
        dwc3_role_switch.set = dwc3_usb_role_switch_set;
@@ -526,7 +527,6 @@ static int dwc3_setup_role_switch(struct dwc3 *dwc)
                }
        }
 
-       dwc3_set_mode(dwc, mode);
        return 0;
 }
 #else
index 3de43df6bbe814a16e1c1d7578711129843fef10..fdf6d5d3c2ada4357bcf1a0156410e4fc0809cfa 100644 (file)
@@ -546,10 +546,9 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev)
                                pdata ? pdata->hs_phy_irq_index : -1);
        if (irq > 0) {
                /* Keep wakeup interrupts disabled until suspend */
-               irq_set_status_flags(irq, IRQ_NOAUTOEN);
                ret = devm_request_threaded_irq(qcom->dev, irq, NULL,
                                        qcom_dwc3_resume_irq,
-                                       IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+                                       IRQF_ONESHOT | IRQF_NO_AUTOEN,
                                        "qcom_dwc3 HS", qcom);
                if (ret) {
                        dev_err(qcom->dev, "hs_phy_irq failed: %d\n", ret);
@@ -561,10 +560,9 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev)
        irq = dwc3_qcom_get_irq(pdev, "dp_hs_phy_irq",
                                pdata ? pdata->dp_hs_phy_irq_index : -1);
        if (irq > 0) {
-               irq_set_status_flags(irq, IRQ_NOAUTOEN);
                ret = devm_request_threaded_irq(qcom->dev, irq, NULL,
                                        qcom_dwc3_resume_irq,
-                                       IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+                                       IRQF_ONESHOT | IRQF_NO_AUTOEN,
                                        "qcom_dwc3 DP_HS", qcom);
                if (ret) {
                        dev_err(qcom->dev, "dp_hs_phy_irq failed: %d\n", ret);
@@ -576,10 +574,9 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev)
        irq = dwc3_qcom_get_irq(pdev, "dm_hs_phy_irq",
                                pdata ? pdata->dm_hs_phy_irq_index : -1);
        if (irq > 0) {
-               irq_set_status_flags(irq, IRQ_NOAUTOEN);
                ret = devm_request_threaded_irq(qcom->dev, irq, NULL,
                                        qcom_dwc3_resume_irq,
-                                       IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+                                       IRQF_ONESHOT | IRQF_NO_AUTOEN,
                                        "qcom_dwc3 DM_HS", qcom);
                if (ret) {
                        dev_err(qcom->dev, "dm_hs_phy_irq failed: %d\n", ret);
@@ -591,10 +588,9 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev)
        irq = dwc3_qcom_get_irq(pdev, "ss_phy_irq",
                                pdata ? pdata->ss_phy_irq_index : -1);
        if (irq > 0) {
-               irq_set_status_flags(irq, IRQ_NOAUTOEN);
                ret = devm_request_threaded_irq(qcom->dev, irq, NULL,
                                        qcom_dwc3_resume_irq,
-                                       IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+                                       IRQF_ONESHOT | IRQF_NO_AUTOEN,
                                        "qcom_dwc3 SS", qcom);
                if (ret) {
                        dev_err(qcom->dev, "ss_phy_irq failed: %d\n", ret);
@@ -758,6 +754,7 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev)
        if (!qcom->dwc3) {
                ret = -ENODEV;
                dev_err(dev, "failed to get dwc3 platform device\n");
+               of_platform_depopulate(dev);
        }
 
 node_put:
@@ -766,9 +763,9 @@ node_put:
        return ret;
 }
 
-static struct platform_device *
-dwc3_qcom_create_urs_usb_platdev(struct device *dev)
+static struct platform_device *dwc3_qcom_create_urs_usb_platdev(struct device *dev)
 {
+       struct platform_device *urs_usb = NULL;
        struct fwnode_handle *fwh;
        struct acpi_device *adev;
        char name[8];
@@ -788,9 +785,26 @@ dwc3_qcom_create_urs_usb_platdev(struct device *dev)
 
        adev = to_acpi_device_node(fwh);
        if (!adev)
-               return NULL;
+               goto err_put_handle;
+
+       urs_usb = acpi_create_platform_device(adev, NULL);
+       if (IS_ERR_OR_NULL(urs_usb))
+               goto err_put_handle;
+
+       return urs_usb;
+
+err_put_handle:
+       fwnode_handle_put(fwh);
+
+       return urs_usb;
+}
 
-       return acpi_create_platform_device(adev, NULL);
+static void dwc3_qcom_destroy_urs_usb_platdev(struct platform_device *urs_usb)
+{
+       struct fwnode_handle *fwh = urs_usb->dev.fwnode;
+
+       platform_device_unregister(urs_usb);
+       fwnode_handle_put(fwh);
 }
 
 static int dwc3_qcom_probe(struct platform_device *pdev)
@@ -874,13 +888,13 @@ static int dwc3_qcom_probe(struct platform_device *pdev)
        qcom->qscratch_base = devm_ioremap_resource(dev, parent_res);
        if (IS_ERR(qcom->qscratch_base)) {
                ret = PTR_ERR(qcom->qscratch_base);
-               goto clk_disable;
+               goto free_urs;
        }
 
        ret = dwc3_qcom_setup_irq(pdev);
        if (ret) {
                dev_err(dev, "failed to setup IRQs, err=%d\n", ret);
-               goto clk_disable;
+               goto free_urs;
        }
 
        /*
@@ -899,7 +913,7 @@ static int dwc3_qcom_probe(struct platform_device *pdev)
 
        if (ret) {
                dev_err(dev, "failed to register DWC3 Core, err=%d\n", ret);
-               goto depopulate;
+               goto free_urs;
        }
 
        ret = dwc3_qcom_interconnect_init(qcom);
@@ -931,10 +945,16 @@ static int dwc3_qcom_probe(struct platform_device *pdev)
 interconnect_exit:
        dwc3_qcom_interconnect_exit(qcom);
 depopulate:
-       if (np)
+       if (np) {
                of_platform_depopulate(&pdev->dev);
-       else
-               platform_device_put(pdev);
+       } else {
+               device_remove_software_node(&qcom->dwc3->dev);
+               platform_device_del(qcom->dwc3);
+       }
+       platform_device_put(qcom->dwc3);
+free_urs:
+       if (qcom->urs_usb)
+               dwc3_qcom_destroy_urs_usb_platdev(qcom->urs_usb);
 clk_disable:
        for (i = qcom->num_clocks - 1; i >= 0; i--) {
                clk_disable_unprepare(qcom->clks[i]);
@@ -953,11 +973,16 @@ static void dwc3_qcom_remove(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        int i;
 
-       device_remove_software_node(&qcom->dwc3->dev);
-       if (np)
+       if (np) {
                of_platform_depopulate(&pdev->dev);
-       else
-               platform_device_put(pdev);
+       } else {
+               device_remove_software_node(&qcom->dwc3->dev);
+               platform_device_del(qcom->dwc3);
+       }
+       platform_device_put(qcom->dwc3);
+
+       if (qcom->urs_usb)
+               dwc3_qcom_destroy_urs_usb_platdev(qcom->urs_usb);
 
        for (i = qcom->num_clocks - 1; i >= 0; i--) {
                clk_disable_unprepare(qcom->clks[i]);
index 590028e8fdcb2338f76c2b8c68ff84f909cc178f..3cd6b184551ce2695e31bf6cf96fe63e8121550c 100644 (file)
@@ -183,10 +183,13 @@ static enum usb_device_speed __get_dwc3_maximum_speed(struct device_node *np)
 
        ret = of_property_read_string(dwc3_np, "maximum-speed", &maximum_speed);
        if (ret < 0)
-               return USB_SPEED_UNKNOWN;
+               goto out;
 
        ret = match_string(speed_names, ARRAY_SIZE(speed_names), maximum_speed);
 
+out:
+       of_node_put(dwc3_np);
+
        return (ret < 0) ? USB_SPEED_UNKNOWN : ret;
 }
 
@@ -339,6 +342,9 @@ static int dwc3_rtk_probe_dwc3_core(struct dwc3_rtk *rtk)
 
        switch_usb2_role(rtk, rtk->cur_role);
 
+       platform_device_put(dwc3_pdev);
+       of_node_put(dwc3_node);
+
        return 0;
 
 err_pdev_put:
index 5b3cd455adecc21bd3fcc70316cb4ceab8e51609..61f3f8bbdcead3f325b60128c8bf648acf32dc2b 100644 (file)
@@ -650,9 +650,8 @@ static int check_isoc_ss_overlap(struct mu3h_sch_ep_info *sch_ep, u32 offset)
 
                if (sch_ep->ep_type == ISOC_OUT_EP) {
                        for (j = 0; j < sch_ep->num_budget_microframes; j++) {
-                               k = XHCI_MTK_BW_INDEX(base + j + CS_OFFSET);
-                               /* use cs to indicate existence of in-ss @(base+j) */
-                               if (tt->fs_bus_bw_in[k])
+                               k = XHCI_MTK_BW_INDEX(base + j);
+                               if (tt->in_ss_cnt[k])
                                        return -ESCH_SS_OVERLAP;
                        }
                } else if (sch_ep->ep_type == ISOC_IN_EP || sch_ep->ep_type == INT_IN_EP) {
@@ -769,6 +768,14 @@ static void update_sch_tt(struct mu3h_sch_ep_info *sch_ep, bool used)
                                tt->fs_frame_bw[f] -= (u16)sch_ep->bw_budget_table[j];
                        }
                }
+
+               if (sch_ep->ep_type == ISOC_IN_EP || sch_ep->ep_type == INT_IN_EP) {
+                       k = XHCI_MTK_BW_INDEX(base);
+                       if (used)
+                               tt->in_ss_cnt[k]++;
+                       else
+                               tt->in_ss_cnt[k]--;
+               }
        }
 
        if (used)
index 865b55e23b15948f68ec933395077df1f7d71960..39f7ae7d30871d074e832a44fcd6dc9708d971c0 100644 (file)
@@ -38,6 +38,7 @@
  * @fs_bus_bw_in: save bandwidth used by FS/LS IN eps in each uframes
  * @ls_bus_bw: save bandwidth used by LS eps in each uframes
  * @fs_frame_bw: save bandwidth used by FS/LS eps in each FS frames
+ * @in_ss_cnt: the count of Start-Split for IN eps
  * @ep_list: Endpoints using this TT
  */
 struct mu3h_sch_tt {
@@ -45,6 +46,7 @@ struct mu3h_sch_tt {
        u16 fs_bus_bw_in[XHCI_MTK_MAX_ESIT];
        u8 ls_bus_bw[XHCI_MTK_MAX_ESIT];
        u16 fs_frame_bw[XHCI_MTK_FRAMES_CNT];
+       u8 in_ss_cnt[XHCI_MTK_MAX_ESIT];
        struct list_head ep_list;
 };
 
index b93161374293b3b9f272b9a9c77a282a3a5795b1..732cdeb739202e112a4bfd0a00b63dcae7c984d2 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/usb/phy.h>
 #include <linux/slab.h>
@@ -148,7 +149,7 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s
        int                     ret;
        int                     irq;
        struct xhci_plat_priv   *priv = NULL;
-
+       bool                    of_match;
 
        if (usb_disabled())
                return -ENODEV;
@@ -253,16 +254,23 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s
                                         &xhci->imod_interval);
        }
 
-       hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev, "usb-phy", 0);
-       if (IS_ERR(hcd->usb_phy)) {
-               ret = PTR_ERR(hcd->usb_phy);
-               if (ret == -EPROBE_DEFER)
-                       goto disable_clk;
-               hcd->usb_phy = NULL;
-       } else {
-               ret = usb_phy_init(hcd->usb_phy);
-               if (ret)
-                       goto disable_clk;
+       /*
+        * Drivers such as dwc3 manages PHYs themself (and rely on driver name
+        * matching for the xhci platform device).
+        */
+       of_match = of_match_device(pdev->dev.driver->of_match_table, &pdev->dev);
+       if (of_match) {
+               hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev, "usb-phy", 0);
+               if (IS_ERR(hcd->usb_phy)) {
+                       ret = PTR_ERR(hcd->usb_phy);
+                       if (ret == -EPROBE_DEFER)
+                               goto disable_clk;
+                       hcd->usb_phy = NULL;
+               } else {
+                       ret = usb_phy_init(hcd->usb_phy);
+                       if (ret)
+                               goto disable_clk;
+               }
        }
 
        hcd->tpl_support = of_usb_host_tpl_support(sysdev->of_node);
@@ -285,15 +293,17 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s
                        goto dealloc_usb2_hcd;
                }
 
-               xhci->shared_hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev,
-                           "usb-phy", 1);
-               if (IS_ERR(xhci->shared_hcd->usb_phy)) {
-                       xhci->shared_hcd->usb_phy = NULL;
-               } else {
-                       ret = usb_phy_init(xhci->shared_hcd->usb_phy);
-                       if (ret)
-                               dev_err(sysdev, "%s init usb3phy fail (ret=%d)\n",
-                                           __func__, ret);
+               if (of_match) {
+                       xhci->shared_hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev,
+                                                                               "usb-phy", 1);
+                       if (IS_ERR(xhci->shared_hcd->usb_phy)) {
+                               xhci->shared_hcd->usb_phy = NULL;
+                       } else {
+                               ret = usb_phy_init(xhci->shared_hcd->usb_phy);
+                               if (ret)
+                                       dev_err(sysdev, "%s init usb3phy fail (ret=%d)\n",
+                                               __func__, ret);
+                       }
                }
 
                xhci->shared_hcd->tpl_support = hcd->tpl_support;
index a341b2fbb7b44f9a02a29b6ec7972be69caf9d17..2b45404e9732cefca8b6a8cd93fa4f08a015cc76 100644 (file)
@@ -432,6 +432,8 @@ static const struct usb_device_id onboard_hub_id_table[] = {
        { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2412) }, /* USB2412 USB 2.0 */
        { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */
        { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */
+       { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2744) }, /* USB5744 USB 2.0 */
+       { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x5744) }, /* USB5744 USB 3.0 */
        { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */
        { USB_DEVICE(VENDOR_ID_REALTEK, 0x5411) }, /* RTS5411 USB 2.1 */
        { USB_DEVICE(VENDOR_ID_REALTEK, 0x0414) }, /* RTS5414 USB 3.2 */
index c4e24a7b92904cc060d398c55d8b396239c813f7..292110e64a1d91c6697eb3bd18290d75dee5eebd 100644 (file)
@@ -16,6 +16,11 @@ static const struct onboard_hub_pdata microchip_usb424_data = {
        .num_supplies = 1,
 };
 
+static const struct onboard_hub_pdata microchip_usb5744_data = {
+       .reset_us = 0,
+       .num_supplies = 2,
+};
+
 static const struct onboard_hub_pdata realtek_rts5411_data = {
        .reset_us = 0,
        .num_supplies = 1,
@@ -50,6 +55,8 @@ static const struct of_device_id onboard_hub_match[] = {
        { .compatible = "usb424,2412", .data = &microchip_usb424_data, },
        { .compatible = "usb424,2514", .data = &microchip_usb424_data, },
        { .compatible = "usb424,2517", .data = &microchip_usb424_data, },
+       { .compatible = "usb424,2744", .data = &microchip_usb5744_data, },
+       { .compatible = "usb424,5744", .data = &microchip_usb5744_data, },
        { .compatible = "usb451,8140", .data = &ti_tusb8041_data, },
        { .compatible = "usb451,8142", .data = &ti_tusb8041_data, },
        { .compatible = "usb4b4,6504", .data = &cypress_hx3_data, },
index c9decd0396d4990db1c8f1d2d01e157636f416a1..35770e608c6497bbb1e5eeb73da1f8e6ef89cb72 100644 (file)
@@ -457,8 +457,8 @@ static void ljca_auxdev_acpi_bind(struct ljca_adapter *adap,
                                  u64 adr, u8 id)
 {
        struct ljca_match_ids_walk_data wd = { 0 };
-       struct acpi_device *parent, *adev;
        struct device *dev = adap->dev;
+       struct acpi_device *parent;
        char uid[4];
 
        parent = ACPI_COMPANION(dev);
@@ -466,17 +466,7 @@ static void ljca_auxdev_acpi_bind(struct ljca_adapter *adap,
                return;
 
        /*
-        * get auxdev ACPI handle from the ACPI device directly
-        * under the parent that matches _ADR.
-        */
-       adev = acpi_find_child_device(parent, adr, false);
-       if (adev) {
-               ACPI_COMPANION_SET(&auxdev->dev, adev);
-               return;
-       }
-
-       /*
-        * _ADR is a grey area in the ACPI specification, some
+        * Currently LJCA hw doesn't use _ADR instead the shipped
         * platforms use _HID to distinguish children devices.
         */
        switch (adr) {
@@ -656,10 +646,11 @@ static int ljca_enumerate_spi(struct ljca_adapter *adap)
        unsigned int i;
        int ret;
 
+       /* Not all LJCA chips implement SPI, a timeout reading the descriptors is normal */
        ret = ljca_send(adap, LJCA_CLIENT_MNG, LJCA_MNG_ENUM_SPI, NULL, 0, buf,
                        sizeof(buf), true, LJCA_ENUM_CLIENT_TIMEOUT_MS);
        if (ret < 0)
-               return ret;
+               return (ret == -ETIMEDOUT) ? 0 : ret;
 
        /* check firmware response */
        desc = (struct ljca_spi_descriptor *)buf;
index 45dcfaadaf98eb6f6216975764722228d204ea22..4dffcfefd62da22360ba1cc8b467c3946a425030 100644 (file)
@@ -203,8 +203,8 @@ static void option_instat_callback(struct urb *urb);
 #define DELL_PRODUCT_5829E_ESIM                        0x81e4
 #define DELL_PRODUCT_5829E                     0x81e6
 
-#define DELL_PRODUCT_FM101R                    0x8213
-#define DELL_PRODUCT_FM101R_ESIM               0x8215
+#define DELL_PRODUCT_FM101R_ESIM               0x8213
+#define DELL_PRODUCT_FM101R                    0x8215
 
 #define KYOCERA_VENDOR_ID                      0x0c88
 #define KYOCERA_PRODUCT_KPC650                 0x17da
@@ -609,6 +609,8 @@ static void option_instat_callback(struct urb *urb);
 #define UNISOC_VENDOR_ID                       0x1782
 /* TOZED LT70-C based on UNISOC SL8563 uses UNISOC's vendor ID */
 #define TOZED_PRODUCT_LT70C                    0x4055
+/* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */
+#define LUAT_PRODUCT_AIR720U                   0x4e00
 
 /* Device flags */
 
@@ -1546,7 +1548,8 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0165, 0xff, 0xff, 0xff) },
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0167, 0xff, 0xff, 0xff),
          .driver_info = RSVD(4) },
-       { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0189, 0xff, 0xff, 0xff) },
+       { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0189, 0xff, 0xff, 0xff),
+         .driver_info = RSVD(4) },
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0191, 0xff, 0xff, 0xff), /* ZTE EuFi890 */
          .driver_info = RSVD(4) },
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0196, 0xff, 0xff, 0xff) },
@@ -2249,6 +2252,7 @@ static const struct usb_device_id option_ids[] = {
          .driver_info = RSVD(4) | RSVD(5) | RSVD(6) },
        { USB_DEVICE(0x1782, 0x4d10) },                                         /* Fibocom L610 (AT mode) */
        { USB_DEVICE_INTERFACE_CLASS(0x1782, 0x4d11, 0xff) },                   /* Fibocom L610 (ECM/RNDIS mode) */
+       { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x0001, 0xff, 0xff, 0xff) },    /* Fibocom L716-EU (ECM/RNDIS mode) */
        { USB_DEVICE(0x2cb7, 0x0104),                                           /* Fibocom NL678 series */
          .driver_info = RSVD(4) | RSVD(5) },
        { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff),                     /* Fibocom NL678 series */
@@ -2271,6 +2275,7 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) },
        { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) },
        { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) },
+       { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) },
        { } /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(usb, option_ids);
index 058d5b853b5749d0b9743554380ccaf7880c75c0..bfb6f9481e87f7aa32464ea344278eba6613b5ff 100644 (file)
@@ -4273,7 +4273,8 @@ static void run_state_machine(struct tcpm_port *port)
                                current_lim = PD_P_SNK_STDBY_MW / 5;
                        tcpm_set_current_limit(port, current_lim, 5000);
                        /* Not sink vbus if operational current is 0mA */
-                       tcpm_set_charge(port, !!pdo_max_current(port->snk_pdo[0]));
+                       tcpm_set_charge(port, !port->pd_supported ||
+                                       pdo_max_current(port->snk_pdo[0]));
 
                        if (!port->pd_supported)
                                tcpm_set_state(port, SNK_READY, 0);
@@ -5391,6 +5392,15 @@ static void _tcpm_pd_hard_reset(struct tcpm_port *port)
        if (port->bist_request == BDO_MODE_TESTDATA && port->tcpc->set_bist_data)
                port->tcpc->set_bist_data(port->tcpc, false);
 
+       switch (port->state) {
+       case ERROR_RECOVERY:
+       case PORT_RESET:
+       case PORT_RESET_WAIT_OFF:
+               return;
+       default:
+               break;
+       }
+
        if (port->ams != NONE_AMS)
                port->ams = NONE_AMS;
        if (port->hard_reset_count < PD_N_HARD_RESET_COUNT)
index 0e867f531d344bea87596b8522dc70d2375a7ef4..196535ad996d080ff50340472e32e00e2445b257 100644 (file)
@@ -968,16 +968,17 @@ static int tps25750_start_patch_burst_mode(struct tps6598x *tps)
        ret = of_property_match_string(np, "reg-names", "patch-address");
        if (ret < 0) {
                dev_err(tps->dev, "failed to get patch-address %d\n", ret);
-               return ret;
+               goto release_fw;
        }
 
        ret = of_property_read_u32_index(np, "reg", ret, &addr);
        if (ret)
-               return ret;
+               goto release_fw;
 
        if (addr == 0 || (addr >= 0x20 && addr <= 0x23)) {
                dev_err(tps->dev, "wrong patch address %u\n", addr);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto release_fw;
        }
 
        bpms_data.addr = (u8)addr;
@@ -1226,7 +1227,10 @@ static int tps6598x_probe(struct i2c_client *client)
                        TPS_REG_INT_PLUG_EVENT;
        }
 
-       tps->data = device_get_match_data(tps->dev);
+       if (dev_fwnode(tps->dev))
+               tps->data = device_get_match_data(tps->dev);
+       else
+               tps->data = i2c_get_match_data(client);
        if (!tps->data)
                return -EINVAL;
 
@@ -1425,7 +1429,7 @@ static const struct of_device_id tps6598x_of_match[] = {
 MODULE_DEVICE_TABLE(of, tps6598x_of_match);
 
 static const struct i2c_device_id tps6598x_id[] = {
-       { "tps6598x" },
+       { "tps6598x", (kernel_ulong_t)&tps6598x_data },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, tps6598x_id);
index b3a3cb16579552ccec1cd4ec1875ad36376b6cd3..b137f367934393268e57c8e559f6ba1194fab20d 100644 (file)
@@ -437,7 +437,7 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
        if (blk->shared_backend) {
                blk->buffer = shared_buffer;
        } else {
-               blk->buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
+               blk->buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
                                       GFP_KERNEL);
                if (!blk->buffer) {
                        ret = -ENOMEM;
@@ -495,7 +495,7 @@ static int __init vdpasim_blk_init(void)
                goto parent_err;
 
        if (shared_backend) {
-               shared_buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
+               shared_buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
                                         GFP_KERNEL);
                if (!shared_buffer) {
                        ret = -ENOMEM;
index 30df5c58db73a846dc3c58ad821201694869a348..da7ec77cdaff075b5e66a21be7693cd6b7d30047 100644 (file)
@@ -1582,7 +1582,6 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
 
 err:
        put_device(&v->dev);
-       ida_simple_remove(&vhost_vdpa_ida, v->minor);
        return r;
 }
 
index c2524a7207cfaeed4149748c7890b3ce0f7e6ccd..7a5593997e0efe64f2ce832bf0c8676bbaabf188 100644 (file)
@@ -242,7 +242,7 @@ void vp_del_vqs(struct virtio_device *vdev)
                        if (v != VIRTIO_MSI_NO_VECTOR) {
                                int irq = pci_irq_vector(vp_dev->pci_dev, v);
 
-                               irq_set_affinity_hint(irq, NULL);
+                               irq_update_affinity_hint(irq, NULL);
                                free_irq(irq, vq);
                        }
                }
@@ -443,10 +443,10 @@ int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask)
                mask = vp_dev->msix_affinity_masks[info->msix_vector];
                irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector);
                if (!cpu_mask)
-                       irq_set_affinity_hint(irq, NULL);
+                       irq_update_affinity_hint(irq, NULL);
                else {
                        cpumask_copy(mask, cpu_mask);
-                       irq_set_affinity_hint(irq, mask);
+                       irq_set_affinity_and_hint(irq, mask);
                }
        }
        return 0;
index e2a1fe7bb66cc9c4da102f1559da648173c3376f..7de8b1ebabac4217b2240f6d8faaf486b4265f38 100644 (file)
@@ -294,9 +294,10 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev)
 
        err = -EINVAL;
        mdev->common = vp_modern_map_capability(mdev, common,
-                                     sizeof(struct virtio_pci_common_cfg), 4,
-                                     0, sizeof(struct virtio_pci_modern_common_cfg),
-                                     &mdev->common_len, NULL);
+                             sizeof(struct virtio_pci_common_cfg), 4, 0,
+                             offsetofend(struct virtio_pci_modern_common_cfg,
+                                         queue_reset),
+                             &mdev->common_len, NULL);
        if (!mdev->common)
                goto err_map_common;
        mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
index b8f2f971c2f0fc6ef6ef3b3e8ab88147b4dc39c3..e3585330cf98b1c7e99a4800913c32cb380fd48f 100644 (file)
@@ -171,11 +171,11 @@ static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
        int i;
        struct shared_info *s = HYPERVISOR_shared_info;
        struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+       evtchn_port_t evtchn;
 
        /* Timer interrupt has highest priority. */
-       irq = irq_from_virq(cpu, VIRQ_TIMER);
+       irq = irq_evtchn_from_virq(cpu, VIRQ_TIMER, &evtchn);
        if (irq != -1) {
-               evtchn_port_t evtchn = evtchn_from_irq(irq);
                word_idx = evtchn / BITS_PER_LONG;
                bit_idx = evtchn % BITS_PER_LONG;
                if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx))
@@ -328,9 +328,9 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
        for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) {
                if (sync_test_bit(i, BM(sh->evtchn_pending))) {
                        int word_idx = i / BITS_PER_EVTCHN_WORD;
-                       printk("  %d: event %d -> irq %d%s%s%s\n",
+                       printk("  %d: event %d -> irq %u%s%s%s\n",
                               cpu_from_evtchn(i), i,
-                              get_evtchn_to_irq(i),
+                              irq_from_evtchn(i),
                               sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
                               ? "" : " l2-clear",
                               !sync_test_bit(i, BM(sh->evtchn_mask))
index 6de6b084ea60d5f4cadd56e27a2fc02f8294df06..f5edb9e27e3ca1e33edd88fce970e57ca228430c 100644 (file)
@@ -164,6 +164,8 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
 
 /* IRQ <-> IPI mapping */
 static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
+/* Cache for IPI event channels - needed for hot cpu unplug (avoid RCU usage). */
+static DEFINE_PER_CPU(evtchn_port_t [XEN_NR_IPIS], ipi_to_evtchn) = {[0 ... XEN_NR_IPIS-1] = 0};
 
 /* Event channel distribution data */
 static atomic_t channels_on_cpu[NR_CPUS];
@@ -172,7 +174,7 @@ static int **evtchn_to_irq;
 #ifdef CONFIG_X86
 static unsigned long *pirq_eoi_map;
 #endif
-static bool (*pirq_needs_eoi)(unsigned irq);
+static bool (*pirq_needs_eoi)(struct irq_info *info);
 
 #define EVTCHN_ROW(e)  (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
 #define EVTCHN_COL(e)  (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
@@ -188,7 +190,6 @@ static struct irq_chip xen_lateeoi_chip;
 static struct irq_chip xen_percpu_chip;
 static struct irq_chip xen_pirq_chip;
 static void enable_dynirq(struct irq_data *data);
-static void disable_dynirq(struct irq_data *data);
 
 static DEFINE_PER_CPU(unsigned int, irq_epoch);
 
@@ -246,15 +247,6 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
        return 0;
 }
 
-int get_evtchn_to_irq(evtchn_port_t evtchn)
-{
-       if (evtchn >= xen_evtchn_max_channels())
-               return -1;
-       if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
-               return -1;
-       return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
-}
-
 /* Get info for IRQ */
 static struct irq_info *info_for_irq(unsigned irq)
 {
@@ -272,6 +264,19 @@ static void set_info_for_irq(unsigned int irq, struct irq_info *info)
                irq_set_chip_data(irq, info);
 }
 
+static struct irq_info *evtchn_to_info(evtchn_port_t evtchn)
+{
+       int irq;
+
+       if (evtchn >= xen_evtchn_max_channels())
+               return NULL;
+       if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
+               return NULL;
+       irq = READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
+
+       return (irq < 0) ? NULL : info_for_irq(irq);
+}
+
 /* Per CPU channel accounting */
 static void channels_on_cpu_dec(struct irq_info *info)
 {
@@ -298,6 +303,13 @@ static void channels_on_cpu_inc(struct irq_info *info)
        info->is_accounted = 1;
 }
 
+static void xen_irq_free_desc(unsigned int irq)
+{
+       /* Legacy IRQ descriptors are managed by the arch. */
+       if (irq >= nr_legacy_irqs())
+               irq_free_desc(irq);
+}
+
 static void delayed_free_irq(struct work_struct *work)
 {
        struct irq_info *info = container_of(to_rcu_work(work), struct irq_info,
@@ -309,14 +321,11 @@ static void delayed_free_irq(struct work_struct *work)
 
        kfree(info);
 
-       /* Legacy IRQ descriptors are managed by the arch. */
-       if (irq >= nr_legacy_irqs())
-               irq_free_desc(irq);
+       xen_irq_free_desc(irq);
 }
 
 /* Constructors for packed IRQ information. */
 static int xen_irq_info_common_setup(struct irq_info *info,
-                                    unsigned irq,
                                     enum xen_irq_type type,
                                     evtchn_port_t evtchn,
                                     unsigned short cpu)
@@ -326,29 +335,27 @@ static int xen_irq_info_common_setup(struct irq_info *info,
        BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
 
        info->type = type;
-       info->irq = irq;
        info->evtchn = evtchn;
        info->cpu = cpu;
        info->mask_reason = EVT_MASK_REASON_EXPLICIT;
        raw_spin_lock_init(&info->lock);
 
-       ret = set_evtchn_to_irq(evtchn, irq);
+       ret = set_evtchn_to_irq(evtchn, info->irq);
        if (ret < 0)
                return ret;
 
-       irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
+       irq_clear_status_flags(info->irq, IRQ_NOREQUEST | IRQ_NOAUTOEN);
 
        return xen_evtchn_port_setup(evtchn);
 }
 
-static int xen_irq_info_evtchn_setup(unsigned irq,
+static int xen_irq_info_evtchn_setup(struct irq_info *info,
                                     evtchn_port_t evtchn,
                                     struct xenbus_device *dev)
 {
-       struct irq_info *info = info_for_irq(irq);
        int ret;
 
-       ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
+       ret = xen_irq_info_common_setup(info, IRQT_EVTCHN, evtchn, 0);
        info->u.interdomain = dev;
        if (dev)
                atomic_inc(&dev->event_channels);
@@ -356,49 +363,37 @@ static int xen_irq_info_evtchn_setup(unsigned irq,
        return ret;
 }
 
-static int xen_irq_info_ipi_setup(unsigned cpu,
-                                 unsigned irq,
-                                 evtchn_port_t evtchn,
-                                 enum ipi_vector ipi)
+static int xen_irq_info_ipi_setup(struct irq_info *info, unsigned int cpu,
+                                 evtchn_port_t evtchn, enum ipi_vector ipi)
 {
-       struct irq_info *info = info_for_irq(irq);
-
        info->u.ipi = ipi;
 
-       per_cpu(ipi_to_irq, cpu)[ipi] = irq;
+       per_cpu(ipi_to_irq, cpu)[ipi] = info->irq;
+       per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn;
 
-       return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
+       return xen_irq_info_common_setup(info, IRQT_IPI, evtchn, 0);
 }
 
-static int xen_irq_info_virq_setup(unsigned cpu,
-                                  unsigned irq,
-                                  evtchn_port_t evtchn,
-                                  unsigned virq)
+static int xen_irq_info_virq_setup(struct irq_info *info, unsigned int cpu,
+                                  evtchn_port_t evtchn, unsigned int virq)
 {
-       struct irq_info *info = info_for_irq(irq);
-
        info->u.virq = virq;
 
-       per_cpu(virq_to_irq, cpu)[virq] = irq;
+       per_cpu(virq_to_irq, cpu)[virq] = info->irq;
 
-       return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
+       return xen_irq_info_common_setup(info, IRQT_VIRQ, evtchn, 0);
 }
 
-static int xen_irq_info_pirq_setup(unsigned irq,
-                                  evtchn_port_t evtchn,
-                                  unsigned pirq,
-                                  unsigned gsi,
-                                  uint16_t domid,
-                                  unsigned char flags)
+static int xen_irq_info_pirq_setup(struct irq_info *info, evtchn_port_t evtchn,
+                                  unsigned int pirq, unsigned int gsi,
+                                  uint16_t domid, unsigned char flags)
 {
-       struct irq_info *info = info_for_irq(irq);
-
        info->u.pirq.pirq = pirq;
        info->u.pirq.gsi = gsi;
        info->u.pirq.domid = domid;
        info->u.pirq.flags = flags;
 
-       return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
+       return xen_irq_info_common_setup(info, IRQT_PIRQ, evtchn, 0);
 }
 
 static void xen_irq_info_cleanup(struct irq_info *info)
@@ -412,7 +407,7 @@ static void xen_irq_info_cleanup(struct irq_info *info)
 /*
  * Accessors for packed IRQ information.
  */
-evtchn_port_t evtchn_from_irq(unsigned irq)
+static evtchn_port_t evtchn_from_irq(unsigned int irq)
 {
        const struct irq_info *info = NULL;
 
@@ -426,64 +421,51 @@ evtchn_port_t evtchn_from_irq(unsigned irq)
 
 unsigned int irq_from_evtchn(evtchn_port_t evtchn)
 {
-       return get_evtchn_to_irq(evtchn);
+       struct irq_info *info = evtchn_to_info(evtchn);
+
+       return info ? info->irq : -1;
 }
 EXPORT_SYMBOL_GPL(irq_from_evtchn);
 
-int irq_from_virq(unsigned int cpu, unsigned int virq)
+int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq,
+                        evtchn_port_t *evtchn)
 {
-       return per_cpu(virq_to_irq, cpu)[virq];
+       int irq = per_cpu(virq_to_irq, cpu)[virq];
+
+       *evtchn = evtchn_from_irq(irq);
+
+       return irq;
 }
 
-static enum ipi_vector ipi_from_irq(unsigned irq)
+static enum ipi_vector ipi_from_irq(struct irq_info *info)
 {
-       struct irq_info *info = info_for_irq(irq);
-
        BUG_ON(info == NULL);
        BUG_ON(info->type != IRQT_IPI);
 
        return info->u.ipi;
 }
 
-static unsigned virq_from_irq(unsigned irq)
+static unsigned int virq_from_irq(struct irq_info *info)
 {
-       struct irq_info *info = info_for_irq(irq);
-
        BUG_ON(info == NULL);
        BUG_ON(info->type != IRQT_VIRQ);
 
        return info->u.virq;
 }
 
-static unsigned pirq_from_irq(unsigned irq)
+static unsigned int pirq_from_irq(struct irq_info *info)
 {
-       struct irq_info *info = info_for_irq(irq);
-
        BUG_ON(info == NULL);
        BUG_ON(info->type != IRQT_PIRQ);
 
        return info->u.pirq.pirq;
 }
 
-static enum xen_irq_type type_from_irq(unsigned irq)
-{
-       return info_for_irq(irq)->type;
-}
-
-static unsigned cpu_from_irq(unsigned irq)
-{
-       return info_for_irq(irq)->cpu;
-}
-
 unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
 {
-       int irq = get_evtchn_to_irq(evtchn);
-       unsigned ret = 0;
-
-       if (irq != -1)
-               ret = cpu_from_irq(irq);
+       struct irq_info *info = evtchn_to_info(evtchn);
 
-       return ret;
+       return info ? info->cpu : 0;
 }
 
 static void do_mask(struct irq_info *info, u8 reason)
@@ -515,36 +497,30 @@ static void do_unmask(struct irq_info *info, u8 reason)
 }
 
 #ifdef CONFIG_X86
-static bool pirq_check_eoi_map(unsigned irq)
+static bool pirq_check_eoi_map(struct irq_info *info)
 {
-       return test_bit(pirq_from_irq(irq), pirq_eoi_map);
+       return test_bit(pirq_from_irq(info), pirq_eoi_map);
 }
 #endif
 
-static bool pirq_needs_eoi_flag(unsigned irq)
+static bool pirq_needs_eoi_flag(struct irq_info *info)
 {
-       struct irq_info *info = info_for_irq(irq);
        BUG_ON(info->type != IRQT_PIRQ);
 
        return info->u.pirq.flags & PIRQ_NEEDS_EOI;
 }
 
-static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+static void bind_evtchn_to_cpu(struct irq_info *info, unsigned int cpu,
                               bool force_affinity)
 {
-       int irq = get_evtchn_to_irq(evtchn);
-       struct irq_info *info = info_for_irq(irq);
-
-       BUG_ON(irq == -1);
-
        if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
-               struct irq_data *data = irq_get_irq_data(irq);
+               struct irq_data *data = irq_get_irq_data(info->irq);
 
                irq_data_update_affinity(data, cpumask_of(cpu));
                irq_data_update_effective_affinity(data, cpumask_of(cpu));
        }
 
-       xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
+       xen_evtchn_port_bind_to_cpu(info->evtchn, cpu, info->cpu);
 
        channels_on_cpu_dec(info);
        info->cpu = cpu;
@@ -601,7 +577,9 @@ static void lateeoi_list_add(struct irq_info *info)
 
        spin_lock_irqsave(&eoi->eoi_list_lock, flags);
 
-       if (list_empty(&eoi->eoi_list)) {
+       elem = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
+                                       eoi_list);
+       if (!elem || info->eoi_time < elem->eoi_time) {
                list_add(&info->eoi_list, &eoi->eoi_list);
                mod_delayed_work_on(info->eoi_cpu, system_wq,
                                    &eoi->delayed, delay);
@@ -732,50 +710,49 @@ void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
 }
 EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
 
-static void xen_irq_init(unsigned irq)
+static struct irq_info *xen_irq_init(unsigned int irq)
 {
        struct irq_info *info;
 
        info = kzalloc(sizeof(*info), GFP_KERNEL);
-       if (info == NULL)
-               panic("Unable to allocate metadata for IRQ%d\n", irq);
+       if (info) {
+               info->irq = irq;
+               info->type = IRQT_UNBOUND;
+               info->refcnt = -1;
+               INIT_RCU_WORK(&info->rwork, delayed_free_irq);
 
-       info->type = IRQT_UNBOUND;
-       info->refcnt = -1;
-       INIT_RCU_WORK(&info->rwork, delayed_free_irq);
+               set_info_for_irq(irq, info);
+               /*
+                * Interrupt affinity setting can be immediate. No point
+                * in delaying it until an interrupt is handled.
+                */
+               irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
 
-       set_info_for_irq(irq, info);
-       /*
-        * Interrupt affinity setting can be immediate. No point
-        * in delaying it until an interrupt is handled.
-        */
-       irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+               INIT_LIST_HEAD(&info->eoi_list);
+               list_add_tail(&info->list, &xen_irq_list_head);
+       }
 
-       INIT_LIST_HEAD(&info->eoi_list);
-       list_add_tail(&info->list, &xen_irq_list_head);
+       return info;
 }
 
-static int __must_check xen_allocate_irqs_dynamic(int nvec)
+static struct irq_info *xen_allocate_irq_dynamic(void)
 {
-       int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
+       int irq = irq_alloc_desc_from(0, -1);
+       struct irq_info *info = NULL;
 
        if (irq >= 0) {
-               for (i = 0; i < nvec; i++)
-                       xen_irq_init(irq + i);
+               info = xen_irq_init(irq);
+               if (!info)
+                       xen_irq_free_desc(irq);
        }
 
-       return irq;
-}
-
-static inline int __must_check xen_allocate_irq_dynamic(void)
-{
-
-       return xen_allocate_irqs_dynamic(1);
+       return info;
 }
 
-static int __must_check xen_allocate_irq_gsi(unsigned gsi)
+static struct irq_info *xen_allocate_irq_gsi(unsigned int gsi)
 {
        int irq;
+       struct irq_info *info;
 
        /*
         * A PV guest has no concept of a GSI (since it has no ACPI
@@ -792,15 +769,15 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
        else
                irq = irq_alloc_desc_at(gsi, -1);
 
-       xen_irq_init(irq);
+       info = xen_irq_init(irq);
+       if (!info)
+               xen_irq_free_desc(irq);
 
-       return irq;
+       return info;
 }
 
-static void xen_free_irq(unsigned irq)
+static void xen_free_irq(struct irq_info *info)
 {
-       struct irq_info *info = info_for_irq(irq);
-
        if (WARN_ON(!info))
                return;
 
@@ -821,14 +798,11 @@ static void event_handler_exit(struct irq_info *info)
        clear_evtchn(info->evtchn);
 }
 
-static void pirq_query_unmask(int irq)
+static void pirq_query_unmask(struct irq_info *info)
 {
        struct physdev_irq_status_query irq_status;
-       struct irq_info *info = info_for_irq(irq);
-
-       BUG_ON(info->type != IRQT_PIRQ);
 
-       irq_status.irq = pirq_from_irq(irq);
+       irq_status.irq = pirq_from_irq(info);
        if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
                irq_status.flags = 0;
 
@@ -837,61 +811,81 @@ static void pirq_query_unmask(int irq)
                info->u.pirq.flags |= PIRQ_NEEDS_EOI;
 }
 
-static void eoi_pirq(struct irq_data *data)
+static void do_eoi_pirq(struct irq_info *info)
 {
-       struct irq_info *info = info_for_irq(data->irq);
-       evtchn_port_t evtchn = info ? info->evtchn : 0;
-       struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
+       struct physdev_eoi eoi = { .irq = pirq_from_irq(info) };
        int rc = 0;
 
-       if (!VALID_EVTCHN(evtchn))
+       if (!VALID_EVTCHN(info->evtchn))
                return;
 
        event_handler_exit(info);
 
-       if (pirq_needs_eoi(data->irq)) {
+       if (pirq_needs_eoi(info)) {
                rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
                WARN_ON(rc);
        }
 }
 
+static void eoi_pirq(struct irq_data *data)
+{
+       struct irq_info *info = info_for_irq(data->irq);
+
+       do_eoi_pirq(info);
+}
+
+static void do_disable_dynirq(struct irq_info *info)
+{
+       if (VALID_EVTCHN(info->evtchn))
+               do_mask(info, EVT_MASK_REASON_EXPLICIT);
+}
+
+static void disable_dynirq(struct irq_data *data)
+{
+       struct irq_info *info = info_for_irq(data->irq);
+
+       if (info)
+               do_disable_dynirq(info);
+}
+
 static void mask_ack_pirq(struct irq_data *data)
 {
-       disable_dynirq(data);
-       eoi_pirq(data);
+       struct irq_info *info = info_for_irq(data->irq);
+
+       if (info) {
+               do_disable_dynirq(info);
+               do_eoi_pirq(info);
+       }
 }
 
-static unsigned int __startup_pirq(unsigned int irq)
+static unsigned int __startup_pirq(struct irq_info *info)
 {
        struct evtchn_bind_pirq bind_pirq;
-       struct irq_info *info = info_for_irq(irq);
-       evtchn_port_t evtchn = evtchn_from_irq(irq);
+       evtchn_port_t evtchn = info->evtchn;
        int rc;
 
-       BUG_ON(info->type != IRQT_PIRQ);
-
        if (VALID_EVTCHN(evtchn))
                goto out;
 
-       bind_pirq.pirq = pirq_from_irq(irq);
+       bind_pirq.pirq = pirq_from_irq(info);
        /* NB. We are happy to share unless we are probing. */
        bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
                                        BIND_PIRQ__WILL_SHARE : 0;
        rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
        if (rc != 0) {
-               pr_warn("Failed to obtain physical IRQ %d\n", irq);
+               pr_warn("Failed to obtain physical IRQ %d\n", info->irq);
                return 0;
        }
        evtchn = bind_pirq.port;
 
-       pirq_query_unmask(irq);
+       pirq_query_unmask(info);
 
-       rc = set_evtchn_to_irq(evtchn, irq);
+       rc = set_evtchn_to_irq(evtchn, info->irq);
        if (rc)
                goto err;
 
        info->evtchn = evtchn;
-       bind_evtchn_to_cpu(evtchn, 0, false);
+       bind_evtchn_to_cpu(info, 0, false);
 
        rc = xen_evtchn_port_setup(evtchn);
        if (rc)
@@ -900,26 +894,28 @@ static unsigned int __startup_pirq(unsigned int irq)
 out:
        do_unmask(info, EVT_MASK_REASON_EXPLICIT);
 
-       eoi_pirq(irq_get_irq_data(irq));
+       do_eoi_pirq(info);
 
        return 0;
 
 err:
-       pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
+       pr_err("irq%d: Failed to set port to irq mapping (%d)\n", info->irq,
+              rc);
        xen_evtchn_close(evtchn);
        return 0;
 }
 
 static unsigned int startup_pirq(struct irq_data *data)
 {
-       return __startup_pirq(data->irq);
+       struct irq_info *info = info_for_irq(data->irq);
+
+       return __startup_pirq(info);
 }
 
 static void shutdown_pirq(struct irq_data *data)
 {
-       unsigned int irq = data->irq;
-       struct irq_info *info = info_for_irq(irq);
-       evtchn_port_t evtchn = evtchn_from_irq(irq);
+       struct irq_info *info = info_for_irq(data->irq);
+       evtchn_port_t evtchn = info->evtchn;
 
        BUG_ON(info->type != IRQT_PIRQ);
 
@@ -957,10 +953,14 @@ int xen_irq_from_gsi(unsigned gsi)
 }
 EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
 
-static void __unbind_from_irq(unsigned int irq)
+static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
 {
-       evtchn_port_t evtchn = evtchn_from_irq(irq);
-       struct irq_info *info = info_for_irq(irq);
+       evtchn_port_t evtchn;
+
+       if (!info) {
+               xen_irq_free_desc(irq);
+               return;
+       }
 
        if (info->refcnt > 0) {
                info->refcnt--;
@@ -968,19 +968,22 @@ static void __unbind_from_irq(unsigned int irq)
                        return;
        }
 
+       evtchn = info->evtchn;
+
        if (VALID_EVTCHN(evtchn)) {
-               unsigned int cpu = cpu_from_irq(irq);
+               unsigned int cpu = info->cpu;
                struct xenbus_device *dev;
 
                if (!info->is_static)
                        xen_evtchn_close(evtchn);
 
-               switch (type_from_irq(irq)) {
+               switch (info->type) {
                case IRQT_VIRQ:
-                       per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
+                       per_cpu(virq_to_irq, cpu)[virq_from_irq(info)] = -1;
                        break;
                case IRQT_IPI:
-                       per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
+                       per_cpu(ipi_to_irq, cpu)[ipi_from_irq(info)] = -1;
+                       per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(info)] = 0;
                        break;
                case IRQT_EVTCHN:
                        dev = info->u.interdomain;
@@ -994,7 +997,7 @@ static void __unbind_from_irq(unsigned int irq)
                xen_irq_info_cleanup(info);
        }
 
-       xen_free_irq(irq);
+       xen_free_irq(info);
 }
 
 /*
@@ -1010,24 +1013,24 @@ static void __unbind_from_irq(unsigned int irq)
 int xen_bind_pirq_gsi_to_irq(unsigned gsi,
                             unsigned pirq, int shareable, char *name)
 {
-       int irq;
+       struct irq_info *info;
        struct physdev_irq irq_op;
        int ret;
 
        mutex_lock(&irq_mapping_update_lock);
 
-       irq = xen_irq_from_gsi(gsi);
-       if (irq != -1) {
+       ret = xen_irq_from_gsi(gsi);
+       if (ret != -1) {
                pr_info("%s: returning irq %d for gsi %u\n",
-                       __func__, irq, gsi);
+                       __func__, ret, gsi);
                goto out;
        }
 
-       irq = xen_allocate_irq_gsi(gsi);
-       if (irq < 0)
+       info = xen_allocate_irq_gsi(gsi);
+       if (!info)
                goto out;
 
-       irq_op.irq = irq;
+       irq_op.irq = info->irq;
        irq_op.vector = 0;
 
        /* Only the privileged domain can do this. For non-priv, the pcifront
@@ -1035,20 +1038,19 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
         * this in the priv domain. */
        if (xen_initial_domain() &&
            HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
-               xen_free_irq(irq);
-               irq = -ENOSPC;
+               xen_free_irq(info);
+               ret = -ENOSPC;
                goto out;
        }
 
-       ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
+       ret = xen_irq_info_pirq_setup(info, 0, pirq, gsi, DOMID_SELF,
                               shareable ? PIRQ_SHAREABLE : 0);
        if (ret < 0) {
-               __unbind_from_irq(irq);
-               irq = ret;
+               __unbind_from_irq(info, info->irq);
                goto out;
        }
 
-       pirq_query_unmask(irq);
+       pirq_query_unmask(info);
        /* We try to use the handler with the appropriate semantic for the
         * type of interrupt: if the interrupt is an edge triggered
         * interrupt we use handle_edge_irq.
@@ -1065,16 +1067,18 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
         * is the right choice either way.
         */
        if (shareable)
-               irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
+               irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip,
                                handle_fasteoi_irq, name);
        else
-               irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
+               irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip,
                                handle_edge_irq, name);
 
+       ret = info->irq;
+
 out:
        mutex_unlock(&irq_mapping_update_lock);
 
-       return irq;
+       return ret;
 }
 
 #ifdef CONFIG_PCI_MSI
@@ -1096,17 +1100,22 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
                             int pirq, int nvec, const char *name, domid_t domid)
 {
        int i, irq, ret;
+       struct irq_info *info;
 
        mutex_lock(&irq_mapping_update_lock);
 
-       irq = xen_allocate_irqs_dynamic(nvec);
+       irq = irq_alloc_descs(-1, 0, nvec, -1);
        if (irq < 0)
                goto out;
 
        for (i = 0; i < nvec; i++) {
+               info = xen_irq_init(irq + i);
+               if (!info)
+                       goto error_irq;
+
                irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
 
-               ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
+               ret = xen_irq_info_pirq_setup(info, 0, pirq + i, 0, domid,
                                              i == 0 ? 0 : PIRQ_MSI_GROUP);
                if (ret < 0)
                        goto error_irq;
@@ -1118,9 +1127,12 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 out:
        mutex_unlock(&irq_mapping_update_lock);
        return irq;
+
 error_irq:
-       while (nvec--)
-               __unbind_from_irq(irq + nvec);
+       while (nvec--) {
+               info = info_for_irq(irq + nvec);
+               __unbind_from_irq(info, irq + nvec);
+       }
        mutex_unlock(&irq_mapping_update_lock);
        return ret;
 }
@@ -1156,67 +1168,45 @@ int xen_destroy_irq(int irq)
                }
        }
 
-       xen_free_irq(irq);
+       xen_free_irq(info);
 
 out:
        mutex_unlock(&irq_mapping_update_lock);
        return rc;
 }
 
-int xen_irq_from_pirq(unsigned pirq)
-{
-       int irq;
-
-       struct irq_info *info;
-
-       mutex_lock(&irq_mapping_update_lock);
-
-       list_for_each_entry(info, &xen_irq_list_head, list) {
-               if (info->type != IRQT_PIRQ)
-                       continue;
-               irq = info->irq;
-               if (info->u.pirq.pirq == pirq)
-                       goto out;
-       }
-       irq = -1;
-out:
-       mutex_unlock(&irq_mapping_update_lock);
-
-       return irq;
-}
-
-
 int xen_pirq_from_irq(unsigned irq)
 {
-       return pirq_from_irq(irq);
+       struct irq_info *info = info_for_irq(irq);
+
+       return pirq_from_irq(info);
 }
 EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
 
 static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
                                   struct xenbus_device *dev)
 {
-       int irq;
-       int ret;
+       int ret = -ENOMEM;
+       struct irq_info *info;
 
        if (evtchn >= xen_evtchn_max_channels())
                return -ENOMEM;
 
        mutex_lock(&irq_mapping_update_lock);
 
-       irq = get_evtchn_to_irq(evtchn);
+       info = evtchn_to_info(evtchn);
 
-       if (irq == -1) {
-               irq = xen_allocate_irq_dynamic();
-               if (irq < 0)
+       if (!info) {
+               info = xen_allocate_irq_dynamic();
+               if (!info)
                        goto out;
 
-               irq_set_chip_and_handler_name(irq, chip,
+               irq_set_chip_and_handler_name(info->irq, chip,
                                              handle_edge_irq, "event");
 
-               ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
+               ret = xen_irq_info_evtchn_setup(info, evtchn, dev);
                if (ret < 0) {
-                       __unbind_from_irq(irq);
-                       irq = ret;
+                       __unbind_from_irq(info, info->irq);
                        goto out;
                }
                /*
@@ -1226,17 +1216,17 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
                 * affinity setting is not invoked on them so nothing would
                 * bind the channel.
                 */
-               bind_evtchn_to_cpu(evtchn, 0, false);
-       } else {
-               struct irq_info *info = info_for_irq(irq);
-               if (!WARN_ON(!info || info->type != IRQT_EVTCHN))
-                       info->refcnt++;
+               bind_evtchn_to_cpu(info, 0, false);
+       } else if (!WARN_ON(info->type != IRQT_EVTCHN)) {
+               info->refcnt++;
        }
 
+       ret = info->irq;
+
 out:
        mutex_unlock(&irq_mapping_update_lock);
 
-       return irq;
+       return ret;
 }
 
 int bind_evtchn_to_irq(evtchn_port_t evtchn)
@@ -1255,18 +1245,19 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 {
        struct evtchn_bind_ipi bind_ipi;
        evtchn_port_t evtchn;
-       int ret, irq;
+       struct irq_info *info;
+       int ret;
 
        mutex_lock(&irq_mapping_update_lock);
 
-       irq = per_cpu(ipi_to_irq, cpu)[ipi];
+       ret = per_cpu(ipi_to_irq, cpu)[ipi];
 
-       if (irq == -1) {
-               irq = xen_allocate_irq_dynamic();
-               if (irq < 0)
+       if (ret == -1) {
+               info = xen_allocate_irq_dynamic();
+               if (!info)
                        goto out;
 
-               irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
+               irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip,
                                              handle_percpu_irq, "ipi");
 
                bind_ipi.vcpu = xen_vcpu_nr(cpu);
@@ -1275,25 +1266,25 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
                        BUG();
                evtchn = bind_ipi.port;
 
-               ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
+               ret = xen_irq_info_ipi_setup(info, cpu, evtchn, ipi);
                if (ret < 0) {
-                       __unbind_from_irq(irq);
-                       irq = ret;
+                       __unbind_from_irq(info, info->irq);
                        goto out;
                }
                /*
                 * Force the affinity mask to the target CPU so proc shows
                 * the correct target.
                 */
-               bind_evtchn_to_cpu(evtchn, cpu, true);
+               bind_evtchn_to_cpu(info, cpu, true);
+               ret = info->irq;
        } else {
-               struct irq_info *info = info_for_irq(irq);
+               info = info_for_irq(ret);
                WARN_ON(info == NULL || info->type != IRQT_IPI);
        }
 
  out:
        mutex_unlock(&irq_mapping_update_lock);
-       return irq;
+       return ret;
 }
 
 static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
@@ -1361,22 +1352,23 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
 {
        struct evtchn_bind_virq bind_virq;
        evtchn_port_t evtchn = 0;
-       int irq, ret;
+       struct irq_info *info;
+       int ret;
 
        mutex_lock(&irq_mapping_update_lock);
 
-       irq = per_cpu(virq_to_irq, cpu)[virq];
+       ret = per_cpu(virq_to_irq, cpu)[virq];
 
-       if (irq == -1) {
-               irq = xen_allocate_irq_dynamic();
-               if (irq < 0)
+       if (ret == -1) {
+               info = xen_allocate_irq_dynamic();
+               if (!info)
                        goto out;
 
                if (percpu)
-                       irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
+                       irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip,
                                                      handle_percpu_irq, "virq");
                else
-                       irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
+                       irq_set_chip_and_handler_name(info->irq, &xen_dynamic_chip,
                                                      handle_edge_irq, "virq");
 
                bind_virq.virq = virq;
@@ -1391,10 +1383,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
                        BUG_ON(ret < 0);
                }
 
-               ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
+               ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq);
                if (ret < 0) {
-                       __unbind_from_irq(irq);
-                       irq = ret;
+                       __unbind_from_irq(info, info->irq);
                        goto out;
                }
 
@@ -1402,22 +1393,26 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
                 * Force the affinity mask for percpu interrupts so proc
                 * shows the correct target.
                 */
-               bind_evtchn_to_cpu(evtchn, cpu, percpu);
+               bind_evtchn_to_cpu(info, cpu, percpu);
+               ret = info->irq;
        } else {
-               struct irq_info *info = info_for_irq(irq);
+               info = info_for_irq(ret);
                WARN_ON(info == NULL || info->type != IRQT_VIRQ);
        }
 
 out:
        mutex_unlock(&irq_mapping_update_lock);
 
-       return irq;
+       return ret;
 }
 
 static void unbind_from_irq(unsigned int irq)
 {
+       struct irq_info *info;
+
        mutex_lock(&irq_mapping_update_lock);
-       __unbind_from_irq(irq);
+       info = info_for_irq(irq);
+       __unbind_from_irq(info, irq);
        mutex_unlock(&irq_mapping_update_lock);
 }
 
@@ -1568,13 +1563,7 @@ EXPORT_SYMBOL_GPL(xen_set_irq_priority);
 
 int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static)
 {
-       int irq = get_evtchn_to_irq(evtchn);
-       struct irq_info *info;
-
-       if (irq == -1)
-               return -ENOENT;
-
-       info = info_for_irq(irq);
+       struct irq_info *info = evtchn_to_info(evtchn);
 
        if (!info)
                return -ENOENT;
@@ -1590,7 +1579,6 @@ EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
 
 int evtchn_get(evtchn_port_t evtchn)
 {
-       int irq;
        struct irq_info *info;
        int err = -ENOENT;
 
@@ -1599,11 +1587,7 @@ int evtchn_get(evtchn_port_t evtchn)
 
        mutex_lock(&irq_mapping_update_lock);
 
-       irq = get_evtchn_to_irq(evtchn);
-       if (irq == -1)
-               goto done;
-
-       info = info_for_irq(irq);
+       info = evtchn_to_info(evtchn);
 
        if (!info)
                goto done;
@@ -1623,16 +1607,17 @@ EXPORT_SYMBOL_GPL(evtchn_get);
 
 void evtchn_put(evtchn_port_t evtchn)
 {
-       int irq = get_evtchn_to_irq(evtchn);
-       if (WARN_ON(irq == -1))
+       struct irq_info *info = evtchn_to_info(evtchn);
+
+       if (WARN_ON(!info))
                return;
-       unbind_from_irq(irq);
+       unbind_from_irq(info->irq);
 }
 EXPORT_SYMBOL_GPL(evtchn_put);
 
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
 {
-       int irq;
+       evtchn_port_t evtchn;
 
 #ifdef CONFIG_X86
        if (unlikely(vector == XEN_NMI_VECTOR)) {
@@ -1643,9 +1628,9 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
                return;
        }
 #endif
-       irq = per_cpu(ipi_to_irq, cpu)[vector];
-       BUG_ON(irq < 0);
-       notify_remote_via_irq(irq);
+       evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+       BUG_ON(evtchn == 0);
+       notify_remote_via_evtchn(evtchn);
 }
 
 struct evtchn_loop_ctrl {
@@ -1656,12 +1641,10 @@ struct evtchn_loop_ctrl {
 
 void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
 {
-       int irq;
-       struct irq_info *info;
+       struct irq_info *info = evtchn_to_info(port);
        struct xenbus_device *dev;
 
-       irq = get_evtchn_to_irq(port);
-       if (irq == -1)
+       if (!info)
                return;
 
        /*
@@ -1686,7 +1669,6 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
                }
        }
 
-       info = info_for_irq(irq);
        if (xchg_acquire(&info->is_active, 1))
                return;
 
@@ -1700,7 +1682,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
                info->eoi_time = get_jiffies_64() + event_eoi_delay;
        }
 
-       generic_handle_irq(irq);
+       generic_handle_irq(info->irq);
 }
 
 int xen_evtchn_do_upcall(void)
@@ -1758,16 +1740,17 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
        mutex_lock(&irq_mapping_update_lock);
 
        /* After resume the irq<->evtchn mappings are all cleared out */
-       BUG_ON(get_evtchn_to_irq(evtchn) != -1);
+       BUG_ON(evtchn_to_info(evtchn));
        /* Expect irq to have been bound before,
           so there should be a proper type */
        BUG_ON(info->type == IRQT_UNBOUND);
 
-       (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
+       info->irq = irq;
+       (void)xen_irq_info_evtchn_setup(info, evtchn, NULL);
 
        mutex_unlock(&irq_mapping_update_lock);
 
-       bind_evtchn_to_cpu(evtchn, info->cpu, false);
+       bind_evtchn_to_cpu(info, info->cpu, false);
 
        /* Unmask the event channel. */
        enable_irq(irq);
@@ -1801,7 +1784,7 @@ static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
         * it, but don't do the xenlinux-level rebind in that case.
         */
        if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
-               bind_evtchn_to_cpu(evtchn, tcpu, false);
+               bind_evtchn_to_cpu(info, tcpu, false);
 
        do_unmask(info, EVT_MASK_REASON_TEMPORARY);
 
@@ -1858,28 +1841,30 @@ static void enable_dynirq(struct irq_data *data)
                do_unmask(info, EVT_MASK_REASON_EXPLICIT);
 }
 
-static void disable_dynirq(struct irq_data *data)
+static void do_ack_dynirq(struct irq_info *info)
 {
-       struct irq_info *info = info_for_irq(data->irq);
-       evtchn_port_t evtchn = info ? info->evtchn : 0;
+       evtchn_port_t evtchn = info->evtchn;
 
        if (VALID_EVTCHN(evtchn))
-               do_mask(info, EVT_MASK_REASON_EXPLICIT);
+               event_handler_exit(info);
 }
 
 static void ack_dynirq(struct irq_data *data)
 {
        struct irq_info *info = info_for_irq(data->irq);
-       evtchn_port_t evtchn = info ? info->evtchn : 0;
 
-       if (VALID_EVTCHN(evtchn))
-               event_handler_exit(info);
+       if (info)
+               do_ack_dynirq(info);
 }
 
 static void mask_ack_dynirq(struct irq_data *data)
 {
-       disable_dynirq(data);
-       ack_dynirq(data);
+       struct irq_info *info = info_for_irq(data->irq);
+
+       if (info) {
+               do_disable_dynirq(info);
+               do_ack_dynirq(info);
+       }
 }
 
 static void lateeoi_ack_dynirq(struct irq_data *data)
@@ -1952,13 +1937,13 @@ static void restore_pirqs(void)
                if (rc) {
                        pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
                                gsi, irq, pirq, rc);
-                       xen_free_irq(irq);
+                       xen_free_irq(info);
                        continue;
                }
 
                printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
 
-               __startup_pirq(irq);
+               __startup_pirq(info);
        }
 }
 
@@ -1966,13 +1951,15 @@ static void restore_cpu_virqs(unsigned int cpu)
 {
        struct evtchn_bind_virq bind_virq;
        evtchn_port_t evtchn;
+       struct irq_info *info;
        int virq, irq;
 
        for (virq = 0; virq < NR_VIRQS; virq++) {
                if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
                        continue;
+               info = info_for_irq(irq);
 
-               BUG_ON(virq_from_irq(irq) != virq);
+               BUG_ON(virq_from_irq(info) != virq);
 
                /* Get a new binding from Xen. */
                bind_virq.virq = virq;
@@ -1983,9 +1970,9 @@ static void restore_cpu_virqs(unsigned int cpu)
                evtchn = bind_virq.port;
 
                /* Record the new mapping. */
-               (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
+               xen_irq_info_virq_setup(info, cpu, evtchn, virq);
                /* The affinity mask is still valid */
-               bind_evtchn_to_cpu(evtchn, cpu, false);
+               bind_evtchn_to_cpu(info, cpu, false);
        }
 }
 
@@ -1993,13 +1980,15 @@ static void restore_cpu_ipis(unsigned int cpu)
 {
        struct evtchn_bind_ipi bind_ipi;
        evtchn_port_t evtchn;
+       struct irq_info *info;
        int ipi, irq;
 
        for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
                if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
                        continue;
+               info = info_for_irq(irq);
 
-               BUG_ON(ipi_from_irq(irq) != ipi);
+               BUG_ON(ipi_from_irq(info) != ipi);
 
                /* Get a new binding from Xen. */
                bind_ipi.vcpu = xen_vcpu_nr(cpu);
@@ -2009,9 +1998,9 @@ static void restore_cpu_ipis(unsigned int cpu)
                evtchn = bind_ipi.port;
 
                /* Record the new mapping. */
-               (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
+               xen_irq_info_ipi_setup(info, cpu, evtchn, ipi);
                /* The affinity mask is still valid */
-               bind_evtchn_to_cpu(evtchn, cpu, false);
+               bind_evtchn_to_cpu(info, cpu, false);
        }
 }
 
@@ -2025,13 +2014,6 @@ void xen_clear_irq_pending(int irq)
                event_handler_exit(info);
 }
 EXPORT_SYMBOL(xen_clear_irq_pending);
-void xen_set_irq_pending(int irq)
-{
-       evtchn_port_t evtchn = evtchn_from_irq(irq);
-
-       if (VALID_EVTCHN(evtchn))
-               set_evtchn(evtchn);
-}
 
 bool xen_test_irq_pending(int irq)
 {
index 4d3398eff9cdf1567a5f64dcb63561680dda405e..19ae31695edcf1a2e5cc93863c515a61c43496d9 100644 (file)
@@ -33,7 +33,6 @@ struct evtchn_ops {
 
 extern const struct evtchn_ops *evtchn_ops;
 
-int get_evtchn_to_irq(evtchn_port_t evtchn);
 void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
 
 unsigned int cpu_from_evtchn(evtchn_port_t evtchn);
index b3e3d1bb37f3e388d5ed27b6fb855cb419b3d54e..50865527314538a8bedbde0f2590fbbb4afce3ce 100644 (file)
@@ -47,6 +47,9 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
+#ifdef CONFIG_ACPI
+#include <acpi/processor.h>
+#endif
 
 /*
  * @cpu_id: Xen physical cpu logic number
@@ -400,4 +403,23 @@ bool __init xen_processor_present(uint32_t acpi_id)
 
        return online;
 }
+
+void xen_sanitize_proc_cap_bits(uint32_t *cap)
+{
+       struct xen_platform_op op = {
+               .cmd                    = XENPF_set_processor_pminfo,
+               .u.set_pminfo.id        = -1,
+               .u.set_pminfo.type      = XEN_PM_PDC,
+       };
+       u32 buf[3] = { ACPI_PDC_REVISION_ID, 1, *cap };
+       int ret;
+
+       set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
+       ret = HYPERVISOR_platform_op(&op);
+       if (ret)
+               pr_err("sanitize of _PDC buffer bits from Xen failed: %d\n",
+                      ret);
+       else
+               *cap = buf[2];
+}
 #endif
index 1ce7f3c7a950931e0655df5f8587aee3adacd6be..0eb337a8ec0fa2a1df0bd392b9ec0381a8658758 100644 (file)
@@ -1115,7 +1115,7 @@ struct privcmd_kernel_ioreq {
        spinlock_t lock; /* Protects ioeventfds list */
        struct list_head ioeventfds;
        struct list_head list;
-       struct ioreq_port ports[0];
+       struct ioreq_port ports[] __counted_by(vcpus);
 };
 
 static irqreturn_t ioeventfd_interrupt(int irq, void *dev_id)
index 946bd56f0ac53e23a4f5215fd4412901b9be296f..0e6c6c25d154f5e2e439aa261397298fb0ac8118 100644 (file)
@@ -405,4 +405,5 @@ const struct dma_map_ops xen_swiotlb_dma_ops = {
        .get_sgtable = dma_common_get_sgtable,
        .alloc_pages = dma_common_alloc_pages,
        .free_pages = dma_common_free_pages,
+       .max_mapping_size = swiotlb_max_mapping_size,
 };
index b52e0fa595a992a9d5c0a321b9fed2b725ee3de3..223870a0111b27b8f8d6c17034780bfecf8c3e90 100644 (file)
@@ -21,7 +21,7 @@
 
 #include <xen/xen-front-pgdir-shbuf.h>
 
-/**
+/*
  * This structure represents the structure of a shared page
  * that contains grant references to the pages of the shared
  * buffer. This structure is common to many Xen para-virtualized
@@ -33,7 +33,7 @@ struct xen_page_directory {
        grant_ref_t gref[]; /* Variable length */
 };
 
-/**
+/*
  * Shared buffer ops which are differently implemented
  * depending on the allocation mode, e.g. if the buffer
  * is allocated by the corresponding backend or frontend.
@@ -61,7 +61,7 @@ struct xen_front_pgdir_shbuf_ops {
        int (*unmap)(struct xen_front_pgdir_shbuf *buf);
 };
 
-/**
+/*
  * Get granted reference to the very first page of the
  * page directory. Usually this is passed to the backend,
  * so it can find/fill the grant references to the buffer's
@@ -81,7 +81,7 @@ xen_front_pgdir_shbuf_get_dir_start(struct xen_front_pgdir_shbuf *buf)
 }
 EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_get_dir_start);
 
-/**
+/*
  * Map granted references of the shared buffer.
  *
  * Depending on the shared buffer mode of allocation
@@ -102,7 +102,7 @@ int xen_front_pgdir_shbuf_map(struct xen_front_pgdir_shbuf *buf)
 }
 EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_map);
 
-/**
+/*
  * Unmap granted references of the shared buffer.
  *
  * Depending on the shared buffer mode of allocation
@@ -123,7 +123,7 @@ int xen_front_pgdir_shbuf_unmap(struct xen_front_pgdir_shbuf *buf)
 }
 EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_unmap);
 
-/**
+/*
  * Free all the resources of the shared buffer.
  *
  * \param buf shared buffer which resources to be freed.
@@ -150,7 +150,7 @@ EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_free);
                                 offsetof(struct xen_page_directory, \
                                          gref)) / sizeof(grant_ref_t))
 
-/**
+/*
  * Get the number of pages the page directory consumes itself.
  *
  * \param buf shared buffer.
@@ -160,7 +160,7 @@ static int get_num_pages_dir(struct xen_front_pgdir_shbuf *buf)
        return DIV_ROUND_UP(buf->num_pages, XEN_NUM_GREFS_PER_PAGE);
 }
 
-/**
+/*
  * Calculate the number of grant references needed to share the buffer
  * and its pages when backend allocates the buffer.
  *
@@ -172,7 +172,7 @@ static void backend_calc_num_grefs(struct xen_front_pgdir_shbuf *buf)
        buf->num_grefs = get_num_pages_dir(buf);
 }
 
-/**
+/*
  * Calculate the number of grant references needed to share the buffer
  * and its pages when frontend allocates the buffer.
  *
@@ -190,7 +190,7 @@ static void guest_calc_num_grefs(struct xen_front_pgdir_shbuf *buf)
 #define xen_page_to_vaddr(page) \
        ((uintptr_t)pfn_to_kaddr(page_to_xen_pfn(page)))
 
-/**
+/*
  * Unmap the buffer previously mapped with grant references
  * provided by the backend.
  *
@@ -238,7 +238,7 @@ static int backend_unmap(struct xen_front_pgdir_shbuf *buf)
        return ret;
 }
 
-/**
+/*
  * Map the buffer with grant references provided by the backend.
  *
  * \param buf shared buffer.
@@ -320,7 +320,7 @@ static int backend_map(struct xen_front_pgdir_shbuf *buf)
        return ret;
 }
 
-/**
+/*
  * Fill page directory with grant references to the pages of the
  * page directory itself.
  *
@@ -350,7 +350,7 @@ static void backend_fill_page_dir(struct xen_front_pgdir_shbuf *buf)
        page_dir->gref_dir_next_page = XEN_GREF_LIST_END;
 }
 
-/**
+/*
  * Fill page directory with grant references to the pages of the
  * page directory and the buffer we share with the backend.
  *
@@ -389,7 +389,7 @@ static void guest_fill_page_dir(struct xen_front_pgdir_shbuf *buf)
        }
 }
 
-/**
+/*
  * Grant references to the frontend's buffer pages.
  *
  * These will be shared with the backend, so it can
@@ -418,7 +418,7 @@ static int guest_grant_refs_for_buffer(struct xen_front_pgdir_shbuf *buf,
        return 0;
 }
 
-/**
+/*
  * Grant all the references needed to share the buffer.
  *
  * Grant references to the page directory pages and, if
@@ -466,7 +466,7 @@ static int grant_references(struct xen_front_pgdir_shbuf *buf)
        return 0;
 }
 
-/**
+/*
  * Allocate all required structures to mange shared buffer.
  *
  * \param buf shared buffer.
@@ -506,7 +506,7 @@ static const struct xen_front_pgdir_shbuf_ops local_ops = {
        .grant_refs_for_buffer = guest_grant_refs_for_buffer,
 };
 
-/**
+/*
  * Allocate a new instance of a shared buffer.
  *
  * \param cfg configuration to be used while allocating a new shared buffer.
index 4d04ef2d3ae7bb8eeb00ae317bc2c659f71077f0..1fa8cf23bd3609bc35d3ba1ee2e8835fae2b55f2 100644 (file)
@@ -132,8 +132,8 @@ static int afs_probe_cell_name(struct dentry *dentry)
 
        ret = dns_query(net->net, "afsdb", name, len, "srv=1",
                        NULL, NULL, false);
-       if (ret == -ENODATA)
-               ret = -EDESTADDRREQ;
+       if (ret == -ENODATA || ret == -ENOKEY)
+               ret = -ENOENT;
        return ret;
 }
 
index c9cef3782b4ae48dee3be851aead7fb7de4fd42b..a812952be1c948d059991d1d67562284e340303a 100644 (file)
@@ -553,6 +553,7 @@ struct afs_server_entry {
 };
 
 struct afs_server_list {
+       struct rcu_head         rcu;
        afs_volid_t             vids[AFS_MAXTYPES]; /* Volume IDs */
        refcount_t              usage;
        unsigned char           nr_servers;
index ed9056703505fe073bb3d7c5fa8050b70574efd2..b59896b1de0af29986fbd4670cc95f170f177b62 100644 (file)
@@ -17,7 +17,7 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
                for (i = 0; i < slist->nr_servers; i++)
                        afs_unuse_server(net, slist->servers[i].server,
                                         afs_server_trace_put_slist);
-               kfree(slist);
+               kfree_rcu(slist, rcu);
        }
 }
 
index 95d713074dc8130125e55da2cd4e19b693682f8f..a01a0fb2cdbb5c472def4f5352a0ba3ced962cd1 100644 (file)
@@ -407,6 +407,10 @@ static int afs_validate_fc(struct fs_context *fc)
                        return PTR_ERR(volume);
 
                ctx->volume = volume;
+               if (volume->type != AFSVL_RWVOL) {
+                       ctx->flock_mode = afs_flock_mode_local;
+                       fc->sb_flags |= SB_RDONLY;
+               }
        }
 
        return 0;
index 488e58490b16e7b9781b651b4ab70e8b4b968b2a..eb415ce563600e98124521c2d45ce5a3c47b89ab 100644 (file)
@@ -58,6 +58,12 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
                }
 
                /* Status load is ordered after lookup counter load */
+               if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) {
+                       pr_warn("No record of cell %s\n", cell->name);
+                       vc->error = -ENOENT;
+                       return false;
+               }
+
                if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
                        vc->error = -EDESTADDRREQ;
                        return false;
@@ -285,6 +291,7 @@ failed:
  */
 static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
 {
+       struct afs_cell *cell = vc->cell;
        static int count;
        int i;
 
@@ -294,6 +301,9 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
 
        rcu_read_lock();
        pr_notice("EDESTADDR occurred\n");
+       pr_notice("CELL: %s err=%d\n", cell->name, cell->error);
+       pr_notice("DNS: src=%u st=%u lc=%x\n",
+                 cell->dns_source, cell->dns_status, cell->dns_lookup_count);
        pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
                  vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error);
 
index a5083d447a62f140492a638b406205a5eb3b8052..1f5db686366316d22236221a8d2d11f559019d9b 100644 (file)
@@ -309,9 +309,7 @@ static int autofs_fill_super(struct super_block *s, struct fs_context *fc)
        struct autofs_fs_context *ctx = fc->fs_private;
        struct autofs_sb_info *sbi = s->s_fs_info;
        struct inode *root_inode;
-       struct dentry *root;
        struct autofs_info *ino;
-       int ret = -ENOMEM;
 
        pr_debug("starting up, sbi = %p\n", sbi);
 
@@ -328,56 +326,44 @@ static int autofs_fill_super(struct super_block *s, struct fs_context *fc)
         */
        ino = autofs_new_ino(sbi);
        if (!ino)
-               goto fail;
+               return -ENOMEM;
 
        root_inode = autofs_get_inode(s, S_IFDIR | 0755);
+       if (!root_inode)
+               return -ENOMEM;
+
        root_inode->i_uid = ctx->uid;
        root_inode->i_gid = ctx->gid;
+       root_inode->i_fop = &autofs_root_operations;
+       root_inode->i_op = &autofs_dir_inode_operations;
 
-       root = d_make_root(root_inode);
-       if (!root)
-               goto fail_ino;
-
-       root->d_fsdata = ino;
+       s->s_root = d_make_root(root_inode);
+       if (unlikely(!s->s_root)) {
+               autofs_free_ino(ino);
+               return -ENOMEM;
+       }
+       s->s_root->d_fsdata = ino;
 
        if (ctx->pgrp_set) {
                sbi->oz_pgrp = find_get_pid(ctx->pgrp);
-               if (!sbi->oz_pgrp) {
-                       ret = invalf(fc, "Could not find process group %d",
-                                    ctx->pgrp);
-                       goto fail_dput;
-               }
-       } else {
+               if (!sbi->oz_pgrp)
+                       return invalf(fc, "Could not find process group %d",
+                                     ctx->pgrp);
+       } else
                sbi->oz_pgrp = get_task_pid(current, PIDTYPE_PGID);
-       }
 
        if (autofs_type_trigger(sbi->type))
-               __managed_dentry_set_managed(root);
-
-       root_inode->i_fop = &autofs_root_operations;
-       root_inode->i_op = &autofs_dir_inode_operations;
+               /* s->s_root won't be contended so there's little to
+                * be gained by not taking the d_lock when setting
+                * d_flags, even when a lot mounts are being done.
+                */
+               managed_dentry_set_managed(s->s_root);
 
        pr_debug("pipe fd = %d, pgrp = %u\n",
                 sbi->pipefd, pid_nr(sbi->oz_pgrp));
 
        sbi->flags &= ~AUTOFS_SBI_CATATONIC;
-
-       /*
-        * Success! Install the root dentry now to indicate completion.
-        */
-       s->s_root = root;
        return 0;
-
-       /*
-        * Failure ... clean up.
-        */
-fail_dput:
-       dput(root);
-       goto fail;
-fail_ino:
-       autofs_free_ino(ino);
-fail:
-       return ret;
 }
 
 /*
index c08c2c7d6fbbab79e34b0321b7bdc300fd575dfb..fddc7be580223a54357deb7647b9fa41748679e3 100644 (file)
@@ -33,6 +33,18 @@ config BCACHEFS_QUOTA
        depends on BCACHEFS_FS
        select QUOTACTL
 
+config BCACHEFS_ERASURE_CODING
+       bool "bcachefs erasure coding (RAID5/6) support (EXPERIMENTAL)"
+       depends on BCACHEFS_FS
+       select QUOTACTL
+       help
+       This enables the "erasure_code" filesysystem and inode option, which
+       organizes data into reed-solomon stripes instead of ordinary
+       replication.
+
+       WARNING: this feature is still undergoing on disk format changes, and
+       should only be enabled for testing purposes.
+
 config BCACHEFS_POSIX_ACL
        bool "bcachefs POSIX ACL support"
        depends on BCACHEFS_FS
index b85c7765272f6e4ae5e8aceb5a4bbaa89c535912..1ba0eeb7552a2795d033d2603cccce611c8f3381 100644 (file)
@@ -1297,6 +1297,30 @@ out:
        return wp;
 }
 
+static noinline void
+deallocate_extra_replicas(struct bch_fs *c,
+                         struct open_buckets *ptrs,
+                         struct open_buckets *ptrs_no_use,
+                         unsigned extra_replicas)
+{
+       struct open_buckets ptrs2 = { 0 };
+       struct open_bucket *ob;
+       unsigned i;
+
+       open_bucket_for_each(c, ptrs, ob, i) {
+               unsigned d = bch_dev_bkey_exists(c, ob->dev)->mi.durability;
+
+               if (d && d <= extra_replicas) {
+                       extra_replicas -= d;
+                       ob_push(c, ptrs_no_use, ob);
+               } else {
+                       ob_push(c, &ptrs2, ob);
+               }
+       }
+
+       *ptrs = ptrs2;
+}
+
 /*
  * Get us an open_bucket we can allocate from, return with it locked:
  */
@@ -1321,6 +1345,9 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
        int ret;
        int i;
 
+       if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
+               erasure_code = false;
+
        BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
 
        BUG_ON(!nr_replicas || !nr_replicas_required);
@@ -1382,6 +1409,9 @@ alloc_done:
        if (ret)
                goto err;
 
+       if (nr_effective > nr_replicas)
+               deallocate_extra_replicas(c, &ptrs, &wp->ptrs, nr_effective - nr_replicas);
+
        /* Free buckets we didn't use: */
        open_bucket_for_each(c, &wp->ptrs, ob, i)
                open_bucket_free_unused(c, ob);
index ef02c9bb0354173eddc4b86d6c998965ba747678..23c0834a97a4acaf490d13d7de32b00daf0bb399 100644 (file)
@@ -313,17 +313,17 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
                                  bp.level - 1,
                                  0);
        b = bch2_btree_iter_peek_node(iter);
-       if (IS_ERR(b))
+       if (IS_ERR_OR_NULL(b))
                goto err;
 
        BUG_ON(b->c.level != bp.level - 1);
 
-       if (b && extent_matches_bp(c, bp.btree_id, bp.level,
-                                  bkey_i_to_s_c(&b->key),
-                                  bucket, bp))
+       if (extent_matches_bp(c, bp.btree_id, bp.level,
+                             bkey_i_to_s_c(&b->key),
+                             bucket, bp))
                return b;
 
-       if (b && btree_node_will_make_reachable(b)) {
+       if (btree_node_will_make_reachable(b)) {
                b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
        } else {
                backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
index 9cb8684959ee17affdc2a558c14414477fef922d..dfa22f9d9a1d34aad88d9badc5decdba3904d8eb 100644 (file)
@@ -617,7 +617,7 @@ struct journal_seq_blacklist_table {
                u64             start;
                u64             end;
                bool            dirty;
-       }                       entries[0];
+       }                       entries[];
 };
 
 struct journal_keys {
@@ -638,6 +638,8 @@ struct journal_keys {
        size_t                  gap;
        size_t                  nr;
        size_t                  size;
+       atomic_t                ref;
+       bool                    initial_ref_held;
 };
 
 struct btree_trans_buf {
@@ -929,7 +931,7 @@ struct bch_fs {
        mempool_t               compression_bounce[2];
        mempool_t               compress_workspace[BCH_COMPRESSION_TYPE_NR];
        mempool_t               decompress_workspace;
-       ZSTD_parameters         zstd_params;
+       size_t                  zstd_workspace_size;
 
        struct crypto_shash     *sha256;
        struct crypto_sync_skcipher *chacha20;
index 0a750953ff921b9d62d9fd1918da27d375c2c6dc..1ab1f08d763b02d03a28f9bbc7abc1dc6994b525 100644 (file)
@@ -151,7 +151,11 @@ struct bpos {
 #else
 #error edit for your odd byteorder.
 #endif
-} __packed __aligned(4);
+} __packed
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+__aligned(4)
+#endif
+;
 
 #define KEY_INODE_MAX                  ((__u64)~0ULL)
 #define KEY_OFFSET_MAX                 ((__u64)~0ULL)
@@ -1528,7 +1532,7 @@ struct bch_sb_field_disk_groups {
        x(move_extent_write,                            36)     \
        x(move_extent_finish,                           37)     \
        x(move_extent_fail,                             38)     \
-       x(move_extent_alloc_mem_fail,                   39)     \
+       x(move_extent_start_fail,                       39)     \
        x(copygc,                                       40)     \
        x(copygc_wait,                                  41)     \
        x(gc_gens_end,                                  42)     \
index 0b5d09c8475d00bf35ee70c52cd9ae9483a56823..30ab78a24517404b35d3e41d8a86482f7e719096 100644 (file)
@@ -1541,8 +1541,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
                rcu_assign_pointer(ca->buckets_gc, buckets);
        }
 
-       for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
-                          BTREE_ITER_PREFETCH, k, ret) {
+       ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
+                                 BTREE_ITER_PREFETCH, k, ({
                ca = bch_dev_bkey_exists(c, k.k->p.inode);
                g = gc_bucket(ca, k.k->p.offset);
 
@@ -1561,8 +1561,9 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
                        g->stripe               = a->stripe;
                        g->stripe_redundancy    = a->stripe_redundancy;
                }
-       }
-       bch2_trans_iter_exit(trans, &iter);
+
+               0;
+       }));
 err:
        bch2_trans_put(trans);
        if (ret)
index 37d896edb06e0475cc7146e31a2790321f842394..57c20390e10e3fe05394415d8ccabb43201c871b 100644 (file)
@@ -1358,10 +1358,9 @@ static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void *
        return offset;
 }
 
-static void btree_node_read_all_replicas_done(struct closure *cl)
+static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
 {
-       struct btree_node_read_all *ra =
-               container_of(cl, struct btree_node_read_all, cl);
+       closure_type(ra, struct btree_node_read_all, cl);
        struct bch_fs *c = ra->c;
        struct btree *b = ra->b;
        struct printbuf buf = PRINTBUF;
@@ -1567,7 +1566,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
 
        if (sync) {
                closure_sync(&ra->cl);
-               btree_node_read_all_replicas_done(&ra->cl);
+               btree_node_read_all_replicas_done(&ra->cl.work);
        } else {
                continue_at(&ra->cl, btree_node_read_all_replicas_done,
                            c->io_complete_wq);
index c2adf3fbb0b3abec5a3521d49663ec77ed32916f..8e0fe65f6101bb237bf4aea173e8deaf8496550b 100644 (file)
@@ -2981,7 +2981,8 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
        trans->fn_idx           = fn_idx;
        trans->locking_wait.task = current;
        trans->journal_replay_not_finished =
-               !test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
+               unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) &&
+               atomic_inc_not_zero(&c->journal_keys.ref);
        closure_init_stack(&trans->ref);
 
        s = btree_trans_stats(trans);
@@ -3087,8 +3088,6 @@ void bch2_trans_put(struct btree_trans *trans)
                srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
        }
 
-       bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
        kfree(trans->extra_journal_entries.data);
 
        if (trans->fs_usage_deltas) {
@@ -3100,6 +3099,9 @@ void bch2_trans_put(struct btree_trans *trans)
                        kfree(trans->fs_usage_deltas);
        }
 
+       if (unlikely(trans->journal_replay_not_finished))
+               bch2_journal_keys_put(c);
+
        if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
                mempool_free(trans->mem, &c->btree_trans_mem_pool);
        else
index 58a981bcf3aa8ca6749c41ca9e22655098adee14..ec52f50d249d075f4fae6ad60976c330ba7e46f0 100644 (file)
@@ -80,6 +80,8 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree
        struct journal_keys *keys = &c->journal_keys;
        unsigned iters = 0;
        struct journal_key *k;
+
+       BUG_ON(*idx > keys->nr);
 search:
        if (!*idx)
                *idx = __bch2_journal_key_search(keys, btree_id, level, pos);
@@ -189,10 +191,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
                /* Since @keys was full, there was no gap: */
                memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr);
                kvfree(keys->d);
-               *keys = new_keys;
+               keys->d         = new_keys.d;
+               keys->nr        = new_keys.nr;
+               keys->size      = new_keys.size;
 
                /* And now the gap is at the end: */
-               keys->gap = keys->nr;
+               keys->gap       = keys->nr;
        }
 
        journal_iters_move_gap(c, keys->gap, idx);
@@ -415,10 +419,16 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
                cmp_int(l->journal_offset, r->journal_offset);
 }
 
-void bch2_journal_keys_free(struct journal_keys *keys)
+void bch2_journal_keys_put(struct bch_fs *c)
 {
+       struct journal_keys *keys = &c->journal_keys;
        struct journal_key *i;
 
+       BUG_ON(atomic_read(&keys->ref) <= 0);
+
+       if (!atomic_dec_and_test(&keys->ref))
+               return;
+
        move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
        keys->gap = keys->nr;
 
@@ -429,6 +439,8 @@ void bch2_journal_keys_free(struct journal_keys *keys)
        kvfree(keys->d);
        keys->d = NULL;
        keys->nr = keys->gap = keys->size = 0;
+
+       bch2_journal_entries_free(c);
 }
 
 static void __journal_keys_sort(struct journal_keys *keys)
index 5d64e7e22f262df66076bddd982d1b6cc8b4c85a..8ca4c100b2e3e413d7adbb8dd5599d9f42de6d30 100644 (file)
@@ -49,7 +49,15 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
                                                struct bch_fs *,
                                                struct btree *);
 
-void bch2_journal_keys_free(struct journal_keys *);
+void bch2_journal_keys_put(struct bch_fs *);
+
+static inline void bch2_journal_keys_put_initial(struct bch_fs *c)
+{
+       if (c->journal_keys.initial_ref_held)
+               bch2_journal_keys_put(c);
+       c->journal_keys.initial_ref_held = false;
+}
+
 void bch2_journal_entries_free(struct bch_fs *);
 
 int bch2_journal_keys_sort(struct bch_fs *);
index 9b78f78a75b59c0cb28dac46fc4e107f0a9cbca1..37fbf22de8fcba305d717f41e4ae8a9461502d53 100644 (file)
@@ -89,10 +89,13 @@ static void bkey_cached_free(struct btree_key_cache *bc,
        ck->btree_trans_barrier_seq =
                start_poll_synchronize_srcu(&c->btree_trans_barrier);
 
-       if (ck->c.lock.readers)
+       if (ck->c.lock.readers) {
                list_move_tail(&ck->list, &bc->freed_pcpu);
-       else
+               bc->nr_freed_pcpu++;
+       } else {
                list_move_tail(&ck->list, &bc->freed_nonpcpu);
+               bc->nr_freed_nonpcpu++;
+       }
        atomic_long_inc(&bc->nr_freed);
 
        kfree(ck->k);
@@ -109,6 +112,8 @@ static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
 {
        struct bkey_cached *pos;
 
+       bc->nr_freed_nonpcpu++;
+
        list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
                if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
                                 pos->btree_trans_barrier_seq)) {
@@ -158,6 +163,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
 #else
                mutex_lock(&bc->lock);
                list_move_tail(&ck->list, &bc->freed_nonpcpu);
+               bc->nr_freed_nonpcpu++;
                mutex_unlock(&bc->lock);
 #endif
        } else {
@@ -217,6 +223,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
                               f->nr < ARRAY_SIZE(f->objs) / 2) {
                                ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
                                list_del_init(&ck->list);
+                               bc->nr_freed_nonpcpu--;
                                f->objs[f->nr++] = ck;
                        }
 
@@ -229,6 +236,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
                if (!list_empty(&bc->freed_nonpcpu)) {
                        ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
                        list_del_init(&ck->list);
+                       bc->nr_freed_nonpcpu--;
                }
                mutex_unlock(&bc->lock);
 #endif
@@ -664,7 +672,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
                goto out;
 
        bch2_journal_pin_drop(j, &ck->journal);
-       bch2_journal_preres_put(j, &ck->res);
 
        BUG_ON(!btree_node_locked(c_iter.path, 0));
 
@@ -762,18 +769,6 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 
        BUG_ON(insert->k.u64s > ck->u64s);
 
-       if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
-               int difference;
-
-               BUG_ON(jset_u64s(insert->k.u64s) > trans->journal_preres.u64s);
-
-               difference = jset_u64s(insert->k.u64s) - ck->res.u64s;
-               if (difference > 0) {
-                       trans->journal_preres.u64s      -= difference;
-                       ck->res.u64s                    += difference;
-               }
-       }
-
        bkey_copy(ck->k, insert);
        ck->valid = true;
 
@@ -850,6 +845,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
         * Newest freed entries are at the end of the list - once we hit one
         * that's too new to be freed, we can bail out:
         */
+       scanned += bc->nr_freed_nonpcpu;
+
        list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
                if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
                                                 ck->btree_trans_barrier_seq))
@@ -859,13 +856,15 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                six_lock_exit(&ck->c.lock);
                kmem_cache_free(bch2_key_cache, ck);
                atomic_long_dec(&bc->nr_freed);
-               scanned++;
                freed++;
+               bc->nr_freed_nonpcpu--;
        }
 
        if (scanned >= nr)
                goto out;
 
+       scanned += bc->nr_freed_pcpu;
+
        list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
                if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
                                                 ck->btree_trans_barrier_seq))
@@ -875,8 +874,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                six_lock_exit(&ck->c.lock);
                kmem_cache_free(bch2_key_cache, ck);
                atomic_long_dec(&bc->nr_freed);
-               scanned++;
                freed++;
+               bc->nr_freed_pcpu--;
        }
 
        if (scanned >= nr)
@@ -982,6 +981,9 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
        }
 #endif
 
+       BUG_ON(list_count_nodes(&bc->freed_pcpu) != bc->nr_freed_pcpu);
+       BUG_ON(list_count_nodes(&bc->freed_nonpcpu) != bc->nr_freed_nonpcpu);
+
        list_splice(&bc->freed_pcpu,    &items);
        list_splice(&bc->freed_nonpcpu, &items);
 
@@ -991,7 +993,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
                cond_resched();
 
                bch2_journal_pin_drop(&c->journal, &ck->journal);
-               bch2_journal_preres_put(&c->journal, &ck->res);
 
                list_del(&ck->list);
                kfree(ck->k);
diff --git a/fs/bcachefs/btree_key_cache_types.h b/fs/bcachefs/btree_key_cache_types.h
new file mode 100644 (file)
index 0000000..290e4e5
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
+#define _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
+
+struct btree_key_cache_freelist {
+       struct bkey_cached      *objs[16];
+       unsigned                nr;
+};
+
+struct btree_key_cache {
+       struct mutex            lock;
+       struct rhashtable       table;
+       bool                    table_init_done;
+
+       struct list_head        freed_pcpu;
+       size_t                  nr_freed_pcpu;
+       struct list_head        freed_nonpcpu;
+       size_t                  nr_freed_nonpcpu;
+
+       struct shrinker         *shrink;
+       unsigned                shrink_iter;
+       struct btree_key_cache_freelist __percpu *pcpu_freed;
+
+       atomic_long_t           nr_freed;
+       atomic_long_t           nr_keys;
+       atomic_long_t           nr_dirty;
+};
+
+struct bkey_cached_key {
+       u32                     btree_id;
+       struct bpos             pos;
+} __packed __aligned(4);
+
+#endif /* _BCACHEFS_BTREE_KEY_CACHE_TYPES_H */
index decad7b66c59c114a9315d7acd1ec3bf755a4230..12907beda98c2b9d259e7896b79867adbbb9a88e 100644 (file)
@@ -78,6 +78,53 @@ inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
                bch2_btree_init_next(trans, b);
 }
 
+static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
+{
+       while (--i >= trans->updates) {
+               if (same_leaf_as_prev(trans, i))
+                       continue;
+
+               bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
+       }
+
+       trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
+       return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
+}
+
+static inline int bch2_trans_lock_write(struct btree_trans *trans)
+{
+       struct btree_insert_entry *i;
+
+       EBUG_ON(trans->write_locked);
+
+       trans_for_each_update(trans, i) {
+               if (same_leaf_as_prev(trans, i))
+                       continue;
+
+               if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
+                       return trans_lock_write_fail(trans, i);
+
+               if (!i->cached)
+                       bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
+       }
+
+       trans->write_locked = true;
+       return 0;
+}
+
+static inline void bch2_trans_unlock_write(struct btree_trans *trans)
+{
+       if (likely(trans->write_locked)) {
+               struct btree_insert_entry *i;
+
+               trans_for_each_update(trans, i)
+                       if (!same_leaf_as_prev(trans, i))
+                               bch2_btree_node_unlock_write_inlined(trans, i->path,
+                                                                    insert_l(i)->b);
+               trans->write_locked = false;
+       }
+}
+
 /* Inserting into a given leaf node (last stage of insert): */
 
 /* Handle overwrites and do insert, for non extents: */
@@ -276,17 +323,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
                bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
 }
 
-static noinline int
-bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned flags,
-                                  unsigned long trace_ip)
-{
-       return drop_locks_do(trans,
-               bch2_journal_preres_get(&trans->c->journal,
-                       &trans->journal_preres,
-                       trans->journal_preres_u64s,
-                       (flags & BCH_WATERMARK_MASK)));
-}
-
 static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
                                                      unsigned flags)
 {
@@ -321,6 +357,45 @@ static inline int btree_key_can_insert(struct btree_trans *trans,
        return 0;
 }
 
+noinline static int
+btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
+                                    struct btree_path *path, unsigned new_u64s)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_insert_entry *i;
+       struct bkey_cached *ck = (void *) path->l[0].b;
+       struct bkey_i *new_k;
+       int ret;
+
+       bch2_trans_unlock_write(trans);
+       bch2_trans_unlock(trans);
+
+       new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
+       if (!new_k) {
+               bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
+                       bch2_btree_id_str(path->btree_id), new_u64s);
+               return -BCH_ERR_ENOMEM_btree_key_cache_insert;
+       }
+
+       ret =   bch2_trans_relock(trans) ?:
+               bch2_trans_lock_write(trans);
+       if (unlikely(ret)) {
+               kfree(new_k);
+               return ret;
+       }
+
+       memcpy(new_k, ck->k, ck->u64s * sizeof(u64));
+
+       trans_for_each_update(trans, i)
+               if (i->old_v == &ck->k->v)
+                       i->old_v = &new_k->v;
+
+       kfree(ck->k);
+       ck->u64s        = new_u64s;
+       ck->k           = new_k;
+       return 0;
+}
+
 static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags,
                                       struct btree_path *path, unsigned u64s)
 {
@@ -347,12 +422,9 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
                return 0;
 
        new_u64s        = roundup_pow_of_two(u64s);
-       new_k           = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
-       if (!new_k) {
-               bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
-                       bch2_btree_id_str(path->btree_id), new_u64s);
-               return -BCH_ERR_ENOMEM_btree_key_cache_insert;
-       }
+       new_k           = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT);
+       if (unlikely(!new_k))
+               return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s);
 
        trans_for_each_update(trans, i)
                if (i->old_v == &ck->k->v)
@@ -732,37 +804,6 @@ revert_fs_usage:
        return ret;
 }
 
-static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
-{
-       while (--i >= trans->updates) {
-               if (same_leaf_as_prev(trans, i))
-                       continue;
-
-               bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
-       }
-
-       trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
-       return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
-}
-
-static inline int trans_lock_write(struct btree_trans *trans)
-{
-       struct btree_insert_entry *i;
-
-       trans_for_each_update(trans, i) {
-               if (same_leaf_as_prev(trans, i))
-                       continue;
-
-               if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
-                       return trans_lock_write_fail(trans, i);
-
-               if (!i->cached)
-                       bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
-       }
-
-       return 0;
-}
-
 static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
 {
        struct btree_insert_entry *i;
@@ -830,15 +871,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
                }
        }
 
-       ret = bch2_journal_preres_get(&c->journal,
-                       &trans->journal_preres, trans->journal_preres_u64s,
-                       (flags & BCH_WATERMARK_MASK)|JOURNAL_RES_GET_NONBLOCK);
-       if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked))
-               ret = bch2_trans_journal_preres_get_cold(trans, flags, trace_ip);
-       if (unlikely(ret))
-               return ret;
-
-       ret = trans_lock_write(trans);
+       ret = bch2_trans_lock_write(trans);
        if (unlikely(ret))
                return ret;
 
@@ -847,10 +880,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
        if (!ret && unlikely(trans->journal_replay_not_finished))
                bch2_drop_overwrites_from_journal(trans);
 
-       trans_for_each_update(trans, i)
-               if (!same_leaf_as_prev(trans, i))
-                       bch2_btree_node_unlock_write_inlined(trans, i->path,
-                                                       insert_l(i)->b);
+       bch2_trans_unlock_write(trans);
 
        if (!ret && trans->journal_pin)
                bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
@@ -1003,7 +1033,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i = NULL;
        struct btree_write_buffered_key *wb;
-       unsigned u64s;
        int ret = 0;
 
        if (!trans->nr_updates &&
@@ -1063,13 +1092,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
 
        EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
 
-       memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
-
        trans->journal_u64s             = trans->extra_journal_entries.nr;
-       trans->journal_preres_u64s      = 0;
-
        trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
-
        if (trans->journal_transaction_names)
                trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
 
@@ -1085,16 +1109,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
                if (i->key_cache_already_flushed)
                        continue;
 
-               /* we're going to journal the key being updated: */
-               u64s = jset_u64s(i->k->k.u64s);
-               if (i->cached &&
-                   likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY)))
-                       trans->journal_preres_u64s += u64s;
-
                if (i->flags & BTREE_UPDATE_NOJOURNAL)
                        continue;
 
-               trans->journal_u64s += u64s;
+               /* we're going to journal the key being updated: */
+               trans->journal_u64s += jset_u64s(i->k->k.u64s);
 
                /* and we're also going to log the overwrite: */
                if (trans->journal_transaction_names)
@@ -1126,8 +1145,6 @@ retry:
 
        trace_and_count(c, transaction_commit, trans, _RET_IP_);
 out:
-       bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
        if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
                bch2_write_ref_put(c, BCH_WRITE_REF_trans);
 out_reset:
index 941841a0c5bf68c56370cf2fadf6669351d582d8..60453ba86c4b963777f67693352d4929ac726549 100644 (file)
@@ -5,7 +5,7 @@
 #include <linux/list.h>
 #include <linux/rhashtable.h>
 
-//#include "bkey_methods.h"
+#include "btree_key_cache_types.h"
 #include "buckets_types.h"
 #include "darray.h"
 #include "errcode.h"
@@ -312,31 +312,6 @@ struct btree_iter {
 #endif
 };
 
-struct btree_key_cache_freelist {
-       struct bkey_cached      *objs[16];
-       unsigned                nr;
-};
-
-struct btree_key_cache {
-       struct mutex            lock;
-       struct rhashtable       table;
-       bool                    table_init_done;
-       struct list_head        freed_pcpu;
-       struct list_head        freed_nonpcpu;
-       struct shrinker         *shrink;
-       unsigned                shrink_iter;
-       struct btree_key_cache_freelist __percpu *pcpu_freed;
-
-       atomic_long_t           nr_freed;
-       atomic_long_t           nr_keys;
-       atomic_long_t           nr_dirty;
-};
-
-struct bkey_cached_key {
-       u32                     btree_id;
-       struct bpos             pos;
-} __packed __aligned(4);
-
 #define BKEY_CACHED_ACCESSED           0
 #define BKEY_CACHED_DIRTY              1
 
@@ -352,7 +327,6 @@ struct bkey_cached {
        struct rhash_head       hash;
        struct list_head        list;
 
-       struct journal_preres   res;
        struct journal_entry_pin journal;
        u64                     seq;
 
@@ -389,11 +363,7 @@ struct btree_insert_entry {
        unsigned long           ip_allocated;
 };
 
-#ifndef CONFIG_LOCKDEP
 #define BTREE_ITER_MAX         64
-#else
-#define BTREE_ITER_MAX         32
-#endif
 
 struct btree_trans_commit_hook;
 typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
@@ -434,6 +404,7 @@ struct btree_trans {
        bool                    journal_transaction_names:1;
        bool                    journal_replay_not_finished:1;
        bool                    notrace_relock_fail:1;
+       bool                    write_locked:1;
        enum bch_errcode        restarted:16;
        u32                     restart_count;
        unsigned long           last_begin_ip;
@@ -465,11 +436,9 @@ struct btree_trans {
        struct journal_entry_pin *journal_pin;
 
        struct journal_res      journal_res;
-       struct journal_preres   journal_preres;
        u64                     *journal_seq;
        struct disk_reservation *disk_res;
        unsigned                journal_u64s;
-       unsigned                journal_preres_u64s;
        struct replicas_delta_list *fs_usage_deltas;
 };
 
index 39c2db68123bd1e7958cb69540721a1548d92516..6697417273aa14e7c5de09fbc1622c84eff24949 100644 (file)
@@ -513,8 +513,6 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
                up_read(&c->gc_lock);
        as->took_gc_lock = false;
 
-       bch2_journal_preres_put(&c->journal, &as->journal_preres);
-
        bch2_journal_pin_drop(&c->journal, &as->journal);
        bch2_journal_pin_flush(&c->journal, &as->journal);
        bch2_disk_reservation_put(c, &as->disk_res);
@@ -734,8 +732,6 @@ err:
 
        bch2_journal_pin_drop(&c->journal, &as->journal);
 
-       bch2_journal_preres_put(&c->journal, &as->journal_preres);
-
        mutex_lock(&c->btree_interior_update_lock);
        for (i = 0; i < as->nr_new_nodes; i++) {
                b = as->new_nodes[i];
@@ -782,9 +778,9 @@ static void btree_interior_update_work(struct work_struct *work)
        }
 }
 
-static void btree_update_set_nodes_written(struct closure *cl)
+static CLOSURE_CALLBACK(btree_update_set_nodes_written)
 {
-       struct btree_update *as = container_of(cl, struct btree_update, cl);
+       closure_type(as, struct btree_update, cl);
        struct bch_fs *c = as->c;
 
        mutex_lock(&c->btree_interior_update_lock);
@@ -1047,7 +1043,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        unsigned nr_nodes[2] = { 0, 0 };
        unsigned update_level = level;
        enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
-       unsigned journal_flags = 0;
        int ret = 0;
        u32 restart_count = trans->restart_count;
 
@@ -1061,10 +1056,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        flags &= ~BCH_WATERMARK_MASK;
        flags |= watermark;
 
-       if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
-               journal_flags |= JOURNAL_RES_GET_NONBLOCK;
-       journal_flags |= watermark;
-
        while (1) {
                nr_nodes[!!update_level] += 1 + split;
                update_level++;
@@ -1080,8 +1071,12 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
                        break;
                }
 
+               /*
+                * Always check for space for two keys, even if we won't have to
+                * split at prior level - it might have been a merge instead:
+                */
                if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
-                                       BKEY_BTREE_PTR_U64s_MAX * (1 + split)))
+                                               BKEY_BTREE_PTR_U64s_MAX * 2))
                        break;
 
                split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
@@ -1129,27 +1124,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        if (ret)
                goto err;
 
-       ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
-                                     BTREE_UPDATE_JOURNAL_RES,
-                                     journal_flags|JOURNAL_RES_GET_NONBLOCK);
-       if (ret) {
-               if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
-                       ret = -BCH_ERR_journal_reclaim_would_deadlock;
-                       goto err;
-               }
-
-               ret = drop_locks_do(trans,
-                       bch2_journal_preres_get(&c->journal, &as->journal_preres,
-                                             BTREE_UPDATE_JOURNAL_RES,
-                                             journal_flags));
-               if (ret == -BCH_ERR_journal_preres_get_blocked) {
-                       trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags);
-                       ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
-               }
-               if (ret)
-                       goto err;
-       }
-
        ret = bch2_disk_reservation_get(c, &as->disk_res,
                        (nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
                        c->opts.metadata_replicas,
@@ -2296,6 +2270,10 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
 
        BUG_ON(!btree_node_hashed(b));
 
+       struct bch_extent_ptr *ptr;
+       bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr,
+                           !bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev));
+
        ret = bch2_btree_node_update_key(trans, &iter, b, new_key,
                                         commit_flags, skip_triggers);
 out:
index 4df21512d640dac83c8948137dfa4fd077b2ef39..031076e75fa1322a82a202e150a8eca9a75c063e 100644 (file)
@@ -55,7 +55,6 @@ struct btree_update {
        unsigned                        update_level;
 
        struct disk_reservation         disk_res;
-       struct journal_preres           journal_preres;
 
        /*
         * BTREE_INTERIOR_UPDATING_NODE:
index 58d8c6ffd955429d9f13207ddf04c1f687a68b2e..5a91d3189fcf7ea95615d46dd11a9ad83f8e8363 100644 (file)
@@ -854,8 +854,12 @@ static int __mark_pointer(struct btree_trans *trans,
                return ret;
 
        *dst_sectors += sectors;
-       *bucket_data_type = *dirty_sectors || *cached_sectors
-               ? ptr_data_type : 0;
+
+       if (!*dirty_sectors && !*cached_sectors)
+               *bucket_data_type = 0;
+       else if (*bucket_data_type != BCH_DATA_stripe)
+               *bucket_data_type = ptr_data_type;
+
        return 0;
 }
 
@@ -2091,8 +2095,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
        bucket_gens->first_bucket = ca->mi.first_bucket;
        bucket_gens->nbuckets   = nbuckets;
 
-       bch2_copygc_stop(c);
-
        if (resize) {
                down_write(&c->gc_lock);
                down_write(&ca->bucket_lock);
index a8b148ec2a2b6b8ed1f33d10ad195b72afa112e0..51af8ea230edbf997756e51ac37cfe3cfc158341 100644 (file)
@@ -354,8 +354,7 @@ static int attempt_compress(struct bch_fs *c,
                 */
                unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
                ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
-               ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
-                       zstd_cctx_workspace_bound(&params.cParams));
+               ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size);
 
                /*
                 * ZSTD requires that when we decompress we pass in the exact
@@ -371,7 +370,7 @@ static int attempt_compress(struct bch_fs *c,
                size_t len = zstd_compress_cctx(ctx,
                                dst + 4,        dst_len - 4 - 7,
                                src,            src_len,
-                               &c->zstd_params);
+                               &params);
                if (zstd_is_error(len))
                        return 0;
 
@@ -572,6 +571,13 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
        size_t decompress_workspace_size = 0;
        ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
                                                 c->opts.encoded_extent_max);
+
+       /*
+        * ZSTD is lying: if we allocate the size of the workspace it says it
+        * requires, it returns memory allocation errors
+        */
+       c->zstd_workspace_size = zstd_cctx_workspace_bound(&params.cParams);
+
        struct {
                unsigned                        feature;
                enum bch_compression_type       type;
@@ -585,13 +591,11 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
                        zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
                        zlib_inflate_workspacesize(), },
                { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
-                       zstd_cctx_workspace_bound(&params.cParams),
+                       c->zstd_workspace_size,
                        zstd_dctx_workspace_bound() },
        }, *i;
        bool have_compressed = false;
 
-       c->zstd_params = params;
-
        for (i = compression_types;
             i < compression_types + ARRAY_SIZE(compression_types);
             i++)
index 0771a6d880bf5e2e4efcbcc21d91d34b64160dd4..71aa5e59787b8bc6dca216add572553363a44492 100644 (file)
@@ -239,6 +239,34 @@ restart_drop_extra_replicas:
 
                next_pos = insert->k.p;
 
+               /*
+                * Check for nonce offset inconsistency:
+                * This is debug code - we've been seeing this bug rarely, and
+                * it's been hard to reproduce, so this should give us some more
+                * information when it does occur:
+                */
+               struct printbuf err = PRINTBUF;
+               int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err);
+               printbuf_exit(&err);
+
+               if (invalid) {
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_str(&buf, "about to insert invalid key in data update path");
+                       prt_str(&buf, "\nold: ");
+                       bch2_bkey_val_to_text(&buf, c, old);
+                       prt_str(&buf, "\nk:   ");
+                       bch2_bkey_val_to_text(&buf, c, k);
+                       prt_str(&buf, "\nnew: ");
+                       bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+
+                       bch2_print_string_as_lines(KERN_ERR, buf.buf);
+                       printbuf_exit(&buf);
+
+                       bch2_fatal_error(c);
+                       goto out;
+               }
+
                ret =   bch2_insert_snapshot_whiteouts(trans, m->btree_id,
                                                k.k->p, bkey_start_pos(&insert->k)) ?:
                        bch2_insert_snapshot_whiteouts(trans, m->btree_id,
@@ -328,7 +356,7 @@ void bch2_data_update_exit(struct data_update *update)
        bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
 }
 
-void bch2_update_unwritten_extent(struct btree_trans *trans,
+static void bch2_update_unwritten_extent(struct btree_trans *trans,
                                  struct data_update *update)
 {
        struct bch_fs *c = update->op.c;
@@ -408,7 +436,51 @@ void bch2_update_unwritten_extent(struct btree_trans *trans,
        }
 }
 
+int bch2_extent_drop_ptrs(struct btree_trans *trans,
+                         struct btree_iter *iter,
+                         struct bkey_s_c k,
+                         struct data_update_opts data_opts)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_i *n;
+       int ret;
+
+       n = bch2_bkey_make_mut_noupdate(trans, k);
+       ret = PTR_ERR_OR_ZERO(n);
+       if (ret)
+               return ret;
+
+       while (data_opts.kill_ptrs) {
+               unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
+               struct bch_extent_ptr *ptr;
+
+               bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
+               data_opts.kill_ptrs ^= 1U << drop;
+       }
+
+       /*
+        * If the new extent no longer has any pointers, bch2_extent_normalize()
+        * will do the appropriate thing with it (turning it into a
+        * KEY_TYPE_error key, or just a discard if it was a cached extent)
+        */
+       bch2_extent_normalize(c, bkey_i_to_s(n));
+
+       /*
+        * Since we're not inserting through an extent iterator
+        * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
+        * we aren't using the extent overwrite path to delete, we're
+        * just using the normal key deletion path:
+        */
+       if (bkey_deleted(&n->k))
+               n->k.size = 0;
+
+       return bch2_trans_relock(trans) ?:
+               bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+               bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
+}
+
 int bch2_data_update_init(struct btree_trans *trans,
+                         struct btree_iter *iter,
                          struct moving_context *ctxt,
                          struct data_update *m,
                          struct write_point_specifier wp,
@@ -424,7 +496,7 @@ int bch2_data_update_init(struct btree_trans *trans,
        const struct bch_extent_ptr *ptr;
        unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
        unsigned ptrs_locked = 0;
-       int ret;
+       int ret = 0;
 
        bch2_bkey_buf_init(&m->k);
        bch2_bkey_buf_reassemble(&m->k, c, k);
@@ -450,6 +522,8 @@ int bch2_data_update_init(struct btree_trans *trans,
        bkey_for_each_ptr(ptrs, ptr)
                percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref);
 
+       unsigned durability_have = 0, durability_removing = 0;
+
        i = 0;
        bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
                bool locked;
@@ -461,8 +535,11 @@ int bch2_data_update_init(struct btree_trans *trans,
                                reserve_sectors += k.k->size;
 
                        m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
-               } else if (!p.ptr.cached) {
+                       durability_removing += bch2_extent_ptr_desired_durability(c, &p);
+               } else if (!p.ptr.cached &&
+                          !((1U << i) & m->data_opts.kill_ptrs)) {
                        bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
+                       durability_have += bch2_extent_ptr_durability(c, &p);
                }
 
                /*
@@ -501,6 +578,29 @@ int bch2_data_update_init(struct btree_trans *trans,
                i++;
        }
 
+       /*
+        * If current extent durability is less than io_opts.data_replicas,
+        * we're not trying to rereplicate the extent up to data_replicas here -
+        * unless extra_replicas was specified
+        *
+        * Increasing replication is an explicit operation triggered by
+        * rereplicate, currently, so that users don't get an unexpected -ENOSPC
+        */
+       if (durability_have >= io_opts.data_replicas) {
+               m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
+               m->data_opts.rewrite_ptrs = 0;
+               /* if iter == NULL, it's just a promote */
+               if (iter)
+                       ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts);
+               goto done;
+       }
+
+       m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
+               m->data_opts.extra_replicas;
+       m->op.nr_replicas_required = m->op.nr_replicas;
+
+       BUG_ON(!m->op.nr_replicas);
+
        if (reserve_sectors) {
                ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
                                m->data_opts.extra_replicas
@@ -510,14 +610,11 @@ int bch2_data_update_init(struct btree_trans *trans,
                        goto err;
        }
 
-       m->op.nr_replicas += m->data_opts.extra_replicas;
-       m->op.nr_replicas_required = m->op.nr_replicas;
-
-       BUG_ON(!m->op.nr_replicas);
+       if (bkey_extent_is_unwritten(k)) {
+               bch2_update_unwritten_extent(trans, m);
+               goto done;
+       }
 
-       /* Special handling required: */
-       if (bkey_extent_is_unwritten(k))
-               return -BCH_ERR_unwritten_extent_update;
        return 0;
 err:
        i = 0;
@@ -532,6 +629,9 @@ err:
        bch2_bkey_buf_exit(&m->k, c);
        bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
        return ret;
+done:
+       bch2_data_update_exit(m);
+       return ret ?: -BCH_ERR_data_update_done;
 }
 
 void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
index 9dc17b9d83795181798deb5af39401d4d6248581..991095bbd469baeb55de1c0d2636267e49d68a28 100644 (file)
@@ -32,9 +32,14 @@ int bch2_data_update_index_update(struct bch_write_op *);
 void bch2_data_update_read_done(struct data_update *,
                                struct bch_extent_crc_unpacked);
 
+int bch2_extent_drop_ptrs(struct btree_trans *,
+                         struct btree_iter *,
+                         struct bkey_s_c,
+                         struct data_update_opts);
+
 void bch2_data_update_exit(struct data_update *);
-void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *);
-int bch2_data_update_init(struct btree_trans *, struct moving_context *,
+int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
+                         struct moving_context *,
                          struct data_update *,
                          struct write_point_specifier,
                          struct bch_io_opts, struct data_update_opts,
index d613695abf9f67c2e9f2ab4ce91d863bdfd743c7..4d0cb0ccff32f2c75fa66f932f517f00b9cfdf25 100644 (file)
@@ -555,6 +555,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
        case TARGET_DEV: {
                struct bch_dev *ca;
 
+               out->atomic++;
                rcu_read_lock();
                ca = t.dev < c->sb.nr_devices
                        ? rcu_dereference(c->devs[t.dev])
@@ -570,6 +571,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
                }
 
                rcu_read_unlock();
+               out->atomic--;
                break;
        }
        case TARGET_GROUP:
@@ -580,7 +582,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
        }
 }
 
-void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
+static void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
 {
        struct target t = target_decode(v);
 
index 875f7c5a6fca63337a6be502daf2dda5a48844ea..2a77de18c004e77041049b763d277028856b7da6 100644 (file)
@@ -1373,6 +1373,15 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
                        h->nr_active_devs++;
 
        rcu_read_unlock();
+
+       /*
+        * If we only have redundancy + 1 devices, we're better off with just
+        * replication:
+        */
+       if (h->nr_active_devs < h->redundancy + 2)
+               bch_err(c, "insufficient devices available to create stripe (have %u, need %u) - mismatched bucket sizes?",
+                       h->nr_active_devs, h->redundancy + 2);
+
        list_add(&h->list, &c->ec_stripe_head_list);
        return h;
 }
@@ -1424,6 +1433,11 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
 
        h = ec_new_stripe_head_alloc(c, target, algo, redundancy, watermark);
 found:
+       if (!IS_ERR_OR_NULL(h) &&
+           h->nr_active_devs < h->redundancy + 2) {
+               mutex_unlock(&h->lock);
+               h = NULL;
+       }
        mutex_unlock(&c->ec_stripe_head_lock);
        return h;
 }
@@ -1681,8 +1695,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
        int ret;
 
        h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, watermark);
-       if (!h)
-               bch_err(c, "no stripe head");
        if (IS_ERR_OR_NULL(h))
                return h;
 
index 68a1a96bb7caf526a148a988c12913151c57b6d5..ae7910bf2228c467eb8e52bab82425b8a335c3f3 100644 (file)
        x(BCH_ERR_fsck,                 fsck_repair_unimplemented)              \
        x(BCH_ERR_fsck,                 fsck_repair_impossible)                 \
        x(0,                            restart_recovery)                       \
-       x(0,                            unwritten_extent_update)                \
+       x(0,                            data_update_done)                       \
        x(EINVAL,                       device_state_not_allowed)               \
        x(EINVAL,                       member_info_missing)                    \
        x(EINVAL,                       mismatched_block_size)                  \
        x(BCH_ERR_invalid_sb,           invalid_sb_members)                     \
        x(BCH_ERR_invalid_sb,           invalid_sb_disk_groups)                 \
        x(BCH_ERR_invalid_sb,           invalid_sb_replicas)                    \
+       x(BCH_ERR_invalid_sb,           invalid_replicas_entry)                 \
        x(BCH_ERR_invalid_sb,           invalid_sb_journal)                     \
        x(BCH_ERR_invalid_sb,           invalid_sb_journal_seq_blacklist)       \
        x(BCH_ERR_invalid_sb,           invalid_sb_crypt)                       \
index a864de231b69e297e85491dfd285928152c467b8..f6c92df552702a7455baa39532bf3c6231ae69b4 100644 (file)
@@ -649,37 +649,31 @@ unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
        return replicas;
 }
 
-unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+static inline unsigned __extent_ptr_durability(struct bch_dev *ca, struct extent_ptr_decoded *p)
 {
-       struct bch_dev *ca;
-
        if (p->ptr.cached)
                return 0;
 
-       ca = bch_dev_bkey_exists(c, p->ptr.dev);
-
-       return ca->mi.durability +
-               (p->has_ec
-                ? p->ec.redundancy
-                : 0);
+       return p->has_ec
+               ? p->ec.redundancy + 1
+               : ca->mi.durability;
 }
 
-unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
 {
-       struct bch_dev *ca;
+       struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
 
-       if (p->ptr.cached)
-               return 0;
+       return __extent_ptr_durability(ca, p);
+}
 
-       ca = bch_dev_bkey_exists(c, p->ptr.dev);
+unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+{
+       struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
 
        if (ca->mi.state == BCH_MEMBER_STATE_failed)
                return 0;
 
-       return ca->mi.durability +
-               (p->has_ec
-                ? p->ec.redundancy
-                : 0);
+       return __extent_ptr_durability(ca, p);
 }
 
 unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
index 5b42a76c4796f90062bb86e2914d0301e52cf7d0..9a479e4de6b36a71d1bc4b3c1ef62d8787098179 100644 (file)
@@ -35,9 +35,9 @@ static void bio_check_or_release(struct bio *bio, bool check_dirty)
        }
 }
 
-static void bch2_dio_read_complete(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_dio_read_complete)
 {
-       struct dio_read *dio = container_of(cl, struct dio_read, cl);
+       closure_type(dio, struct dio_read, cl);
 
        dio->req->ki_complete(dio->req, dio->ret);
        bio_check_or_release(&dio->rbio.bio, dio->should_dirty);
@@ -325,9 +325,9 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio)
        return 0;
 }
 
-static void bch2_dio_write_flush_done(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_dio_write_flush_done)
 {
-       struct dio_write *dio = container_of(cl, struct dio_write, op.cl);
+       closure_type(dio, struct dio_write, op.cl);
        struct bch_fs *c = dio->op.c;
 
        closure_debug_destroy(cl);
index 8bd9bcdd27f738a7a2f0d2ac831f0c77fdf20aa3..ff664fd0d8ef80e8b4816d7c430e87d41759b498 100644 (file)
@@ -13,7 +13,7 @@
 
 int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
                                     loff_t start, u64 end,
-                                    int fgp_flags, gfp_t gfp,
+                                    fgf_t fgp_flags, gfp_t gfp,
                                     folios *fs)
 {
        struct folio *f;
index a2222ad586e9e7530728507516abc33da4b0c128..27f712ae37a68209275cc3b2955a542314e80e68 100644 (file)
@@ -7,7 +7,7 @@
 typedef DARRAY(struct folio *) folios;
 
 int bch2_filemap_get_contig_folios_d(struct address_space *, loff_t,
-                                    u64, int, gfp_t, folios *);
+                                    u64, fgf_t, gfp_t, folios *);
 int bch2_write_invalidate_inode_pages_range(struct address_space *, loff_t, loff_t);
 
 /*
index 166d8d8abe683f1b05ddd0115763c79015208fa1..4d51be813509891458735d494b44e36c043035c6 100644 (file)
@@ -1667,8 +1667,7 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
                if (!first)
                        seq_putc(seq, ':');
                first = false;
-               seq_puts(seq, "/dev/");
-               seq_puts(seq, ca->name);
+               seq_puts(seq, ca->disk_sb.sb_name);
        }
 
        return 0;
@@ -1922,10 +1921,7 @@ out:
        return dget(sb->s_root);
 
 err_put_super:
-       sb->s_fs_info = NULL;
-       c->vfs_sb = NULL;
        deactivate_locked_super(sb);
-       bch2_fs_stop(c);
        return ERR_PTR(bch2_err_class(ret));
 }
 
@@ -1933,11 +1929,8 @@ static void bch2_kill_sb(struct super_block *sb)
 {
        struct bch_fs *c = sb->s_fs_info;
 
-       if (c)
-               c->vfs_sb = NULL;
        generic_shutdown_super(sb);
-       if (c)
-               bch2_fs_free(c);
+       bch2_fs_free(c);
 }
 
 static struct file_system_type bcache_fs_type = {
index 9f3e9bd3d767a75fb1a0734c0413193a671f3206..e0c5cd119acc938a5bfe3ff2be8cac2cf1504b11 100644 (file)
@@ -2220,7 +2220,7 @@ static int nlink_cmp(const void *_l, const void *_r)
        const struct nlink *l = _l;
        const struct nlink *r = _r;
 
-       return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot);
+       return cmp_int(l->inum, r->inum);
 }
 
 static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
index def77f2d88024b788b6ce7c03bda07827f73a621..c7849b0753e7a115d563aa4cfdf5bdf33b319ec0 100644 (file)
@@ -1134,7 +1134,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
                 * unlinked inodes in the snapshot leaves:
                 */
                *need_another_pass = true;
-               return 0;
+               goto out;
        }
 
        ret = 1;
@@ -1169,8 +1169,10 @@ again:
         */
        for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
                           BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-               ret = lockrestart_do(trans, may_delete_deleted_inode(trans, &iter, k.k->p,
-                                                                    &need_another_pass));
+               ret = commit_do(trans, NULL, NULL,
+                               BTREE_INSERT_NOFAIL|
+                               BTREE_INSERT_LAZY_RW,
+                       may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass));
                if (ret < 0)
                        break;
 
index a56ed553dc15e6c709c5fed992d0a5b097170703..36763865facd46ba84731074981091e678a37d31 100644 (file)
@@ -209,7 +209,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
        bio = &op->write.op.wbio.bio;
        bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
 
-       ret = bch2_data_update_init(trans, NULL, &op->write,
+       ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
                        writepoint_hashed((unsigned long) current),
                        opts,
                        (struct data_update_opts) {
index f02b3f7d26a016594c3de2a8f25b0006638b91c2..8ede46b1e354634763ff743a2d8aadf88b2931c6 100644 (file)
@@ -580,9 +580,9 @@ static inline void wp_update_state(struct write_point *wp, bool running)
        __wp_update_state(wp, state);
 }
 
-static void bch2_write_index(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_write_index)
 {
-       struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+       closure_type(op, struct bch_write_op, cl);
        struct write_point *wp = op->wp;
        struct workqueue_struct *wq = index_update_wq(op);
        unsigned long flags;
@@ -795,7 +795,7 @@ static int bch2_write_decrypt(struct bch_write_op *op)
         * checksum:
         */
        csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
-       if (bch2_crc_cmp(op->crc.csum, csum))
+       if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
                return -EIO;
 
        ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
@@ -1208,9 +1208,9 @@ static void __bch2_nocow_write_done(struct bch_write_op *op)
                bch2_nocow_write_convert_unwritten(op);
 }
 
-static void bch2_nocow_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_nocow_write_done)
 {
-       struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+       closure_type(op, struct bch_write_op, cl);
 
        __bch2_nocow_write_done(op);
        bch2_write_done(cl);
@@ -1363,7 +1363,7 @@ err:
                op->insert_keys.top = op->insert_keys.keys;
        } else if (op->flags & BCH_WRITE_SYNC) {
                closure_sync(&op->cl);
-               bch2_nocow_write_done(&op->cl);
+               bch2_nocow_write_done(&op->cl.work);
        } else {
                /*
                 * XXX
@@ -1566,9 +1566,9 @@ err:
  * If op->discard is true, instead of inserting the data it invalidates the
  * region of the cache represented by op->bio and op->inode.
  */
-void bch2_write(struct closure *cl)
+CLOSURE_CALLBACK(bch2_write)
 {
-       struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+       closure_type(op, struct bch_write_op, cl);
        struct bio *bio = &op->wbio.bio;
        struct bch_fs *c = op->c;
        unsigned data_len;
index 9323167229eeae8900b65733f022dae6047448b5..6c276a48f95dc2051f22dbfe00e4181319f1ee76 100644 (file)
@@ -90,8 +90,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
        op->devs_need_flush     = NULL;
 }
 
-void bch2_write(struct closure *);
-
+CLOSURE_CALLBACK(bch2_write);
 void bch2_write_point_do_index_updates(struct work_struct *);
 
 static inline struct bch_write_bio *wbio_init(struct bio *bio)
index 5b5d69f2316b216746c0c08db2346c2c8c95ff16..489b34046e7807744bdc7b8462910e5d9d4dc53a 100644 (file)
@@ -321,6 +321,8 @@ static int journal_entry_open(struct journal *j)
        atomic64_inc(&j->seq);
        journal_pin_list_init(fifo_push_ref(&j->pin), 1);
 
+       BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq));
+
        BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf);
 
        bkey_extent_init(&buf->key);
@@ -526,36 +528,6 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
        return ret;
 }
 
-/* journal_preres: */
-
-static bool journal_preres_available(struct journal *j,
-                                    struct journal_preres *res,
-                                    unsigned new_u64s,
-                                    unsigned flags)
-{
-       bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags, true);
-
-       if (!ret && mutex_trylock(&j->reclaim_lock)) {
-               bch2_journal_reclaim(j);
-               mutex_unlock(&j->reclaim_lock);
-       }
-
-       return ret;
-}
-
-int __bch2_journal_preres_get(struct journal *j,
-                             struct journal_preres *res,
-                             unsigned new_u64s,
-                             unsigned flags)
-{
-       int ret;
-
-       closure_wait_event(&j->preres_wait,
-                  (ret = bch2_journal_error(j)) ||
-                  journal_preres_available(j, res, new_u64s, flags));
-       return ret;
-}
-
 /* journal_entry_res: */
 
 void bch2_journal_entry_res_resize(struct journal *j,
@@ -1306,7 +1278,6 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        prt_printf(out, "last_seq:\t\t%llu\n",          journal_last_seq(j));
        prt_printf(out, "last_seq_ondisk:\t%llu\n",             j->last_seq_ondisk);
        prt_printf(out, "flushed_seq_ondisk:\t%llu\n",  j->flushed_seq_ondisk);
-       prt_printf(out, "prereserved:\t\t%u/%u\n",              j->prereserved.reserved, j->prereserved.remaining);
        prt_printf(out, "watermark:\t\t%s\n",           bch2_watermarks[j->watermark]);
        prt_printf(out, "each entry reserved:\t%u\n",   j->entry_u64s_reserved);
        prt_printf(out, "nr flush writes:\t%llu\n",             j->nr_flush_writes);
index 011711e99c8d825ec968cf513f82c08a66ecabc5..4c513fca5ef2d1db0c1bc9673359235912505afc 100644 (file)
@@ -136,9 +136,7 @@ static inline u64 journal_last_seq(struct journal *j)
 
 static inline u64 journal_cur_seq(struct journal *j)
 {
-       EBUG_ON(j->pin.back - 1 != atomic64_read(&j->seq));
-
-       return j->pin.back - 1;
+       return atomic64_read(&j->seq);
 }
 
 static inline u64 journal_last_unwritten_seq(struct journal *j)
@@ -395,104 +393,6 @@ out:
        return 0;
 }
 
-/* journal_preres: */
-
-static inline void journal_set_watermark(struct journal *j)
-{
-       union journal_preres_state s = READ_ONCE(j->prereserved);
-       unsigned watermark = BCH_WATERMARK_stripe;
-
-       if (fifo_free(&j->pin) < j->pin.size / 4)
-               watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
-       if (fifo_free(&j->pin) < j->pin.size / 8)
-               watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
-
-       if (s.reserved > s.remaining)
-               watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
-       if (!s.remaining)
-               watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
-
-       if (watermark == j->watermark)
-               return;
-
-       swap(watermark, j->watermark);
-       if (watermark > j->watermark)
-               journal_wake(j);
-}
-
-static inline void bch2_journal_preres_put(struct journal *j,
-                                          struct journal_preres *res)
-{
-       union journal_preres_state s = { .reserved = res->u64s };
-
-       if (!res->u64s)
-               return;
-
-       s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
-       res->u64s = 0;
-
-       if (unlikely(s.waiting)) {
-               clear_bit(ilog2((((union journal_preres_state) { .waiting = 1 }).v)),
-                         (unsigned long *) &j->prereserved.v);
-               closure_wake_up(&j->preres_wait);
-       }
-
-       if (s.reserved <= s.remaining && j->watermark)
-               journal_set_watermark(j);
-}
-
-int __bch2_journal_preres_get(struct journal *,
-                       struct journal_preres *, unsigned, unsigned);
-
-static inline int bch2_journal_preres_get_fast(struct journal *j,
-                                              struct journal_preres *res,
-                                              unsigned new_u64s,
-                                              unsigned flags,
-                                              bool set_waiting)
-{
-       int d = new_u64s - res->u64s;
-       union journal_preres_state old, new;
-       u64 v = atomic64_read(&j->prereserved.counter);
-       enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
-       int ret;
-
-       do {
-               old.v = new.v = v;
-               ret = 0;
-
-               if (watermark == BCH_WATERMARK_reclaim ||
-                   new.reserved + d < new.remaining) {
-                       new.reserved += d;
-                       ret = 1;
-               } else if (set_waiting && !new.waiting)
-                       new.waiting = true;
-               else
-                       return 0;
-       } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
-                                      old.v, new.v)) != old.v);
-
-       if (ret)
-               res->u64s += d;
-       return ret;
-}
-
-static inline int bch2_journal_preres_get(struct journal *j,
-                                         struct journal_preres *res,
-                                         unsigned new_u64s,
-                                         unsigned flags)
-{
-       if (new_u64s <= res->u64s)
-               return 0;
-
-       if (bch2_journal_preres_get_fast(j, res, new_u64s, flags, false))
-               return 0;
-
-       if (flags & JOURNAL_RES_GET_NONBLOCK)
-               return -BCH_ERR_journal_preres_get_blocked;
-
-       return __bch2_journal_preres_get(j, res, new_u64s, flags);
-}
-
 /* journal_entry_res: */
 
 void bch2_journal_entry_res_resize(struct journal *,
index f4bc2cdbfdd7921b4d562cf0df7d29b1f8c51c87..0f17fc5f8d6844f774d71f5a361b55e6acbb866f 100644 (file)
@@ -547,6 +547,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
        struct jset_entry_data_usage *u =
                container_of(entry, struct jset_entry_data_usage, entry);
        unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+       struct printbuf err = PRINTBUF;
        int ret = 0;
 
        if (journal_entry_err_on(bytes < sizeof(*u) ||
@@ -555,10 +556,19 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
                                 journal_entry_data_usage_bad_size,
                                 "invalid journal entry usage: bad size")) {
                journal_entry_null_range(entry, vstruct_next(entry));
-               return ret;
+               goto out;
        }
 
+       if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err),
+                                c, version, jset, entry,
+                                journal_entry_data_usage_bad_size,
+                                "invalid journal entry usage: %s", err.buf)) {
+               journal_entry_null_range(entry, vstruct_next(entry));
+               goto out;
+       }
+out:
 fsck_err:
+       printbuf_exit(&err);
        return ret;
 }
 
@@ -1025,10 +1035,9 @@ next_block:
        return 0;
 }
 
-static void bch2_journal_read_device(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_journal_read_device)
 {
-       struct journal_device *ja =
-               container_of(cl, struct journal_device, read);
+       closure_type(ja, struct journal_device, read);
        struct bch_dev *ca = container_of(ja, struct bch_dev, journal);
        struct bch_fs *c = ca->fs;
        struct journal_list *jlist =
@@ -1079,6 +1088,12 @@ found:
 
        if (ja->bucket_seq[ja->cur_idx] &&
            ja->sectors_free == ca->mi.bucket_size) {
+#if 0
+               /*
+                * Debug code for ZNS support, where we (probably) want to be
+                * correlated where we stopped in the journal to the zone write
+                * points:
+                */
                bch_err(c, "ja->sectors_free == ca->mi.bucket_size");
                bch_err(c, "cur_idx %u/%u", ja->cur_idx, ja->nr);
                for (i = 0; i < 3; i++) {
@@ -1086,6 +1101,7 @@ found:
 
                        bch_err(c, "bucket_seq[%u] = %llu", idx, ja->bucket_seq[idx]);
                }
+#endif
                ja->sectors_free = 0;
        }
 
@@ -1513,9 +1529,9 @@ static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)
        return j->buf + (journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK);
 }
 
-static void journal_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_done)
 {
-       struct journal *j = container_of(cl, struct journal, io);
+       closure_type(j, struct journal, io);
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct journal_buf *w = journal_last_unwritten_buf(j);
        struct bch_replicas_padded replicas;
@@ -1631,9 +1647,9 @@ static void journal_write_endio(struct bio *bio)
        percpu_ref_put(&ca->io_ref);
 }
 
-static void do_journal_write(struct closure *cl)
+static CLOSURE_CALLBACK(do_journal_write)
 {
-       struct journal *j = container_of(cl, struct journal, io);
+       closure_type(j, struct journal, io);
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bch_dev *ca;
        struct journal_buf *w = journal_last_unwritten_buf(j);
@@ -1843,9 +1859,9 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *
        return 0;
 }
 
-void bch2_journal_write(struct closure *cl)
+CLOSURE_CALLBACK(bch2_journal_write)
 {
-       struct journal *j = container_of(cl, struct journal, io);
+       closure_type(j, struct journal, io);
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bch_dev *ca;
        struct journal_buf *w = journal_last_unwritten_buf(j);
index a88d097b13f1294a5ca1f3c30ebba5282ef56da3..c035e7c108e19012e6e4e1f708136dec27b5387c 100644 (file)
@@ -60,6 +60,6 @@ void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
 
 int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *);
 
-void bch2_journal_write(struct closure *);
+CLOSURE_CALLBACK(bch2_journal_write);
 
 #endif /* _BCACHEFS_JOURNAL_IO_H */
index 9a584aaaa2eba9abadc7f2016a20c70834e0610c..e63c6eda86afeb9e9c0920554e9bef953b0a9a26 100644 (file)
@@ -50,16 +50,21 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
        return available;
 }
 
-static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
+static inline void journal_set_watermark(struct journal *j, bool low_on_space)
 {
-       union journal_preres_state old, new;
-       u64 v = atomic64_read(&j->prereserved.counter);
+       unsigned watermark = BCH_WATERMARK_stripe;
 
-       do {
-               old.v = new.v = v;
-               new.remaining = u64s_remaining;
-       } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
-                                      old.v, new.v)) != old.v);
+       if (low_on_space)
+               watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
+       if (fifo_free(&j->pin) < j->pin.size / 4)
+               watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
+
+       if (watermark == j->watermark)
+               return;
+
+       swap(watermark, j->watermark);
+       if (watermark > j->watermark)
+               journal_wake(j);
 }
 
 static struct journal_space
@@ -162,7 +167,6 @@ void bch2_journal_space_available(struct journal *j)
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bch_dev *ca;
        unsigned clean, clean_ondisk, total;
-       s64 u64s_remaining = 0;
        unsigned max_entry_size  = min(j->buf[0].buf_size >> 9,
                                       j->buf[1].buf_size >> 9);
        unsigned i, nr_online = 0, nr_devs_want;
@@ -222,16 +226,10 @@ void bch2_journal_space_available(struct journal *j)
        else
                clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
 
-       u64s_remaining  = (u64) clean << 6;
-       u64s_remaining -= (u64) total << 3;
-       u64s_remaining = max(0LL, u64s_remaining);
-       u64s_remaining /= 4;
-       u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
+       journal_set_watermark(j, clean * 4 <= total);
 out:
        j->cur_entry_sectors    = !ret ? j->space[journal_space_discarded].next_entry : 0;
        j->cur_entry_error      = ret;
-       journal_set_remaining(j, u64s_remaining);
-       journal_set_watermark(j);
 
        if (!ret)
                journal_wake(j);
@@ -555,11 +553,6 @@ static u64 journal_seq_to_flush(struct journal *j)
                /* Try to keep the journal at most half full: */
                nr_buckets = ja->nr / 2;
 
-               /* And include pre-reservations: */
-               nr_buckets += DIV_ROUND_UP(j->prereserved.reserved,
-                                          (ca->mi.bucket_size << 6) -
-                                          journal_entry_overhead(j));
-
                nr_buckets = min(nr_buckets, ja->nr);
 
                bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
@@ -638,10 +631,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
                               msecs_to_jiffies(c->opts.journal_reclaim_delay)))
                        min_nr = 1;
 
-               if (j->prereserved.reserved * 4 > j->prereserved.remaining)
-                       min_nr = 1;
-
-               if (fifo_free(&j->pin) <= 32)
+               if (j->watermark != BCH_WATERMARK_stripe)
                        min_nr = 1;
 
                if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used)
@@ -652,8 +642,6 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
                trace_and_count(c, journal_reclaim_start, c,
                                direct, kicked,
                                min_nr, min_key_cache,
-                               j->prereserved.reserved,
-                               j->prereserved.remaining,
                                atomic_read(&c->btree_cache.dirty),
                                c->btree_cache.used,
                                atomic_long_read(&c->btree_key_cache.nr_dirty),
index 42504e16acb6ccf261a6699b6d468cba7d26a776..a756b69582e34955ecfe86fbaa688785aeca532f 100644 (file)
@@ -76,14 +76,6 @@ struct journal_res {
        u64                     seq;
 };
 
-/*
- * For reserving space in the journal prior to getting a reservation on a
- * particular journal entry:
- */
-struct journal_preres {
-       unsigned                u64s;
-};
-
 union journal_res_state {
        struct {
                atomic64_t      counter;
@@ -104,22 +96,6 @@ union journal_res_state {
        };
 };
 
-union journal_preres_state {
-       struct {
-               atomic64_t      counter;
-       };
-
-       struct {
-               u64             v;
-       };
-
-       struct {
-               u64             waiting:1,
-                               reserved:31,
-                               remaining:32;
-       };
-};
-
 /* bytes: */
 #define JOURNAL_ENTRY_SIZE_MIN         (64U << 10) /* 64k */
 #define JOURNAL_ENTRY_SIZE_MAX         (4U  << 20) /* 4M */
@@ -180,8 +156,6 @@ struct journal {
        union journal_res_state reservations;
        enum bch_watermark      watermark;
 
-       union journal_preres_state prereserved;
-
        } __aligned(SMP_CACHE_BYTES);
 
        unsigned long           flags;
index ab749bf2fcbc551e68753857efdf008848d140b7..54830ee0ed886795233e939158d9b4f417d11f85 100644 (file)
@@ -49,17 +49,6 @@ static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k)
        }
 }
 
-static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c k)
-{
-       if (trace_move_extent_alloc_mem_fail_enabled()) {
-               struct printbuf buf = PRINTBUF;
-
-               bch2_bkey_val_to_text(&buf, c, k);
-               trace_move_extent_alloc_mem_fail(c, buf.buf);
-               printbuf_exit(&buf);
-       }
-}
-
 struct moving_io {
        struct list_head                read_list;
        struct list_head                io_list;
@@ -163,12 +152,18 @@ void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
                atomic_read(&ctxt->write_sectors) != sectors_pending);
 }
 
+static void bch2_moving_ctxt_flush_all(struct moving_context *ctxt)
+{
+       move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
+       bch2_trans_unlock_long(ctxt->trans);
+       closure_sync(&ctxt->cl);
+}
+
 void bch2_moving_ctxt_exit(struct moving_context *ctxt)
 {
        struct bch_fs *c = ctxt->trans->c;
 
-       move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
-       closure_sync(&ctxt->cl);
+       bch2_moving_ctxt_flush_all(ctxt);
 
        EBUG_ON(atomic_read(&ctxt->write_sectors));
        EBUG_ON(atomic_read(&ctxt->write_ios));
@@ -223,49 +218,6 @@ void bch2_move_stats_init(struct bch_move_stats *stats, char *name)
        scnprintf(stats->name, sizeof(stats->name), "%s", name);
 }
 
-static int bch2_extent_drop_ptrs(struct btree_trans *trans,
-                                struct btree_iter *iter,
-                                struct bkey_s_c k,
-                                struct data_update_opts data_opts)
-{
-       struct bch_fs *c = trans->c;
-       struct bkey_i *n;
-       int ret;
-
-       n = bch2_bkey_make_mut_noupdate(trans, k);
-       ret = PTR_ERR_OR_ZERO(n);
-       if (ret)
-               return ret;
-
-       while (data_opts.kill_ptrs) {
-               unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
-               struct bch_extent_ptr *ptr;
-
-               bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
-               data_opts.kill_ptrs ^= 1U << drop;
-       }
-
-       /*
-        * If the new extent no longer has any pointers, bch2_extent_normalize()
-        * will do the appropriate thing with it (turning it into a
-        * KEY_TYPE_error key, or just a discard if it was a cached extent)
-        */
-       bch2_extent_normalize(c, bkey_i_to_s(n));
-
-       /*
-        * Since we're not inserting through an extent iterator
-        * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
-        * we aren't using the extent overwrite path to delete, we're
-        * just using the normal key deletion path:
-        */
-       if (bkey_deleted(&n->k))
-               n->k.size = 0;
-
-       return bch2_trans_relock(trans) ?:
-               bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
-               bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
-}
-
 int bch2_move_extent(struct moving_context *ctxt,
                     struct move_bucket_in_flight *bucket_in_flight,
                     struct btree_iter *iter,
@@ -335,19 +287,11 @@ int bch2_move_extent(struct moving_context *ctxt,
        io->rbio.bio.bi_iter.bi_sector  = bkey_start_offset(k.k);
        io->rbio.bio.bi_end_io          = move_read_endio;
 
-       ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
+       ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
                                    io_opts, data_opts, iter->btree_id, k);
-       if (ret && ret != -BCH_ERR_unwritten_extent_update)
+       if (ret)
                goto err_free_pages;
 
-       if (ret == -BCH_ERR_unwritten_extent_update) {
-               bch2_update_unwritten_extent(trans, &io->write);
-               move_free(io);
-               return 0;
-       }
-
-       BUG_ON(ret);
-
        io->write.op.end_io = move_write_done;
 
        if (ctxt->rate)
@@ -391,8 +335,23 @@ err_free_pages:
 err_free:
        kfree(io);
 err:
-       this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]);
-       trace_move_extent_alloc_mem_fail2(c, k);
+       if (ret == -BCH_ERR_data_update_done)
+               return 0;
+
+       if (bch2_err_matches(ret, EROFS) ||
+           bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               return ret;
+
+       this_cpu_inc(c->counters[BCH_COUNTER_move_extent_start_fail]);
+       if (trace_move_extent_start_fail_enabled()) {
+               struct printbuf buf = PRINTBUF;
+
+               bch2_bkey_val_to_text(&buf, c, k);
+               prt_str(&buf, ": ");
+               prt_str(&buf, bch2_err_str(ret));
+               trace_move_extent_start_fail(c, buf.buf);
+               printbuf_exit(&buf);
+       }
        return ret;
 }
 
@@ -482,37 +441,30 @@ int bch2_move_get_io_opts_one(struct btree_trans *trans,
 int bch2_move_ratelimit(struct moving_context *ctxt)
 {
        struct bch_fs *c = ctxt->trans->c;
+       bool is_kthread = current->flags & PF_KTHREAD;
        u64 delay;
 
-       if (ctxt->wait_on_copygc && !c->copygc_running) {
-               bch2_trans_unlock_long(ctxt->trans);
+       if (ctxt->wait_on_copygc && c->copygc_running) {
+               bch2_moving_ctxt_flush_all(ctxt);
                wait_event_killable(c->copygc_running_wq,
                                    !c->copygc_running ||
-                                   kthread_should_stop());
+                                   (is_kthread && kthread_should_stop()));
        }
 
        do {
                delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;
 
-
-               if (delay) {
-                       if (delay > HZ / 10)
-                               bch2_trans_unlock_long(ctxt->trans);
-                       else
-                               bch2_trans_unlock(ctxt->trans);
-                       set_current_state(TASK_INTERRUPTIBLE);
-               }
-
-               if ((current->flags & PF_KTHREAD) && kthread_should_stop()) {
-                       __set_current_state(TASK_RUNNING);
+               if (is_kthread && kthread_should_stop())
                        return 1;
-               }
 
                if (delay)
-                       schedule_timeout(delay);
+                       move_ctxt_wait_event_timeout(ctxt,
+                                       freezing(current) ||
+                                       (is_kthread && kthread_should_stop()),
+                                       delay);
 
                if (unlikely(freezing(current))) {
-                       move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
+                       bch2_moving_ctxt_flush_all(ctxt);
                        try_to_freeze();
                }
        } while (delay);
@@ -683,6 +635,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 {
        struct btree_trans *trans = ctxt->trans;
        struct bch_fs *c = trans->c;
+       bool is_kthread = current->flags & PF_KTHREAD;
        struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
        struct btree_iter iter;
        struct bkey_buf sk;
@@ -728,6 +681,9 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
        }
 
        while (!(ret = bch2_move_ratelimit(ctxt))) {
+               if (is_kthread && kthread_should_stop())
+                       break;
+
                bch2_trans_begin(trans);
 
                ret = bch2_get_next_backpointer(trans, bucket, gen,
index 07cf9d42643b4fe537b6db513285efc1f65bd366..0906aa2d1de29c328fbbe9a43ca877eb7fc02471 100644 (file)
@@ -38,6 +38,25 @@ struct moving_context {
        wait_queue_head_t       wait;
 };
 
+#define move_ctxt_wait_event_timeout(_ctxt, _cond, _timeout)                   \
+({                                                                             \
+       int _ret = 0;                                                           \
+       while (true) {                                                          \
+               bool cond_finished = false;                                     \
+               bch2_moving_ctxt_do_pending_writes(_ctxt);                      \
+                                                                               \
+               if (_cond)                                                      \
+                       break;                                                  \
+               bch2_trans_unlock_long((_ctxt)->trans);                         \
+               _ret = __wait_event_timeout((_ctxt)->wait,                      \
+                            bch2_moving_ctxt_next_pending_write(_ctxt) ||      \
+                            (cond_finished = (_cond)), _timeout);              \
+               if (_ret || ( cond_finished))                                   \
+                       break;                                                  \
+       }                                                                       \
+       _ret;                                                                   \
+})
+
 #define move_ctxt_wait_event(_ctxt, _cond)                             \
 do {                                                                   \
        bool cond_finished = false;                                     \
index 0a0576326c5b2d433fcd4aace513379972f57152..a84e79f79e5ec562fa8f9d072ef3250e60a8564f 100644 (file)
@@ -207,7 +207,7 @@ static int bch2_copygc(struct moving_context *ctxt,
                goto err;
 
        darray_for_each(buckets, i) {
-               if (unlikely(freezing(current)))
+               if (kthread_should_stop() || freezing(current))
                        break;
 
                f = move_bucket_in_flight_add(buckets_in_flight, *i);
index 9c30500ce9200af8be8f71a50f5fa02c356e4400..770ced1c62850d317eb991c8723401456735cc90 100644 (file)
@@ -167,6 +167,8 @@ static int bch2_journal_replay(struct bch_fs *c)
                        goto err;
        }
 
+       BUG_ON(!atomic_read(&keys->ref));
+
        for (i = 0; i < keys->nr; i++) {
                k = keys_sorted[i];
 
@@ -188,6 +190,9 @@ static int bch2_journal_replay(struct bch_fs *c)
                }
        }
 
+       if (!c->opts.keep_journal)
+               bch2_journal_keys_put_initial(c);
+
        replay_now_at(j, j->replay_journal_seq_end);
        j->replay_journal_seq = 0;
 
@@ -909,10 +914,8 @@ out:
        bch2_flush_fsck_errs(c);
 
        if (!c->opts.keep_journal &&
-           test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) {
-               bch2_journal_keys_free(&c->journal_keys);
-               bch2_journal_entries_free(c);
-       }
+           test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
+               bch2_journal_keys_put_initial(c);
        kfree(clean);
 
        if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) {
index 1c3ae13bfced1d8ce9eeee118cb6e9fe1552e7a5..2008fe8bf7060d0e4da522e723ce3ae6fbc42d9e 100644 (file)
@@ -68,6 +68,33 @@ void bch2_replicas_entry_to_text(struct printbuf *out,
        prt_printf(out, "]");
 }
 
+int bch2_replicas_entry_validate(struct bch_replicas_entry *r,
+                                struct bch_sb *sb,
+                                struct printbuf *err)
+{
+       if (!r->nr_devs) {
+               prt_printf(err, "no devices in entry ");
+               goto bad;
+       }
+
+       if (r->nr_required > 1 &&
+           r->nr_required >= r->nr_devs) {
+               prt_printf(err, "bad nr_required in entry ");
+               goto bad;
+       }
+
+       for (unsigned i = 0; i < r->nr_devs; i++)
+               if (!bch2_dev_exists(sb, r->devs[i])) {
+                       prt_printf(err, "invalid device %u in entry ", r->devs[i]);
+                       goto bad;
+               }
+
+       return 0;
+bad:
+       bch2_replicas_entry_to_text(err, r);
+       return -BCH_ERR_invalid_replicas_entry;
+}
+
 void bch2_cpu_replicas_to_text(struct printbuf *out,
                               struct bch_replicas_cpu *r)
 {
@@ -163,7 +190,8 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry *e,
 }
 
 static struct bch_replicas_cpu
-cpu_replicas_add_entry(struct bch_replicas_cpu *old,
+cpu_replicas_add_entry(struct bch_fs *c,
+                      struct bch_replicas_cpu *old,
                       struct bch_replicas_entry *new_entry)
 {
        unsigned i;
@@ -173,6 +201,9 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
                                        replicas_entry_bytes(new_entry)),
        };
 
+       for (i = 0; i < new_entry->nr_devs; i++)
+               BUG_ON(!bch2_dev_exists2(c, new_entry->devs[i]));
+
        BUG_ON(!new_entry->data_type);
        verify_replicas_entry(new_entry);
 
@@ -382,7 +413,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 
        if (c->replicas_gc.entries &&
            !__replicas_has_entry(&c->replicas_gc, new_entry)) {
-               new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry);
+               new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
                if (!new_gc.entries) {
                        ret = -BCH_ERR_ENOMEM_cpu_replicas;
                        goto err;
@@ -390,7 +421,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
        }
 
        if (!__replicas_has_entry(&c->replicas, new_entry)) {
-               new_r = cpu_replicas_add_entry(&c->replicas, new_entry);
+               new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
                if (!new_r.entries) {
                        ret = -BCH_ERR_ENOMEM_cpu_replicas;
                        goto err;
@@ -598,7 +629,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
        if (idx < 0) {
                struct bch_replicas_cpu n;
 
-               n = cpu_replicas_add_entry(&c->replicas, r);
+               n = cpu_replicas_add_entry(c, &c->replicas, r);
                if (!n.entries)
                        return -BCH_ERR_ENOMEM_cpu_replicas;
 
@@ -797,7 +828,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
                                      struct bch_sb *sb,
                                      struct printbuf *err)
 {
-       unsigned i, j;
+       unsigned i;
 
        sort_cmp_size(cpu_r->entries,
                      cpu_r->nr,
@@ -808,31 +839,9 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
                struct bch_replicas_entry *e =
                        cpu_replicas_entry(cpu_r, i);
 
-               if (e->data_type >= BCH_DATA_NR) {
-                       prt_printf(err, "invalid data type in entry ");
-                       bch2_replicas_entry_to_text(err, e);
-                       return -BCH_ERR_invalid_sb_replicas;
-               }
-
-               if (!e->nr_devs) {
-                       prt_printf(err, "no devices in entry ");
-                       bch2_replicas_entry_to_text(err, e);
-                       return -BCH_ERR_invalid_sb_replicas;
-               }
-
-               if (e->nr_required > 1 &&
-                   e->nr_required >= e->nr_devs) {
-                       prt_printf(err, "bad nr_required in entry ");
-                       bch2_replicas_entry_to_text(err, e);
-                       return -BCH_ERR_invalid_sb_replicas;
-               }
-
-               for (j = 0; j < e->nr_devs; j++)
-                       if (!bch2_dev_exists(sb, e->devs[j])) {
-                               prt_printf(err, "invalid device %u in entry ", e->devs[j]);
-                               bch2_replicas_entry_to_text(err, e);
-                               return -BCH_ERR_invalid_sb_replicas;
-                       }
+               int ret = bch2_replicas_entry_validate(e, sb, err);
+               if (ret)
+                       return ret;
 
                if (i + 1 < cpu_r->nr) {
                        struct bch_replicas_entry *n =
index 4887675a86f09c7a3942f3eae33d76179fe3c7bc..f70a642775d1b2b8257caff0ed52c2f8b90016da 100644 (file)
@@ -9,6 +9,8 @@
 void bch2_replicas_entry_sort(struct bch_replicas_entry *);
 void bch2_replicas_entry_to_text(struct printbuf *,
                                 struct bch_replicas_entry *);
+int bch2_replicas_entry_validate(struct bch_replicas_entry *,
+                                struct bch_sb *, struct printbuf *);
 void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
 
 static inline struct bch_replicas_entry *
index b775cf0fb7cbf211a3f388cf78de0de6f33c581c..97790445e67ad2923fc4a0413d2c824cf506455e 100644 (file)
@@ -163,8 +163,11 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
                this_cpu_sub(*lock->readers, !ret);
                preempt_enable();
 
-               if (!ret && (old & SIX_LOCK_WAITING_write))
-                       ret = -1 - SIX_LOCK_write;
+               if (!ret) {
+                       smp_mb();
+                       if (atomic_read(&lock->state) & SIX_LOCK_WAITING_write)
+                               ret = -1 - SIX_LOCK_write;
+               }
        } else if (type == SIX_LOCK_write && lock->readers) {
                if (try) {
                        atomic_add(SIX_LOCK_HELD_write, &lock->state);
index e9af77b384c76c694194c53b348706e354df9a22..5dac038f085195c894ace91df6d43ad296cdbd5c 100644 (file)
@@ -959,7 +959,7 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
                                        parent_id, id))
                        goto err;
 
-               parent->v.children[i] = le32_to_cpu(child_id);
+               parent->v.children[i] = cpu_to_le32(child_id);
 
                normalize_snapshot_child_pointers(&parent->v);
        }
index 86833445af205643b81bd08b3b204005c7bee071..2d2e66a4e4681ee5ba6ba18666d135ab961a2cbf 100644 (file)
@@ -20,7 +20,7 @@ struct snapshot_t {
 };
 
 struct snapshot_table {
-       struct snapshot_t       s[0];
+       DECLARE_FLEX_ARRAY(struct snapshot_t, s);
 };
 
 typedef struct {
index f4cad903f4d69da7776825f50bf561a1980a02a0..f3e12f7979d5ea65c2bd51b56da8392c2953bcde 100644 (file)
@@ -166,6 +166,7 @@ void bch2_free_super(struct bch_sb_handle *sb)
        if (!IS_ERR_OR_NULL(sb->bdev))
                blkdev_put(sb->bdev, sb->holder);
        kfree(sb->holder);
+       kfree(sb->sb_name);
 
        kfree(sb->sb);
        memset(sb, 0, sizeof(*sb));
@@ -675,6 +676,10 @@ retry:
        if (!sb->holder)
                return -ENOMEM;
 
+       sb->sb_name = kstrdup(path, GFP_KERNEL);
+       if (!sb->sb_name)
+               return -ENOMEM;
+
 #ifndef __KERNEL__
        if (opt_get(*opts, direct_io) == false)
                sb->mode |= BLK_OPEN_BUFFERED;
index 24672bb31cbe9c479964dffe1d1b979dd66013c7..f63474c5c5a2937d89a6f6316d2ed4d49e6b454c 100644 (file)
@@ -423,6 +423,18 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
                bch2_dev_allocator_add(c, ca);
        bch2_recalc_capacity(c);
 
+       set_bit(BCH_FS_RW, &c->flags);
+       set_bit(BCH_FS_WAS_RW, &c->flags);
+
+#ifndef BCH_WRITE_REF_DEBUG
+       percpu_ref_reinit(&c->writes);
+#else
+       for (i = 0; i < BCH_WRITE_REF_NR; i++) {
+               BUG_ON(atomic_long_read(&c->writes[i]));
+               atomic_long_inc(&c->writes[i]);
+       }
+#endif
+
        ret = bch2_gc_thread_start(c);
        if (ret) {
                bch_err(c, "error starting gc thread");
@@ -439,24 +451,16 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
                        goto err;
        }
 
-#ifndef BCH_WRITE_REF_DEBUG
-       percpu_ref_reinit(&c->writes);
-#else
-       for (i = 0; i < BCH_WRITE_REF_NR; i++) {
-               BUG_ON(atomic_long_read(&c->writes[i]));
-               atomic_long_inc(&c->writes[i]);
-       }
-#endif
-       set_bit(BCH_FS_RW, &c->flags);
-       set_bit(BCH_FS_WAS_RW, &c->flags);
-
        bch2_do_discards(c);
        bch2_do_invalidates(c);
        bch2_do_stripe_deletes(c);
        bch2_do_pending_node_rewrites(c);
        return 0;
 err:
-       __bch2_fs_read_only(c);
+       if (test_bit(BCH_FS_RW, &c->flags))
+               bch2_fs_read_only(c);
+       else
+               __bch2_fs_read_only(c);
        return ret;
 }
 
@@ -504,8 +508,8 @@ static void __bch2_fs_free(struct bch_fs *c)
        bch2_io_clock_exit(&c->io_clock[WRITE]);
        bch2_io_clock_exit(&c->io_clock[READ]);
        bch2_fs_compress_exit(c);
-       bch2_journal_keys_free(&c->journal_keys);
-       bch2_journal_entries_free(c);
+       bch2_journal_keys_put_initial(c);
+       BUG_ON(atomic_read(&c->journal_keys.ref));
        bch2_fs_btree_write_buffer_exit(c);
        percpu_free_rwsem(&c->mark_lock);
        free_percpu(c->online_reserved);
@@ -702,6 +706,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 
        init_rwsem(&c->gc_lock);
        mutex_init(&c->gc_gens_lock);
+       atomic_set(&c->journal_keys.ref, 1);
+       c->journal_keys.initial_ref_held = true;
 
        for (i = 0; i < BCH_TIME_STAT_NR; i++)
                bch2_time_stats_init(&c->times[i]);
index 7dda4985b99fe6cfdde52c6df869e3df446d48d0..9c1fd4ca2b103478c9d943a2063f64825b8550cb 100644 (file)
@@ -5,6 +5,7 @@
 struct bch_sb_handle {
        struct bch_sb           *sb;
        struct block_device     *bdev;
+       char                    *sb_name;
        struct bio              *bio;
        void                    *holder;
        size_t                  buffer_size;
index 893304a1f06e6ea03df55020cf7be26f349d8cfe..fd49b63562c36cc4d2bedc1884be0815160ddb90 100644 (file)
@@ -196,10 +196,9 @@ DEFINE_EVENT(bio, journal_write,
 TRACE_EVENT(journal_reclaim_start,
        TP_PROTO(struct bch_fs *c, bool direct, bool kicked,
                 u64 min_nr, u64 min_key_cache,
-                u64 prereserved, u64 prereserved_total,
                 u64 btree_cache_dirty, u64 btree_cache_total,
                 u64 btree_key_cache_dirty, u64 btree_key_cache_total),
-       TP_ARGS(c, direct, kicked, min_nr, min_key_cache, prereserved, prereserved_total,
+       TP_ARGS(c, direct, kicked, min_nr, min_key_cache,
                btree_cache_dirty, btree_cache_total,
                btree_key_cache_dirty, btree_key_cache_total),
 
@@ -209,8 +208,6 @@ TRACE_EVENT(journal_reclaim_start,
                __field(bool,           kicked                  )
                __field(u64,            min_nr                  )
                __field(u64,            min_key_cache           )
-               __field(u64,            prereserved             )
-               __field(u64,            prereserved_total       )
                __field(u64,            btree_cache_dirty       )
                __field(u64,            btree_cache_total       )
                __field(u64,            btree_key_cache_dirty   )
@@ -223,22 +220,18 @@ TRACE_EVENT(journal_reclaim_start,
                __entry->kicked                 = kicked;
                __entry->min_nr                 = min_nr;
                __entry->min_key_cache          = min_key_cache;
-               __entry->prereserved            = prereserved;
-               __entry->prereserved_total      = prereserved_total;
                __entry->btree_cache_dirty      = btree_cache_dirty;
                __entry->btree_cache_total      = btree_cache_total;
                __entry->btree_key_cache_dirty  = btree_key_cache_dirty;
                __entry->btree_key_cache_total  = btree_key_cache_total;
        ),
 
-       TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
+       TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu btree cache %llu/%llu key cache %llu/%llu",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->direct,
                  __entry->kicked,
                  __entry->min_nr,
                  __entry->min_key_cache,
-                 __entry->prereserved,
-                 __entry->prereserved_total,
                  __entry->btree_cache_dirty,
                  __entry->btree_cache_total,
                  __entry->btree_key_cache_dirty,
@@ -761,9 +754,9 @@ TRACE_EVENT(move_extent_fail,
        TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
 );
 
-DEFINE_EVENT(bkey, move_extent_alloc_mem_fail,
-       TP_PROTO(struct bch_fs *c, const char *k),
-       TP_ARGS(c, k)
+DEFINE_EVENT(bkey, move_extent_start_fail,
+       TP_PROTO(struct bch_fs *c, const char *str),
+       TP_ARGS(c, str)
 );
 
 TRACE_EVENT(move_data,
index a39ff0c296ecfb2a000edd6aace20bdbb8db20ea..79d982674c180307f5d5a4da42fabaa480878573 100644 (file)
@@ -552,6 +552,14 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
                s.v = v + 1;
                s.defined = true;
        } else {
+               /*
+                * Check if this option was set on the parent - if so, switched
+                * back to inheriting from the parent:
+                *
+                * rename() also has to deal with keeping inherited options up
+                * to date - see bch2_reinherit_attrs()
+                */
+               spin_lock(&dentry->d_lock);
                if (!IS_ROOT(dentry)) {
                        struct bch_inode_info *dir =
                                to_bch_ei(d_inode(dentry->d_parent));
@@ -560,6 +568,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
                } else {
                        s.v = 0;
                }
+               spin_unlock(&dentry->d_lock);
 
                s.defined = false;
        }
index 2a9344a3fcee929d971aa065e2400e8b431dff82..35c1d24d4a78424c0efba85fa2837f64da304d8f 100644 (file)
@@ -432,7 +432,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
        if (btrfs_block_can_be_shared(trans, root, buf)) {
                ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
                                               btrfs_header_level(buf), 1,
-                                              &refs, &flags);
+                                              &refs, &flags, NULL);
                if (ret)
                        return ret;
                if (unlikely(refs == 0)) {
index 9223934d95f4724cbbc4e726306049d444112ee1..891ea2fa263c935707be62167f59c2c1474e9a8d 100644 (file)
@@ -1041,7 +1041,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        }
 
-       if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) {
+       if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) {
                record = kzalloc(sizeof(*record), GFP_NOFS);
                if (!record) {
                        kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
@@ -1144,7 +1144,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        }
 
-       if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) {
+       if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) {
                record = kzalloc(sizeof(*record), GFP_NOFS);
                if (!record) {
                        kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
index 401ea09ae4b8ce284c1b07a1d24347159f9821dc..bbcc3df776461f5b6952422c246e65bbcbc1ccc6 100644 (file)
@@ -3213,6 +3213,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
                goto fail_alloc;
        }
 
+       btrfs_info(fs_info, "first mount of filesystem %pU", disk_super->fsid);
        /*
         * Verify the type first, if that or the checksum value are
         * corrupted, we'll find out
index c8e5b4715b495cbbcb536171fee0537be9b6505b..0455935ff558804b3e47291821a566e348d8a6d4 100644 (file)
@@ -102,7 +102,8 @@ int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
  */
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
                             struct btrfs_fs_info *fs_info, u64 bytenr,
-                            u64 offset, int metadata, u64 *refs, u64 *flags)
+                            u64 offset, int metadata, u64 *refs, u64 *flags,
+                            u64 *owning_root)
 {
        struct btrfs_root *extent_root;
        struct btrfs_delayed_ref_head *head;
@@ -114,6 +115,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
        u32 item_size;
        u64 num_refs;
        u64 extent_flags;
+       u64 owner = 0;
        int ret;
 
        /*
@@ -167,6 +169,8 @@ search_again:
                                            struct btrfs_extent_item);
                        num_refs = btrfs_extent_refs(leaf, ei);
                        extent_flags = btrfs_extent_flags(leaf, ei);
+                       owner = btrfs_get_extent_owner_root(fs_info, leaf,
+                                                           path->slots[0]);
                } else {
                        ret = -EUCLEAN;
                        btrfs_err(fs_info,
@@ -226,6 +230,8 @@ out:
                *refs = num_refs;
        if (flags)
                *flags = extent_flags;
+       if (owning_root)
+               *owning_root = owner;
 out_free:
        btrfs_free_path(path);
        return ret;
@@ -5234,7 +5240,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
                /* We don't lock the tree block, it's OK to be racy here */
                ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
                                               wc->level - 1, 1, &refs,
-                                              &flags);
+                                              &flags, NULL);
                /* We don't care about errors in readahead. */
                if (ret < 0)
                        continue;
@@ -5301,7 +5307,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                ret = btrfs_lookup_extent_info(trans, fs_info,
                                               eb->start, level, 1,
                                               &wc->refs[level],
-                                              &wc->flags[level]);
+                                              &wc->flags[level],
+                                              NULL);
                BUG_ON(ret == -ENOMEM);
                if (ret)
                        return ret;
@@ -5391,6 +5398,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
        u64 bytenr;
        u64 generation;
        u64 parent;
+       u64 owner_root = 0;
        struct btrfs_tree_parent_check check = { 0 };
        struct btrfs_key key;
        struct btrfs_ref ref = { 0 };
@@ -5434,7 +5442,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 
        ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
                                       &wc->refs[level - 1],
-                                      &wc->flags[level - 1]);
+                                      &wc->flags[level - 1],
+                                      &owner_root);
        if (ret < 0)
                goto out_unlock;
 
@@ -5567,8 +5576,7 @@ skip:
                find_next_key(path, level, &wc->drop_progress);
 
                btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
-                                      fs_info->nodesize, parent,
-                                      btrfs_header_owner(next));
+                                      fs_info->nodesize, parent, owner_root);
                btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
                                    0, false);
                ret = btrfs_free_extent(trans, &ref);
@@ -5635,7 +5643,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                        ret = btrfs_lookup_extent_info(trans, fs_info,
                                                       eb->start, level, 1,
                                                       &wc->refs[level],
-                                                      &wc->flags[level]);
+                                                      &wc->flags[level],
+                                                      NULL);
                        if (ret < 0) {
                                btrfs_tree_unlock_rw(eb, path->locks[level]);
                                path->locks[level] = 0;
@@ -5880,7 +5889,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
                        ret = btrfs_lookup_extent_info(trans, fs_info,
                                                path->nodes[level]->start,
                                                level, 1, &wc->refs[level],
-                                               &wc->flags[level]);
+                                               &wc->flags[level], NULL);
                        if (ret < 0) {
                                err = ret;
                                goto out_end_trans;
index 0716f65d9753bb91346dad8a724d40ff92c060b6..2e066035cceeeab346aad602a59433533f1de365 100644 (file)
@@ -99,7 +99,8 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
 int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
                             struct btrfs_fs_info *fs_info, u64 bytenr,
-                            u64 offset, int metadata, u64 *refs, u64 *flags);
+                            u64 offset, int metadata, u64 *refs, u64 *flags,
+                            u64 *owner_root);
 int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num,
                     int reserved);
 int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
index 03cef28d9e3789d83e7037a7bcb4a818d00d4cbc..e6230a6ffa9859fd26046b12e9cc295e5cf78d35 100644 (file)
@@ -674,8 +674,8 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
  *             the array will be skipped
  *
  * Return: 0        if all pages were able to be allocated;
- *         -ENOMEM  otherwise, and the caller is responsible for freeing all
- *                  non-null page pointers in the array.
+ *         -ENOMEM  otherwise, the partially allocated pages would be freed and
+ *                  the array slots zeroed
  */
 int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array)
 {
@@ -694,8 +694,13 @@ int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array)
                 * though alloc_pages_bulk_array() falls back to alloc_page()
                 * if  it could not bulk-allocate. So we must be out of memory.
                 */
-               if (allocated == last)
+               if (allocated == last) {
+                       for (int i = 0; i < allocated; i++) {
+                               __free_page(page_array[i]);
+                               page_array[i] = NULL;
+                       }
                        return -ENOMEM;
+               }
 
                memalloc_retry_wait(GFP_NOFS);
        }
index 5e3fccddde0c618e19567ea3138cd5980a88758d..9f5a9894f88f49156e9ddb7c4baac8ef1be252df 100644 (file)
@@ -6983,8 +6983,15 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
        int ret;
 
        alloc_hint = get_extent_allocation_hint(inode, start, len);
+again:
        ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize,
                                   0, alloc_hint, &ins, 1, 1);
+       if (ret == -EAGAIN) {
+               ASSERT(btrfs_is_zoned(fs_info));
+               wait_on_bit_io(&inode->root->fs_info->flags, BTRFS_FS_NEED_ZONE_FINISH,
+                              TASK_UNINTERRUPTIBLE);
+               goto again;
+       }
        if (ret)
                return ERR_PTR(ret);
 
index 752acff2c73436ae844958cf6b4889b5d964dddd..4e50b62db2a8feba629ee2ceb2040e28b4c2485b 100644 (file)
@@ -1528,7 +1528,7 @@ static noinline int key_in_sk(struct btrfs_key *key,
 static noinline int copy_to_sk(struct btrfs_path *path,
                               struct btrfs_key *key,
                               struct btrfs_ioctl_search_key *sk,
-                              size_t *buf_size,
+                              u64 *buf_size,
                               char __user *ubuf,
                               unsigned long *sk_offset,
                               int *num_found)
@@ -1660,7 +1660,7 @@ out:
 
 static noinline int search_ioctl(struct inode *inode,
                                 struct btrfs_ioctl_search_key *sk,
-                                size_t *buf_size,
+                                u64 *buf_size,
                                 char __user *ubuf)
 {
        struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
@@ -1733,7 +1733,7 @@ static noinline int btrfs_ioctl_tree_search(struct inode *inode,
        struct btrfs_ioctl_search_args __user *uargs = argp;
        struct btrfs_ioctl_search_key sk;
        int ret;
-       size_t buf_size;
+       u64 buf_size;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
@@ -1763,8 +1763,8 @@ static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode,
        struct btrfs_ioctl_search_args_v2 __user *uarg = argp;
        struct btrfs_ioctl_search_args_v2 args;
        int ret;
-       size_t buf_size;
-       const size_t buf_limit = SZ_16M;
+       u64 buf_size;
+       const u64 buf_limit = SZ_16M;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
@@ -4356,6 +4356,7 @@ static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat
                arg->clone_sources = compat_ptr(args32.clone_sources);
                arg->parent_root = args32.parent_root;
                arg->flags = args32.flags;
+               arg->version = args32.version;
                memcpy(arg->reserved, args32.reserved,
                       sizeof(args32.reserved));
 #else
index edb84cc032377330bac7742d8a03424fa2e48061..ce446d9d7f23da3f44cb9c7100c8e0d7b3866684 100644 (file)
@@ -1888,7 +1888,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
        u64 bytenr = record->bytenr;
 
        if (!btrfs_qgroup_full_accounting(fs_info))
-               return 0;
+               return 1;
 
        lockdep_assert_held(&delayed_refs->lock);
        trace_btrfs_qgroup_trace_extent(fs_info, record);
@@ -2874,13 +2874,19 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
        qgroup_update_counters(fs_info, &qgroups, nr_old_roots, nr_new_roots,
                               num_bytes, seq);
 
+       /*
+        * We're done using the iterator, release all its qgroups while holding
+        * fs_info->qgroup_lock so that we don't race with btrfs_remove_qgroup()
+        * and trigger use-after-free accesses to qgroups.
+        */
+       qgroup_iterator_nested_clean(&qgroups);
+
        /*
         * Bump qgroup_seq to avoid seq overlap
         */
        fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
        spin_unlock(&fs_info->qgroup_lock);
 out_free:
-       qgroup_iterator_nested_clean(&qgroups);
        ulist_free(old_roots);
        ulist_free(new_roots);
        return ret;
index 944e8f1862aaaa69aec78c0de74ad6e9b1f42de6..9589362acfbf9e2ec3c99fdc4b1fb056bf3ddbc7 100644 (file)
@@ -145,7 +145,7 @@ int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
                btrfs_put_bioc(bioc);
        }
 
-       return ret;
+       return 0;
 }
 
 int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
index 1f62976bee829a1865faca76f5059f66752300ae..6486f0d7e9931b4fafbc03ddc5ddca0863679d7a 100644 (file)
@@ -794,6 +794,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
                        dump_ref_action(fs_info, ra);
                        kfree(ref);
                        kfree(ra);
+                       kfree(re);
                        goto out_unlock;
                } else if (be->num_refs == 0) {
                        btrfs_err(fs_info,
@@ -803,6 +804,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
                        dump_ref_action(fs_info, ra);
                        kfree(ref);
                        kfree(ra);
+                       kfree(re);
                        goto out_unlock;
                }
 
index 9ce5be21b0360107d1a2416f06fa0cbfd1710030..f62a408671cbc3ba06bf244bb12de96bdfb4334a 100644 (file)
@@ -1868,6 +1868,9 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *
         */
        ASSERT(sctx->cur_stripe < SCRUB_TOTAL_STRIPES);
 
+       /* @found_logical_ret must be specified. */
+       ASSERT(found_logical_ret);
+
        stripe = &sctx->stripes[sctx->cur_stripe];
        scrub_reset_stripe(stripe);
        ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path,
@@ -1876,8 +1879,7 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *
        /* Either >0 as no more extents or <0 for error. */
        if (ret)
                return ret;
-       if (found_logical_ret)
-               *found_logical_ret = stripe->logical;
+       *found_logical_ret = stripe->logical;
        sctx->cur_stripe++;
 
        /* We filled one group, submit it. */
@@ -2080,7 +2082,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
 
        /* Go through each extent items inside the logical range */
        while (cur_logical < logical_end) {
-               u64 found_logical;
+               u64 found_logical = U64_MAX;
                u64 cur_physical = physical + cur_logical - logical_start;
 
                /* Canceled? */
@@ -2115,6 +2117,8 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
                if (ret < 0)
                        break;
 
+               /* queue_scrub_stripe() returned 0, @found_logical must be updated. */
+               ASSERT(found_logical != U64_MAX);
                cur_logical = found_logical + BTRFS_STRIPE_LEN;
 
                /* Don't hold CPU for too long time */
index 3b929f0e8f04f44ccb718d5c17d5910e7a8c7c85..4e36550618e580044fb0b0d573ddfee196cdca5d 100644 (file)
@@ -8158,7 +8158,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
        }
 
        sctx->send_filp = fget(arg->send_fd);
-       if (!sctx->send_filp) {
+       if (!sctx->send_filp || !(sctx->send_filp->f_mode & FMODE_WRITE)) {
                ret = -EBADF;
                goto out;
        }
index f638dc339693bc1a65c1d5637d58b998ad5b95d4..ef256b944c72aca283d00e9fd23d1af9a3adc2b6 100644 (file)
@@ -80,7 +80,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data);
 
 static void btrfs_put_super(struct super_block *sb)
 {
-       close_ctree(btrfs_sb(sb));
+       struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+
+       btrfs_info(fs_info, "last unmount of filesystem %pU", fs_info->fs_devices->fsid);
+       close_ctree(fs_info);
 }
 
 enum {
index 6e63816dddcbea20f160a274cc8fa06f10ebd7a0..bfc0eb5e3b7c22f90ec83b4c5b53abd2fda5cd04 100644 (file)
@@ -1774,7 +1774,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        btrfs_release_path(path);
 
        ret = btrfs_create_qgroup(trans, objectid);
-       if (ret) {
+       if (ret && ret != -EEXIST) {
                btrfs_abort_transaction(trans, ret);
                goto fail;
        }
index a416cbea75d1ff4f009284dfbfc42650da493e45..50fdc69fdddf9d26014a65ed73c13fe694d05e4b 100644 (file)
@@ -31,6 +31,7 @@
 #include "inode-item.h"
 #include "dir-item.h"
 #include "raid-stripe-tree.h"
+#include "extent-tree.h"
 
 /*
  * Error message should follow the following format:
@@ -1276,6 +1277,8 @@ static int check_extent_item(struct extent_buffer *leaf,
        unsigned long ptr;      /* Current pointer inside inline refs */
        unsigned long end;      /* Extent item end */
        const u32 item_size = btrfs_item_size(leaf, slot);
+       u8 last_type = 0;
+       u64 last_seq = U64_MAX;
        u64 flags;
        u64 generation;
        u64 total_refs;         /* Total refs in btrfs_extent_item */
@@ -1322,6 +1325,18 @@ static int check_extent_item(struct extent_buffer *leaf,
         *    2.2) Ref type specific data
         *         Either using btrfs_extent_inline_ref::offset, or specific
         *         data structure.
+        *
+        *    All above inline items should follow the order:
+        *
+        *    - All btrfs_extent_inline_ref::type should be in an ascending
+        *      order
+        *
+        *    - Within the same type, the items should follow a descending
+        *      order by their sequence number. The sequence number is
+        *      determined by:
+        *      * btrfs_extent_inline_ref::offset for all types other than
+        *        EXTENT_DATA_REF
+        *      * hash_extent_data_ref() for EXTENT_DATA_REF
         */
        if (unlikely(item_size < sizeof(*ei))) {
                extent_err(leaf, slot,
@@ -1403,6 +1418,7 @@ static int check_extent_item(struct extent_buffer *leaf,
                struct btrfs_extent_inline_ref *iref;
                struct btrfs_extent_data_ref *dref;
                struct btrfs_shared_data_ref *sref;
+               u64 seq;
                u64 dref_offset;
                u64 inline_offset;
                u8 inline_type;
@@ -1416,6 +1432,7 @@ static int check_extent_item(struct extent_buffer *leaf,
                iref = (struct btrfs_extent_inline_ref *)ptr;
                inline_type = btrfs_extent_inline_ref_type(leaf, iref);
                inline_offset = btrfs_extent_inline_ref_offset(leaf, iref);
+               seq = inline_offset;
                if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) {
                        extent_err(leaf, slot,
 "inline ref item overflows extent item, ptr %lu iref size %u end %lu",
@@ -1446,6 +1463,10 @@ static int check_extent_item(struct extent_buffer *leaf,
                case BTRFS_EXTENT_DATA_REF_KEY:
                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
                        dref_offset = btrfs_extent_data_ref_offset(leaf, dref);
+                       seq = hash_extent_data_ref(
+                                       btrfs_extent_data_ref_root(leaf, dref),
+                                       btrfs_extent_data_ref_objectid(leaf, dref),
+                                       btrfs_extent_data_ref_offset(leaf, dref));
                        if (unlikely(!IS_ALIGNED(dref_offset,
                                                 fs_info->sectorsize))) {
                                extent_err(leaf, slot,
@@ -1475,6 +1496,24 @@ static int check_extent_item(struct extent_buffer *leaf,
                                   inline_type);
                        return -EUCLEAN;
                }
+               if (inline_type < last_type) {
+                       extent_err(leaf, slot,
+                                  "inline ref out-of-order: has type %u, prev type %u",
+                                  inline_type, last_type);
+                       return -EUCLEAN;
+               }
+               /* Type changed, allow the sequence to start from U64_MAX again. */
+               if (inline_type > last_type)
+                       last_seq = U64_MAX;
+               if (seq > last_seq) {
+                       extent_err(leaf, slot,
+"inline ref out-of-order: has type %u offset %llu seq 0x%llx, prev type %u seq 0x%llx",
+                                  inline_type, inline_offset, seq,
+                                  last_type, last_seq);
+                       return -EUCLEAN;
+               }
+               last_type = inline_type;
+               last_seq = seq;
                ptr += btrfs_extent_inline_ref_size(inline_type);
        }
        /* No padding is allowed */
index c87e18827a0a6cc411cd9d01411efe816a10c047..f627674b37db50bfd9f5e1149bb65d237e590f17 100644 (file)
@@ -748,13 +748,13 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 
        if (!fs_devices) {
                fs_devices = alloc_fs_devices(disk_super->fsid);
+               if (IS_ERR(fs_devices))
+                       return ERR_CAST(fs_devices);
+
                if (has_metadata_uuid)
                        memcpy(fs_devices->metadata_uuid,
                               disk_super->metadata_uuid, BTRFS_FSID_SIZE);
 
-               if (IS_ERR(fs_devices))
-                       return ERR_CAST(fs_devices);
-
                if (same_fsid_diff_dev) {
                        generate_random_uuid(fs_devices->fsid);
                        fs_devices->temp_fsid = true;
@@ -3006,15 +3006,16 @@ struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
        read_unlock(&em_tree->lock);
 
        if (!em) {
-               btrfs_crit(fs_info, "unable to find logical %llu length %llu",
+               btrfs_crit(fs_info,
+                          "unable to find chunk map for logical %llu length %llu",
                           logical, length);
                return ERR_PTR(-EINVAL);
        }
 
-       if (em->start > logical || em->start + em->len < logical) {
+       if (em->start > logical || em->start + em->len <= logical) {
                btrfs_crit(fs_info,
-                          "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
-                          logical, length, em->start, em->start + em->len);
+                          "found a bad chunk map, wanted %llu-%llu, found %llu-%llu",
+                          logical, logical + length, em->start, em->start + em->len);
                free_extent_map(em);
                return ERR_PTR(-EINVAL);
        }
index 3504ade30cb0c3426bcd64c0418b326f1d7a6153..188378ca19c7f67a1f2c519f0a90629a4107fb5d 100644 (file)
@@ -1661,13 +1661,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
        }
 
 out:
-       if (cache->alloc_offset > fs_info->zone_size) {
-               btrfs_err(fs_info,
-                       "zoned: invalid write pointer %llu in block group %llu",
-                       cache->alloc_offset, cache->start);
-               ret = -EIO;
-       }
-
        if (cache->alloc_offset > cache->zone_capacity) {
                btrfs_err(fs_info,
 "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu",
index c45e8c2d62e11655b4e340cb6e01edd71d9e9eed..a5ade8c163754bf09036d81eff378f3ff5c545cd 100644 (file)
@@ -84,6 +84,14 @@ int debugfs_file_get(struct dentry *dentry)
        struct debugfs_fsdata *fsd;
        void *d_fsd;
 
+       /*
+        * This could only happen if some debugfs user erroneously calls
+        * debugfs_file_get() on a dentry that isn't even a file, let
+        * them know about it.
+        */
+       if (WARN_ON(!d_is_reg(dentry)))
+               return -EINVAL;
+
        d_fsd = READ_ONCE(dentry->d_fsdata);
        if (!((unsigned long)d_fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) {
                fsd = d_fsd;
@@ -100,6 +108,14 @@ int debugfs_file_get(struct dentry *dentry)
                        kfree(fsd);
                        fsd = READ_ONCE(dentry->d_fsdata);
                }
+#ifdef CONFIG_LOCKDEP
+               fsd->lock_name = kasprintf(GFP_KERNEL, "debugfs:%pd", dentry);
+               lockdep_register_key(&fsd->key);
+               lockdep_init_map(&fsd->lockdep_map, fsd->lock_name ?: "debugfs",
+                                &fsd->key, 0);
+#endif
+               INIT_LIST_HEAD(&fsd->cancellations);
+               mutex_init(&fsd->cancellations_mtx);
        }
 
        /*
@@ -116,6 +132,8 @@ int debugfs_file_get(struct dentry *dentry)
        if (!refcount_inc_not_zero(&fsd->active_users))
                return -EIO;
 
+       lock_map_acquire_read(&fsd->lockdep_map);
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(debugfs_file_get);
@@ -133,11 +151,93 @@ void debugfs_file_put(struct dentry *dentry)
 {
        struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata);
 
+       lock_map_release(&fsd->lockdep_map);
+
        if (refcount_dec_and_test(&fsd->active_users))
                complete(&fsd->active_users_drained);
 }
 EXPORT_SYMBOL_GPL(debugfs_file_put);
 
+/**
+ * debugfs_enter_cancellation - enter a debugfs cancellation
+ * @file: the file being accessed
+ * @cancellation: the cancellation object, the cancel callback
+ *     inside of it must be initialized
+ *
+ * When a debugfs file is removed it needs to wait for all active
+ * operations to complete. However, the operation itself may need
+ * to wait for hardware or completion of some asynchronous process
+ * or similar. As such, it may need to be cancelled to avoid long
+ * waits or even deadlocks.
+ *
+ * This function can be used inside a debugfs handler that may
+ * need to be cancelled. As soon as this function is called, the
+ * cancellation's 'cancel' callback may be called, at which point
+ * the caller should proceed to call debugfs_leave_cancellation()
+ * and leave the debugfs handler function as soon as possible.
+ * Note that the 'cancel' callback is only ever called in the
+ * context of some kind of debugfs_remove().
+ *
+ * This function must be paired with debugfs_leave_cancellation().
+ */
+void debugfs_enter_cancellation(struct file *file,
+                               struct debugfs_cancellation *cancellation)
+{
+       struct debugfs_fsdata *fsd;
+       struct dentry *dentry = F_DENTRY(file);
+
+       INIT_LIST_HEAD(&cancellation->list);
+
+       if (WARN_ON(!d_is_reg(dentry)))
+               return;
+
+       if (WARN_ON(!cancellation->cancel))
+               return;
+
+       fsd = READ_ONCE(dentry->d_fsdata);
+       if (WARN_ON(!fsd ||
+                   ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)))
+               return;
+
+       mutex_lock(&fsd->cancellations_mtx);
+       list_add(&cancellation->list, &fsd->cancellations);
+       mutex_unlock(&fsd->cancellations_mtx);
+
+       /* if we're already removing, wake it up to cancel */
+       if (d_unlinked(dentry))
+               complete(&fsd->active_users_drained);
+}
+EXPORT_SYMBOL_GPL(debugfs_enter_cancellation);
+
+/**
+ * debugfs_leave_cancellation - leave cancellation section
+ * @file: the file being accessed
+ * @cancellation: the cancellation previously registered with
+ *     debugfs_enter_cancellation()
+ *
+ * See the documentation of debugfs_enter_cancellation().
+ */
+void debugfs_leave_cancellation(struct file *file,
+                               struct debugfs_cancellation *cancellation)
+{
+       struct debugfs_fsdata *fsd;
+       struct dentry *dentry = F_DENTRY(file);
+
+       if (WARN_ON(!d_is_reg(dentry)))
+               return;
+
+       fsd = READ_ONCE(dentry->d_fsdata);
+       if (WARN_ON(!fsd ||
+                   ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)))
+               return;
+
+       mutex_lock(&fsd->cancellations_mtx);
+       if (!list_empty(&cancellation->list))
+               list_del(&cancellation->list);
+       mutex_unlock(&fsd->cancellations_mtx);
+}
+EXPORT_SYMBOL_GPL(debugfs_leave_cancellation);
+
 /*
  * Only permit access to world-readable files when the kernel is locked down.
  * We also need to exclude any file that has ways to write or alter it as root
index 5d41765e0c77695c21a4b53fa4db052190a66597..e4e7fe1bd9fbfaa316364404a3dcfe1cd9961bfe 100644 (file)
@@ -236,17 +236,29 @@ static const struct super_operations debugfs_super_operations = {
 
 static void debugfs_release_dentry(struct dentry *dentry)
 {
-       void *fsd = dentry->d_fsdata;
+       struct debugfs_fsdata *fsd = dentry->d_fsdata;
 
-       if (!((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))
-               kfree(dentry->d_fsdata);
+       if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
+               return;
+
+       /* check it wasn't a dir (no fsdata) or automount (no real_fops) */
+       if (fsd && fsd->real_fops) {
+#ifdef CONFIG_LOCKDEP
+               lockdep_unregister_key(&fsd->key);
+               kfree(fsd->lock_name);
+#endif
+               WARN_ON(!list_empty(&fsd->cancellations));
+               mutex_destroy(&fsd->cancellations_mtx);
+       }
+
+       kfree(fsd);
 }
 
 static struct vfsmount *debugfs_automount(struct path *path)
 {
-       debugfs_automount_t f;
-       f = (debugfs_automount_t)path->dentry->d_fsdata;
-       return f(path->dentry, d_inode(path->dentry)->i_private);
+       struct debugfs_fsdata *fsd = path->dentry->d_fsdata;
+
+       return fsd->automount(path->dentry, d_inode(path->dentry)->i_private);
 }
 
 static const struct dentry_operations debugfs_dops = {
@@ -634,13 +646,23 @@ struct dentry *debugfs_create_automount(const char *name,
                                        void *data)
 {
        struct dentry *dentry = start_creating(name, parent);
+       struct debugfs_fsdata *fsd;
        struct inode *inode;
 
        if (IS_ERR(dentry))
                return dentry;
 
+       fsd = kzalloc(sizeof(*fsd), GFP_KERNEL);
+       if (!fsd) {
+               failed_creating(dentry);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       fsd->automount = f;
+
        if (!(debugfs_allow & DEBUGFS_ALLOW_API)) {
                failed_creating(dentry);
+               kfree(fsd);
                return ERR_PTR(-EPERM);
        }
 
@@ -648,13 +670,14 @@ struct dentry *debugfs_create_automount(const char *name,
        if (unlikely(!inode)) {
                pr_err("out of free dentries, can not create automount '%s'\n",
                       name);
+               kfree(fsd);
                return failed_creating(dentry);
        }
 
        make_empty_dir_inode(inode);
        inode->i_flags |= S_AUTOMOUNT;
        inode->i_private = data;
-       dentry->d_fsdata = (void *)f;
+       dentry->d_fsdata = fsd;
        /* directory inodes start off with i_nlink == 2 (for "." entry) */
        inc_nlink(inode);
        d_instantiate(dentry, inode);
@@ -731,8 +754,40 @@ static void __debugfs_file_removed(struct dentry *dentry)
        fsd = READ_ONCE(dentry->d_fsdata);
        if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
                return;
-       if (!refcount_dec_and_test(&fsd->active_users))
+
+       lock_map_acquire(&fsd->lockdep_map);
+       lock_map_release(&fsd->lockdep_map);
+
+       /* if we hit zero, just wait for all to finish */
+       if (!refcount_dec_and_test(&fsd->active_users)) {
+               wait_for_completion(&fsd->active_users_drained);
+               return;
+       }
+
+       /* if we didn't hit zero, try to cancel any we can */
+       while (refcount_read(&fsd->active_users)) {
+               struct debugfs_cancellation *c;
+
+               /*
+                * Lock the cancellations. Note that the cancellations
+                * structs are meant to be on the stack, so we need to
+                * ensure we either use them here or don't touch them,
+                * and debugfs_leave_cancellation() will wait for this
+                * to be finished processing before exiting one. It may
+                * of course win and remove the cancellation, but then
+                * chances are we never even got into this bit, we only
+                * do if the refcount isn't zero already.
+                */
+               mutex_lock(&fsd->cancellations_mtx);
+               while ((c = list_first_entry_or_null(&fsd->cancellations,
+                                                    typeof(*c), list))) {
+                       list_del_init(&c->list);
+                       c->cancel(dentry, c->cancel_data);
+               }
+               mutex_unlock(&fsd->cancellations_mtx);
+
                wait_for_completion(&fsd->active_users_drained);
+       }
 }
 
 static void remove_one(struct dentry *victim)
index 92af8ae313134654e52bcb9f90b22280a6ae775e..0c4c68cf161f8742cf25c072291a26095e35f74e 100644 (file)
@@ -7,6 +7,8 @@
 
 #ifndef _DEBUGFS_INTERNAL_H_
 #define _DEBUGFS_INTERNAL_H_
+#include <linux/lockdep.h>
+#include <linux/list.h>
 
 struct file_operations;
 
@@ -17,8 +19,23 @@ extern const struct file_operations debugfs_full_proxy_file_operations;
 
 struct debugfs_fsdata {
        const struct file_operations *real_fops;
-       refcount_t active_users;
-       struct completion active_users_drained;
+       union {
+               /* automount is used when real_fops is NULL */
+               debugfs_automount_t automount;
+               struct {
+                       refcount_t active_users;
+                       struct completion active_users_drained;
+#ifdef CONFIG_LOCKDEP
+                       struct lockdep_map lockdep_map;
+                       struct lock_class_key key;
+                       char *lock_name;
+#endif
+
+                       /* protect cancellations */
+                       struct mutex cancellations_mtx;
+                       struct list_head cancellations;
+               };
+       };
 };
 
 /*
index a25dd3d20008bfa40d2fc81b797f4dfa3f9118db..b0e8774c435a4b1c2cf54b0efb3cd514a673ec9d 100644 (file)
@@ -998,6 +998,14 @@ static int ecryptfs_getattr_link(struct mnt_idmap *idmap,
        return rc;
 }
 
+static int ecryptfs_do_getattr(const struct path *path, struct kstat *stat,
+                              u32 request_mask, unsigned int flags)
+{
+       if (flags & AT_GETATTR_NOSEC)
+               return vfs_getattr_nosec(path, stat, request_mask, flags);
+       return vfs_getattr(path, stat, request_mask, flags);
+}
+
 static int ecryptfs_getattr(struct mnt_idmap *idmap,
                            const struct path *path, struct kstat *stat,
                            u32 request_mask, unsigned int flags)
@@ -1006,8 +1014,8 @@ static int ecryptfs_getattr(struct mnt_idmap *idmap,
        struct kstat lower_stat;
        int rc;
 
-       rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat,
-                        request_mask, flags);
+       rc = ecryptfs_do_getattr(ecryptfs_dentry_to_lower_path(dentry),
+                                &lower_stat, request_mask, flags);
        if (!rc) {
                fsstack_copy_attr_all(d_inode(dentry),
                                      ecryptfs_inode_to_lower(d_inode(dentry)));
index e540648dedc28084596e97b2052124c76d247693..1d318f85232de9361714471ac973762ed2e6b0e6 100644 (file)
@@ -21,7 +21,7 @@ config EROFS_FS
          performance under extremely memory pressure without extra cost.
 
          See the documentation at <file:Documentation/filesystems/erofs.rst>
-         for more details.
+         and the web pages at <https://erofs.docs.kernel.org> for more details.
 
          If unsure, say N.
 
index 029c761670bfcd20389d7b1fb5ec75d32aa89376..c98aeda8abb215e9be577d1b27dea2713b0b6e87 100644 (file)
@@ -220,7 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
                        up_read(&devs->rwsem);
                        return 0;
                }
-               map->m_bdev = dif->bdev_handle->bdev;
+               map->m_bdev = dif->bdev_handle ? dif->bdev_handle->bdev : NULL;
                map->m_daxdev = dif->dax_dev;
                map->m_dax_part_off = dif->dax_part_off;
                map->m_fscache = dif->fscache;
@@ -238,7 +238,8 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
                        if (map->m_pa >= startoff &&
                            map->m_pa < startoff + length) {
                                map->m_pa -= startoff;
-                               map->m_bdev = dif->bdev_handle->bdev;
+                               map->m_bdev = dif->bdev_handle ?
+                                             dif->bdev_handle->bdev : NULL;
                                map->m_daxdev = dif->dax_dev;
                                map->m_dax_part_off = dif->dax_part_off;
                                map->m_fscache = dif->fscache;
index b8ad05b4509d58f1e64ab2a1994c405e0aa80b04..14a79d3226abf4d6ce49c27123bba14394bf5f29 100644 (file)
@@ -15,11 +15,11 @@ static void *erofs_read_inode(struct erofs_buf *buf,
        struct erofs_sb_info *sbi = EROFS_SB(sb);
        struct erofs_inode *vi = EROFS_I(inode);
        const erofs_off_t inode_loc = erofs_iloc(inode);
-
        erofs_blk_t blkaddr, nblks = 0;
        void *kaddr;
        struct erofs_inode_compact *dic;
        struct erofs_inode_extended *die, *copied = NULL;
+       union erofs_inode_i_u iu;
        unsigned int ifmt;
        int err;
 
@@ -35,9 +35,8 @@ static void *erofs_read_inode(struct erofs_buf *buf,
 
        dic = kaddr + *ofs;
        ifmt = le16_to_cpu(dic->i_format);
-
        if (ifmt & ~EROFS_I_ALL) {
-               erofs_err(inode->i_sb, "unsupported i_format %u of nid %llu",
+               erofs_err(sb, "unsupported i_format %u of nid %llu",
                          ifmt, vi->nid);
                err = -EOPNOTSUPP;
                goto err_out;
@@ -45,7 +44,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
 
        vi->datalayout = erofs_inode_datalayout(ifmt);
        if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) {
-               erofs_err(inode->i_sb, "unsupported datalayout %u of nid %llu",
+               erofs_err(sb, "unsupported datalayout %u of nid %llu",
                          vi->datalayout, vi->nid);
                err = -EOPNOTSUPP;
                goto err_out;
@@ -82,40 +81,15 @@ static void *erofs_read_inode(struct erofs_buf *buf,
                vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount);
 
                inode->i_mode = le16_to_cpu(die->i_mode);
-               switch (inode->i_mode & S_IFMT) {
-               case S_IFREG:
-               case S_IFDIR:
-               case S_IFLNK:
-                       vi->raw_blkaddr = le32_to_cpu(die->i_u.raw_blkaddr);
-                       break;
-               case S_IFCHR:
-               case S_IFBLK:
-                       inode->i_rdev =
-                               new_decode_dev(le32_to_cpu(die->i_u.rdev));
-                       break;
-               case S_IFIFO:
-               case S_IFSOCK:
-                       inode->i_rdev = 0;
-                       break;
-               default:
-                       goto bogusimode;
-               }
+               iu = die->i_u;
                i_uid_write(inode, le32_to_cpu(die->i_uid));
                i_gid_write(inode, le32_to_cpu(die->i_gid));
                set_nlink(inode, le32_to_cpu(die->i_nlink));
-
-               /* extended inode has its own timestamp */
+               /* each extended inode has its own timestamp */
                inode_set_ctime(inode, le64_to_cpu(die->i_mtime),
                                le32_to_cpu(die->i_mtime_nsec));
 
                inode->i_size = le64_to_cpu(die->i_size);
-
-               /* total blocks for compressed files */
-               if (erofs_inode_is_data_compressed(vi->datalayout))
-                       nblks = le32_to_cpu(die->i_u.compressed_blocks);
-               else if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
-                       /* fill chunked inode summary info */
-                       vi->chunkformat = le16_to_cpu(die->i_u.c.format);
                kfree(copied);
                copied = NULL;
                break;
@@ -125,49 +99,51 @@ static void *erofs_read_inode(struct erofs_buf *buf,
                vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount);
 
                inode->i_mode = le16_to_cpu(dic->i_mode);
-               switch (inode->i_mode & S_IFMT) {
-               case S_IFREG:
-               case S_IFDIR:
-               case S_IFLNK:
-                       vi->raw_blkaddr = le32_to_cpu(dic->i_u.raw_blkaddr);
-                       break;
-               case S_IFCHR:
-               case S_IFBLK:
-                       inode->i_rdev =
-                               new_decode_dev(le32_to_cpu(dic->i_u.rdev));
-                       break;
-               case S_IFIFO:
-               case S_IFSOCK:
-                       inode->i_rdev = 0;
-                       break;
-               default:
-                       goto bogusimode;
-               }
+               iu = dic->i_u;
                i_uid_write(inode, le16_to_cpu(dic->i_uid));
                i_gid_write(inode, le16_to_cpu(dic->i_gid));
                set_nlink(inode, le16_to_cpu(dic->i_nlink));
-
                /* use build time for compact inodes */
                inode_set_ctime(inode, sbi->build_time, sbi->build_time_nsec);
 
                inode->i_size = le32_to_cpu(dic->i_size);
-               if (erofs_inode_is_data_compressed(vi->datalayout))
-                       nblks = le32_to_cpu(dic->i_u.compressed_blocks);
-               else if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
-                       vi->chunkformat = le16_to_cpu(dic->i_u.c.format);
                break;
        default:
-               erofs_err(inode->i_sb,
-                         "unsupported on-disk inode version %u of nid %llu",
+               erofs_err(sb, "unsupported on-disk inode version %u of nid %llu",
                          erofs_inode_version(ifmt), vi->nid);
                err = -EOPNOTSUPP;
                goto err_out;
        }
 
-       if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFREG:
+       case S_IFDIR:
+       case S_IFLNK:
+               vi->raw_blkaddr = le32_to_cpu(iu.raw_blkaddr);
+               break;
+       case S_IFCHR:
+       case S_IFBLK:
+               inode->i_rdev = new_decode_dev(le32_to_cpu(iu.rdev));
+               break;
+       case S_IFIFO:
+       case S_IFSOCK:
+               inode->i_rdev = 0;
+               break;
+       default:
+               erofs_err(sb, "bogus i_mode (%o) @ nid %llu", inode->i_mode,
+                         vi->nid);
+               err = -EFSCORRUPTED;
+               goto err_out;
+       }
+
+       /* total blocks for compressed files */
+       if (erofs_inode_is_data_compressed(vi->datalayout)) {
+               nblks = le32_to_cpu(iu.compressed_blocks);
+       } else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
+               /* fill chunked inode summary info */
+               vi->chunkformat = le16_to_cpu(iu.c.format);
                if (vi->chunkformat & ~EROFS_CHUNK_FORMAT_ALL) {
-                       erofs_err(inode->i_sb,
-                                 "unsupported chunk format %x of nid %llu",
+                       erofs_err(sb, "unsupported chunk format %x of nid %llu",
                                  vi->chunkformat, vi->nid);
                        err = -EOPNOTSUPP;
                        goto err_out;
@@ -191,10 +167,6 @@ static void *erofs_read_inode(struct erofs_buf *buf,
                inode->i_blocks = nblks << (sb->s_blocksize_bits - 9);
        return kaddr;
 
-bogusimode:
-       erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu",
-                 inode->i_mode, vi->nid);
-       err = -EFSCORRUPTED;
 err_out:
        DBG_BUGON(1);
        kfree(copied);
index 1039e5bf90afd3d5053ebf2f746a4a0ba68539b9..4ddc36f4dbd407950994b278d658ad453482efed 100644 (file)
@@ -258,7 +258,6 @@ static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
                        goto out_unlock;
                }
 
-               iocb->ki_pos += status;
                ret += status;
                endbyte = pos + status - 1;
                ret2 = filemap_write_and_wait_range(inode->i_mapping, pos,
index edcd8a61975f34c7a4cf467589848870430b3b8a..f238d987dec901150ac759d51a7996dc208bd1f8 100644 (file)
@@ -215,6 +215,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        lockdep_set_class_and_name(&mapping->invalidate_lock,
                                   &sb->s_type->invalidate_lock_key,
                                   "mapping.invalidate_lock");
+       if (sb->s_iflags & SB_I_STABLE_WRITES)
+               mapping_set_stable_writes(mapping);
        inode->i_private = NULL;
        inode->i_mapping = mapping;
        INIT_HLIST_HEAD(&inode->i_dentry);      /* buggered by rcu freeing */
index e9440d55073c50962486f3f4f047a7872544fcff..c2aa6fd4795c44340fbc16c0c4138ff004b8f53e 100644 (file)
@@ -399,6 +399,8 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
                return -EINVAL;
        }
 
+       /* In this case, ->private_data is protected by f_pos_lock */
+       file->private_data = NULL;
        return vfs_setpos(file, offset, U32_MAX);
 }
 
@@ -428,7 +430,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
                          inode->i_ino, fs_umode_to_dtype(inode->i_mode));
 }
 
-static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
+static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 {
        struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
        XA_STATE(xas, &so_ctx->xa, ctx->pos);
@@ -437,7 +439,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
        while (true) {
                dentry = offset_find_next(&xas);
                if (!dentry)
-                       break;
+                       return ERR_PTR(-ENOENT);
 
                if (!offset_dir_emit(ctx, dentry)) {
                        dput(dentry);
@@ -447,6 +449,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
                dput(dentry);
                ctx->pos = xas.xa_index + 1;
        }
+       return NULL;
 }
 
 /**
@@ -479,7 +482,12 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
        if (!dir_emit_dots(file, ctx))
                return 0;
 
-       offset_iterate_dir(d_inode(dir), ctx);
+       /* In this case, ->private_data is protected by f_pos_lock */
+       if (ctx->pos == 2)
+               file->private_data = NULL;
+       else if (file->private_data == ERR_PTR(-ENOENT))
+               return 0;
+       file->private_data = offset_iterate_dir(d_inode(dir), ctx);
        return 0;
 }
 
index 929248c6ca84c47f828feecbdcbbb18286d5819f..4cbe0434cbb8ce973865153ddf4a9d7c332bea5e 100644 (file)
@@ -84,8 +84,8 @@ int   nfsd_net_reply_cache_init(struct nfsd_net *nn);
 void   nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
 int    nfsd_reply_cache_init(struct nfsd_net *);
 void   nfsd_reply_cache_shutdown(struct nfsd_net *);
-int    nfsd_cache_lookup(struct svc_rqst *rqstp,
-                         struct nfsd_cacherep **cacherep);
+int    nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+                         unsigned int len, struct nfsd_cacherep **cacherep);
 void   nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
                          int cachetype, __be32 *statp);
 int    nfsd_reply_cache_stats_show(struct seq_file *m, void *v);
index 4045c852a450e7ab172f26ecfcfc5e2805bfb6df..40415929e2aef385fce9b658ba68b32425a6462f 100644 (file)
@@ -2804,7 +2804,7 @@ static int client_opens_release(struct inode *inode, struct file *file)
 
        /* XXX: alternatively, we could get/drop in seq start/stop */
        drop_client(clp);
-       return 0;
+       return seq_release(inode, file);
 }
 
 static const struct file_operations client_states_fops = {
index fd56a52aa5fb678859e5fd12eb8e00492e671372..d3273a3966598b83e46679102fa9b7e00d879184 100644 (file)
@@ -369,33 +369,52 @@ nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
        return freed;
 }
 
-/*
- * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
+/**
+ * nfsd_cache_csum - Checksum incoming NFS Call arguments
+ * @buf: buffer containing a whole RPC Call message
+ * @start: starting byte of the NFS Call header
+ * @remaining: size of the NFS Call header, in bytes
+ *
+ * Compute a weak checksum of the leading bytes of an NFS procedure
+ * call header to help verify that a retransmitted Call matches an
+ * entry in the duplicate reply cache.
+ *
+ * To avoid assumptions about how the RPC message is laid out in
+ * @buf and what else it might contain (eg, a GSS MIC suffix), the
+ * caller passes us the exact location and length of the NFS Call
+ * header.
+ *
+ * Returns a 32-bit checksum value, as defined in RFC 793.
  */
-static __wsum
-nfsd_cache_csum(struct svc_rqst *rqstp)
+static __wsum nfsd_cache_csum(struct xdr_buf *buf, unsigned int start,
+                             unsigned int remaining)
 {
+       unsigned int base, len;
+       struct xdr_buf subbuf;
+       __wsum csum = 0;
+       void *p;
        int idx;
-       unsigned int base;
-       __wsum csum;
-       struct xdr_buf *buf = &rqstp->rq_arg;
-       const unsigned char *p = buf->head[0].iov_base;
-       size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len,
-                               RC_CSUMLEN);
-       size_t len = min(buf->head[0].iov_len, csum_len);
+
+       if (remaining > RC_CSUMLEN)
+               remaining = RC_CSUMLEN;
+       if (xdr_buf_subsegment(buf, &subbuf, start, remaining))
+               return csum;
 
        /* rq_arg.head first */
-       csum = csum_partial(p, len, 0);
-       csum_len -= len;
+       if (subbuf.head[0].iov_len) {
+               len = min_t(unsigned int, subbuf.head[0].iov_len, remaining);
+               csum = csum_partial(subbuf.head[0].iov_base, len, csum);
+               remaining -= len;
+       }
 
        /* Continue into page array */
-       idx = buf->page_base / PAGE_SIZE;
-       base = buf->page_base & ~PAGE_MASK;
-       while (csum_len) {
-               p = page_address(buf->pages[idx]) + base;
-               len = min_t(size_t, PAGE_SIZE - base, csum_len);
+       idx = subbuf.page_base / PAGE_SIZE;
+       base = subbuf.page_base & ~PAGE_MASK;
+       while (remaining) {
+               p = page_address(subbuf.pages[idx]) + base;
+               len = min_t(unsigned int, PAGE_SIZE - base, remaining);
                csum = csum_partial(p, len, csum);
-               csum_len -= len;
+               remaining -= len;
                base = 0;
                ++idx;
        }
@@ -466,6 +485,8 @@ out:
 /**
  * nfsd_cache_lookup - Find an entry in the duplicate reply cache
  * @rqstp: Incoming Call to find
+ * @start: starting byte in @rqstp->rq_arg of the NFS Call header
+ * @len: size of the NFS Call header, in bytes
  * @cacherep: OUT: DRC entry for this request
  *
  * Try to find an entry matching the current call in the cache. When none
@@ -479,7 +500,8 @@ out:
  *   %RC_REPLY: Reply from cache
  *   %RC_DROPIT: Do not process the request further
  */
-int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep)
+int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+                     unsigned int len, struct nfsd_cacherep **cacherep)
 {
        struct nfsd_net         *nn;
        struct nfsd_cacherep    *rp, *found;
@@ -495,7 +517,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep)
                goto out;
        }
 
-       csum = nfsd_cache_csum(rqstp);
+       csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
 
        /*
         * Since the common case is a cache miss followed by an insert,
@@ -641,24 +663,17 @@ void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
        return;
 }
 
-/*
- * Copy cached reply to current reply buffer. Should always fit.
- * FIXME as reply is in a page, we should just attach the page, and
- * keep a refcount....
- */
 static int
 nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
 {
-       struct kvec     *vec = &rqstp->rq_res.head[0];
-
-       if (vec->iov_len + data->iov_len > PAGE_SIZE) {
-               printk(KERN_WARNING "nfsd: cached reply too large (%zd).\n",
-                               data->iov_len);
-               return 0;
-       }
-       memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
-       vec->iov_len += data->iov_len;
-       return 1;
+       __be32 *p;
+
+       p = xdr_reserve_space(&rqstp->rq_res_stream, data->iov_len);
+       if (unlikely(!p))
+               return false;
+       memcpy(p, data->iov_base, data->iov_len);
+       xdr_commit_encode(&rqstp->rq_res_stream);
+       return true;
 }
 
 /*
index d6122bb2d167b44b21e888ebb0fab8cda24c4cf8..fe61d9bbcc1faa2d704f9ec926812a022d78381d 100644 (file)
@@ -981,6 +981,8 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
        const struct svc_procedure *proc = rqstp->rq_procinfo;
        __be32 *statp = rqstp->rq_accept_statp;
        struct nfsd_cacherep *rp;
+       unsigned int start, len;
+       __be32 *nfs_reply;
 
        /*
         * Give the xdr decoder a chance to change this if it wants
@@ -988,6 +990,13 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
         */
        rqstp->rq_cachetype = proc->pc_cachetype;
 
+       /*
+        * ->pc_decode advances the argument stream past the NFS
+        * Call header, so grab the header's starting location and
+        * size now for the call to nfsd_cache_lookup().
+        */
+       start = xdr_stream_pos(&rqstp->rq_arg_stream);
+       len = xdr_stream_remaining(&rqstp->rq_arg_stream);
        if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
                goto out_decode_err;
 
@@ -1001,7 +1010,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
        smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
 
        rp = NULL;
-       switch (nfsd_cache_lookup(rqstp, &rp)) {
+       switch (nfsd_cache_lookup(rqstp, start, len, &rp)) {
        case RC_DOIT:
                break;
        case RC_REPLY:
@@ -1010,6 +1019,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
                goto out_dropit;
        }
 
+       nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0);
        *statp = proc->pc_func(rqstp);
        if (test_bit(RQ_DROPME, &rqstp->rq_flags))
                goto out_update_drop;
@@ -1023,7 +1033,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
         */
        smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
 
-       nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
+       nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, nfs_reply);
 out_cached_reply:
        return 1;
 
index 345b8f161ca4c3da0b81fb397e9ef3dc21dde0ea..c63b31a460befcc9f35a573618a1fb13cd20657f 100644 (file)
@@ -171,7 +171,7 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
 
        type = ovl_path_real(dentry, &realpath);
        old_cred = ovl_override_creds(dentry->d_sb);
-       err = vfs_getattr(&realpath, stat, request_mask, flags);
+       err = ovl_do_getattr(&realpath, stat, request_mask, flags);
        if (err)
                goto out;
 
@@ -196,8 +196,8 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
                                        (!is_dir ? STATX_NLINK : 0);
 
                        ovl_path_lower(dentry, &realpath);
-                       err = vfs_getattr(&realpath, &lowerstat,
-                                         lowermask, flags);
+                       err = ovl_do_getattr(&realpath, &lowerstat, lowermask,
+                                            flags);
                        if (err)
                                goto out;
 
@@ -249,8 +249,8 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
 
                        ovl_path_lowerdata(dentry, &realpath);
                        if (realpath.dentry) {
-                               err = vfs_getattr(&realpath, &lowerdatastat,
-                                                 lowermask, flags);
+                               err = ovl_do_getattr(&realpath, &lowerdatastat,
+                                                    lowermask, flags);
                                if (err)
                                        goto out;
                        } else {
index ca88b2636a5729604221000fe450729d661e8e7e..05c3dd597fa8d98650da5660dbdb9a786f583aad 100644 (file)
@@ -408,6 +408,14 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
        return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
 }
 
+static inline int ovl_do_getattr(const struct path *path, struct kstat *stat,
+                                u32 request_mask, unsigned int flags)
+{
+       if (flags & AT_GETATTR_NOSEC)
+               return vfs_getattr_nosec(path, stat, request_mask, flags);
+       return vfs_getattr(path, stat, request_mask, flags);
+}
+
 /* util.c */
 int ovl_get_write_access(struct dentry *dentry);
 void ovl_put_write_access(struct dentry *dentry);
index ddab9ea267d1283b73f8245aa1adb81232f2a202..3fe2dde1598f9ead48dbbaafcd2c9660fd358f00 100644 (file)
@@ -430,7 +430,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
        struct ovl_fs_context *ctx = fc->fs_private;
        struct ovl_fs_context_layer *l;
        char *dup = NULL, *iter;
-       ssize_t nr_lower = 0, nr = 0, nr_data = 0;
+       ssize_t nr_lower, nr;
        bool data_layer = false;
 
        /*
@@ -482,6 +482,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
        iter = dup;
        l = ctx->lower;
        for (nr = 0; nr < nr_lower; nr++, l++) {
+               ctx->nr++;
                memset(l, 0, sizeof(*l));
 
                err = ovl_mount_dir(iter, &l->path);
@@ -498,10 +499,10 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
                        goto out_put;
 
                if (data_layer)
-                       nr_data++;
+                       ctx->nr_data++;
 
                /* Calling strchr() again would overrun. */
-               if ((nr + 1) == nr_lower)
+               if (ctx->nr == nr_lower)
                        break;
 
                err = -EINVAL;
@@ -511,7 +512,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
                         * This is a regular layer so we require that
                         * there are no data layers.
                         */
-                       if ((ctx->nr_data + nr_data) > 0) {
+                       if (ctx->nr_data > 0) {
                                pr_err("regular lower layers cannot follow data lower layers");
                                goto out_put;
                        }
@@ -524,8 +525,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
                data_layer = true;
                iter++;
        }
-       ctx->nr = nr_lower;
-       ctx->nr_data += nr_data;
        kfree(dup);
        return 0;
 
index 50a201e9cd398aab74a61634f26e51a7f6b47893..c3f020ca13a8c705e312400390b5727e8b4dd312 100644 (file)
@@ -978,7 +978,7 @@ int ovl_set_protattr(struct inode *inode, struct dentry *upper,
        return 0;
 }
 
-/**
+/*
  * Caller must hold a reference to inode to prevent it from being freed while
  * it is marked inuse.
  */
index 6f3285f1dfee58390d157dc8223d269220a730e1..af7849e5974ff36619405a12e667e7543bb3926f 100644 (file)
@@ -64,8 +64,8 @@ struct key_type cifs_spnego_key_type = {
  * strlen(";sec=ntlmsspi") */
 #define MAX_MECH_STR_LEN       13
 
-/* strlen of "host=" */
-#define HOST_KEY_LEN           5
+/* strlen of ";host=" */
+#define HOST_KEY_LEN           6
 
 /* strlen of ";ip4=" or ";ip6=" */
 #define IP_KEY_LEN             5
index 6ffbd81bd109a0acf71f1ed1e60146c6538e1384..7558167f603c383d706a84b6ebbf4314091abe5b 100644 (file)
@@ -191,7 +191,13 @@ struct cifs_open_info_data {
                bool reparse_point;
                bool symlink;
        };
-       __u32 reparse_tag;
+       struct {
+               __u32 tag;
+               union {
+                       struct reparse_data_buffer *buf;
+                       struct reparse_posix_data *posix;
+               };
+       } reparse;
        char *symlink_target;
        union {
                struct smb2_file_all_info fi;
@@ -395,8 +401,7 @@ struct smb_version_operations {
                             struct cifs_tcon *tcon,
                             struct cifs_sb_info *cifs_sb,
                             const char *full_path,
-                            char **target_path,
-                            struct kvec *rsp_iov);
+                            char **target_path);
        /* open a file for non-posix mounts */
        int (*open)(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock,
                    void *buf);
@@ -551,6 +556,9 @@ struct smb_version_operations {
        bool (*is_status_io_timeout)(char *buf);
        /* Check for STATUS_NETWORK_NAME_DELETED */
        bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv);
+       int (*parse_reparse_point)(struct cifs_sb_info *cifs_sb,
+                                  struct kvec *rsp_iov,
+                                  struct cifs_open_info_data *data);
 };
 
 struct smb_version_values {
index a75220db5c1e1893ea852c0ad761a63bd7adbfdf..83ccc51a54d03363df2c6b22ad84494305259374 100644 (file)
@@ -1356,7 +1356,7 @@ typedef struct smb_com_transaction_ioctl_rsp {
        __le32 DataDisplacement;
        __u8 SetupCount;        /* 1 */
        __le16 ReturnedDataLen;
-       __u16 ByteCount;
+       __le16 ByteCount;
 } __attribute__((packed)) TRANSACT_IOCTL_RSP;
 
 #define CIFS_ACL_OWNER 1
@@ -1509,7 +1509,7 @@ struct reparse_posix_data {
        __le16  ReparseDataLength;
        __u16   Reserved;
        __le64  InodeType; /* LNK, FIFO, CHR etc. */
-       char    PathBuffer[];
+       __u8    DataBuffer[];
 } __attribute__((packed));
 
 struct cifs_quota_data {
index d87e2c26cce2b22dc8a2f85870410cf80176e6e5..46feaa0880bdf18f54eff409694c7d06c501ebf4 100644 (file)
@@ -210,7 +210,7 @@ int cifs_get_inode_info(struct inode **inode, const char *full_path,
                        const struct cifs_fid *fid);
 bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb,
                                 struct cifs_fattr *fattr,
-                                u32 tag);
+                                struct cifs_open_info_data *data);
 extern int smb311_posix_get_inode_info(struct inode **pinode, const char *search_path,
                        struct super_block *sb, unsigned int xid);
 extern int cifs_get_inode_info_unix(struct inode **pinode,
@@ -458,6 +458,12 @@ extern int CIFSSMBUnixQuerySymLink(const unsigned int xid,
                        struct cifs_tcon *tcon,
                        const unsigned char *searchName, char **syminfo,
                        const struct nls_table *nls_codepage, int remap);
+extern int cifs_query_reparse_point(const unsigned int xid,
+                                   struct cifs_tcon *tcon,
+                                   struct cifs_sb_info *cifs_sb,
+                                   const char *full_path,
+                                   u32 *tag, struct kvec *rsp,
+                                   int *rsp_buftype);
 extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
                               __u16 fid, char **symlinkinfo,
                               const struct nls_table *nls_codepage);
@@ -659,6 +665,12 @@ void cifs_put_tcp_super(struct super_block *sb);
 int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix);
 char *extract_hostname(const char *unc);
 char *extract_sharename(const char *unc);
+int parse_reparse_point(struct reparse_data_buffer *buf,
+                       u32 plen, struct cifs_sb_info *cifs_sb,
+                       bool unicode, struct cifs_open_info_data *data);
+int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+                      struct dentry *dentry, struct cifs_tcon *tcon,
+                      const char *full_path, umode_t mode, dev_t dev);
 
 #ifdef CONFIG_CIFS_DFS_UPCALL
 static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses,
index 25503f1a4fd21313304fabe4b8d6fab89b231cd5..bad91ba6c3a9cc5bf1a1d97a3054fa263c426285 100644 (file)
@@ -2690,136 +2690,97 @@ querySymLinkRetry:
        return rc;
 }
 
-/*
- *     Recent Windows versions now create symlinks more frequently
- *     and they use the "reparse point" mechanism below.  We can of course
- *     do symlinks nicely to Samba and other servers which support the
- *     CIFS Unix Extensions and we can also do SFU symlinks and "client only"
- *     "MF" symlinks optionally, but for recent Windows we really need to
- *     reenable the code below and fix the cifs_symlink callers to handle this.
- *     In the interim this code has been moved to its own config option so
- *     it is not compiled in by default until callers fixed up and more tested.
- */
-int
-CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
-                   __u16 fid, char **symlinkinfo,
-                   const struct nls_table *nls_codepage)
+int cifs_query_reparse_point(const unsigned int xid,
+                            struct cifs_tcon *tcon,
+                            struct cifs_sb_info *cifs_sb,
+                            const char *full_path,
+                            u32 *tag, struct kvec *rsp,
+                            int *rsp_buftype)
 {
-       int rc = 0;
-       int bytes_returned;
-       struct smb_com_transaction_ioctl_req *pSMB;
-       struct smb_com_transaction_ioctl_rsp *pSMBr;
-       bool is_unicode;
-       unsigned int sub_len;
-       char *sub_start;
-       struct reparse_symlink_data *reparse_buf;
-       struct reparse_posix_data *posix_buf;
+       struct cifs_open_parms oparms;
+       TRANSACT_IOCTL_REQ *io_req = NULL;
+       TRANSACT_IOCTL_RSP *io_rsp = NULL;
+       struct cifs_fid fid;
        __u32 data_offset, data_count;
-       char *end_of_smb;
+       __u8 *start, *end;
+       int io_rsp_len;
+       int oplock = 0;
+       int rc;
 
-       cifs_dbg(FYI, "In Windows reparse style QueryLink for fid %u\n", fid);
-       rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
-                     (void **) &pSMBr);
+       cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path);
+
+       if (cap_unix(tcon->ses))
+               return -EOPNOTSUPP;
+
+       oparms = (struct cifs_open_parms) {
+               .tcon = tcon,
+               .cifs_sb = cifs_sb,
+               .desired_access = FILE_READ_ATTRIBUTES,
+               .create_options = cifs_create_options(cifs_sb,
+                                                     OPEN_REPARSE_POINT),
+               .disposition = FILE_OPEN,
+               .path = full_path,
+               .fid = &fid,
+       };
+
+       rc = CIFS_open(xid, &oparms, &oplock, NULL);
        if (rc)
                return rc;
 
-       pSMB->TotalParameterCount = 0 ;
-       pSMB->TotalDataCount = 0;
-       pSMB->MaxParameterCount = cpu_to_le32(2);
-       /* BB find exact data count max from sess structure BB */
-       pSMB->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00);
-       pSMB->MaxSetupCount = 4;
-       pSMB->Reserved = 0;
-       pSMB->ParameterOffset = 0;
-       pSMB->DataCount = 0;
-       pSMB->DataOffset = 0;
-       pSMB->SetupCount = 4;
-       pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL);
-       pSMB->ParameterCount = pSMB->TotalParameterCount;
-       pSMB->FunctionCode = cpu_to_le32(FSCTL_GET_REPARSE_POINT);
-       pSMB->IsFsctl = 1; /* FSCTL */
-       pSMB->IsRootFlag = 0;
-       pSMB->Fid = fid; /* file handle always le */
-       pSMB->ByteCount = 0;
+       rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon,
+                     (void **)&io_req, (void **)&io_rsp);
+       if (rc)
+               goto error;
 
-       rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
-                        (struct smb_hdr *) pSMBr, &bytes_returned, 0);
-       if (rc) {
-               cifs_dbg(FYI, "Send error in QueryReparseLinkInfo = %d\n", rc);
-               goto qreparse_out;
-       }
+       io_req->TotalParameterCount = 0;
+       io_req->TotalDataCount = 0;
+       io_req->MaxParameterCount = cpu_to_le32(2);
+       /* BB find exact data count max from sess structure BB */
+       io_req->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00);
+       io_req->MaxSetupCount = 4;
+       io_req->Reserved = 0;
+       io_req->ParameterOffset = 0;
+       io_req->DataCount = 0;
+       io_req->DataOffset = 0;
+       io_req->SetupCount = 4;
+       io_req->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL);
+       io_req->ParameterCount = io_req->TotalParameterCount;
+       io_req->FunctionCode = cpu_to_le32(FSCTL_GET_REPARSE_POINT);
+       io_req->IsFsctl = 1;
+       io_req->IsRootFlag = 0;
+       io_req->Fid = fid.netfid;
+       io_req->ByteCount = 0;
+
+       rc = SendReceive(xid, tcon->ses, (struct smb_hdr *)io_req,
+                        (struct smb_hdr *)io_rsp, &io_rsp_len, 0);
+       if (rc)
+               goto error;
 
-       data_offset = le32_to_cpu(pSMBr->DataOffset);
-       data_count = le32_to_cpu(pSMBr->DataCount);
-       if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
-               /* BB also check enough total bytes returned */
-               rc = -EIO;      /* bad smb */
-               goto qreparse_out;
-       }
-       if (!data_count || (data_count > 2048)) {
+       data_offset = le32_to_cpu(io_rsp->DataOffset);
+       data_count = le32_to_cpu(io_rsp->DataCount);
+       if (get_bcc(&io_rsp->hdr) < 2 || data_offset > 512 ||
+           !data_count || data_count > 2048) {
                rc = -EIO;
-               cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n");
-               goto qreparse_out;
-       }
-       end_of_smb = 2 + get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
-       reparse_buf = (struct reparse_symlink_data *)
-                               ((char *)&pSMBr->hdr.Protocol + data_offset);
-       if ((char *)reparse_buf >= end_of_smb) {
-               rc = -EIO;
-               goto qreparse_out;
-       }
-       if (reparse_buf->ReparseTag == cpu_to_le32(IO_REPARSE_TAG_NFS)) {
-               cifs_dbg(FYI, "NFS style reparse tag\n");
-               posix_buf =  (struct reparse_posix_data *)reparse_buf;
-
-               if (posix_buf->InodeType != cpu_to_le64(NFS_SPECFILE_LNK)) {
-                       cifs_dbg(FYI, "unsupported file type 0x%llx\n",
-                                le64_to_cpu(posix_buf->InodeType));
-                       rc = -EOPNOTSUPP;
-                       goto qreparse_out;
-               }
-               is_unicode = true;
-               sub_len = le16_to_cpu(reparse_buf->ReparseDataLength);
-               if (posix_buf->PathBuffer + sub_len > end_of_smb) {
-                       cifs_dbg(FYI, "reparse buf beyond SMB\n");
-                       rc = -EIO;
-                       goto qreparse_out;
-               }
-               *symlinkinfo = cifs_strndup_from_utf16(posix_buf->PathBuffer,
-                               sub_len, is_unicode, nls_codepage);
-               goto qreparse_out;
-       } else if (reparse_buf->ReparseTag !=
-                       cpu_to_le32(IO_REPARSE_TAG_SYMLINK)) {
-               rc = -EOPNOTSUPP;
-               goto qreparse_out;
+               goto error;
        }
 
-       /* Reparse tag is NTFS symlink */
-       sub_start = le16_to_cpu(reparse_buf->SubstituteNameOffset) +
-                               reparse_buf->PathBuffer;
-       sub_len = le16_to_cpu(reparse_buf->SubstituteNameLength);
-       if (sub_start + sub_len > end_of_smb) {
-               cifs_dbg(FYI, "reparse buf beyond SMB\n");
+       end = 2 + get_bcc(&io_rsp->hdr) + (__u8 *)&io_rsp->ByteCount;
+       start = (__u8 *)&io_rsp->hdr.Protocol + data_offset;
+       if (start >= end) {
                rc = -EIO;
-               goto qreparse_out;
+               goto error;
        }
-       if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
-               is_unicode = true;
-       else
-               is_unicode = false;
-
-       /* BB FIXME investigate remapping reserved chars here */
-       *symlinkinfo = cifs_strndup_from_utf16(sub_start, sub_len, is_unicode,
-                                              nls_codepage);
-       if (!*symlinkinfo)
-               rc = -ENOMEM;
-qreparse_out:
-       cifs_buf_release(pSMB);
 
-       /*
-        * Note: On -EAGAIN error only caller can retry on handle based calls
-        * since file handle passed in no longer valid.
-        */
+       *tag = le32_to_cpu(((struct reparse_data_buffer *)start)->ReparseTag);
+       rsp->iov_base = io_rsp;
+       rsp->iov_len = io_rsp_len;
+       *rsp_buftype = CIFS_LARGE_BUFFER;
+       CIFSSMBClose(xid, tcon, fid.netfid);
+       return 0;
+
+error:
+       cifs_buf_release(io_req);
+       CIFSSMBClose(xid, tcon, fid.netfid);
        return rc;
 }
 
index 57c2a7df34578370a046822a9e7844a87b2af8e4..f896f60c924bfa462e8fa764fdf806bcd67b0cfb 100644 (file)
@@ -2065,6 +2065,12 @@ void __cifs_put_smb_ses(struct cifs_ses *ses)
                ses->chans[i].server = NULL;
        }
 
+       /* we now account for primary channel in iface->refcount */
+       if (ses->chans[0].iface) {
+               kref_put(&ses->chans[0].iface->refcount, release_iface);
+               ses->chans[0].server = NULL;
+       }
+
        sesInfoFree(ses);
        cifs_put_tcp_session(server, 0);
 }
index 86fbd3f847d657f33558455ac8e3ef9b2c991592..47f49be69ced2563b87161538cba78800375eca6 100644 (file)
@@ -459,8 +459,7 @@ static int cifs_get_unix_fattr(const unsigned char *full_path,
                        return -EOPNOTSUPP;
                rc = server->ops->query_symlink(xid, tcon,
                                                cifs_sb, full_path,
-                                               &fattr->cf_symlink_target,
-                                               NULL);
+                                               &fattr->cf_symlink_target);
                cifs_dbg(FYI, "%s: query_symlink: %d\n", __func__, rc);
        }
        return rc;
@@ -722,10 +721,51 @@ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr,
                fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink);
 }
 
+static inline dev_t nfs_mkdev(struct reparse_posix_data *buf)
+{
+       u64 v = le64_to_cpu(*(__le64 *)buf->DataBuffer);
+
+       return MKDEV(v >> 32, v & 0xffffffff);
+}
+
 bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb,
                                 struct cifs_fattr *fattr,
-                                u32 tag)
+                                struct cifs_open_info_data *data)
 {
+       struct reparse_posix_data *buf = data->reparse.posix;
+       u32 tag = data->reparse.tag;
+
+       if (tag == IO_REPARSE_TAG_NFS && buf) {
+               switch (le64_to_cpu(buf->InodeType)) {
+               case NFS_SPECFILE_CHR:
+                       fattr->cf_mode |= S_IFCHR | cifs_sb->ctx->file_mode;
+                       fattr->cf_dtype = DT_CHR;
+                       fattr->cf_rdev = nfs_mkdev(buf);
+                       break;
+               case NFS_SPECFILE_BLK:
+                       fattr->cf_mode |= S_IFBLK | cifs_sb->ctx->file_mode;
+                       fattr->cf_dtype = DT_BLK;
+                       fattr->cf_rdev = nfs_mkdev(buf);
+                       break;
+               case NFS_SPECFILE_FIFO:
+                       fattr->cf_mode |= S_IFIFO | cifs_sb->ctx->file_mode;
+                       fattr->cf_dtype = DT_FIFO;
+                       break;
+               case NFS_SPECFILE_SOCK:
+                       fattr->cf_mode |= S_IFSOCK | cifs_sb->ctx->file_mode;
+                       fattr->cf_dtype = DT_SOCK;
+                       break;
+               case NFS_SPECFILE_LNK:
+                       fattr->cf_mode = S_IFLNK | cifs_sb->ctx->file_mode;
+                       fattr->cf_dtype = DT_LNK;
+                       break;
+               default:
+                       WARN_ON_ONCE(1);
+                       return false;
+               }
+               return true;
+       }
+
        switch (tag) {
        case IO_REPARSE_TAG_LX_SYMLINK:
                fattr->cf_mode |= S_IFLNK | cifs_sb->ctx->file_mode;
@@ -791,7 +831,7 @@ static void cifs_open_info_to_fattr(struct cifs_fattr *fattr,
        fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
 
        if (cifs_open_data_reparse(data) &&
-           cifs_reparse_point_to_fattr(cifs_sb, fattr, data->reparse_tag))
+           cifs_reparse_point_to_fattr(cifs_sb, fattr, data))
                goto out_reparse;
 
        if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
@@ -856,7 +896,7 @@ cifs_get_file_info(struct file *filp)
                data.adjust_tz = false;
                if (data.symlink_target) {
                        data.symlink = true;
-                       data.reparse_tag = IO_REPARSE_TAG_SYMLINK;
+                       data.reparse.tag = IO_REPARSE_TAG_SYMLINK;
                }
                cifs_open_info_to_fattr(&fattr, &data, inode->i_sb);
                break;
@@ -1025,7 +1065,7 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct kvec rsp_iov, *iov = NULL;
        int rsp_buftype = CIFS_NO_BUFFER;
-       u32 tag = data->reparse_tag;
+       u32 tag = data->reparse.tag;
        int rc = 0;
 
        if (!tag && server->ops->query_reparse_point) {
@@ -1035,22 +1075,28 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
                if (!rc)
                        iov = &rsp_iov;
        }
-       switch ((data->reparse_tag = tag)) {
+
+       rc = -EOPNOTSUPP;
+       switch ((data->reparse.tag = tag)) {
        case 0: /* SMB1 symlink */
-               iov = NULL;
-               fallthrough;
-       case IO_REPARSE_TAG_NFS:
-       case IO_REPARSE_TAG_SYMLINK:
-               if (!data->symlink_target && server->ops->query_symlink) {
+               if (server->ops->query_symlink) {
                        rc = server->ops->query_symlink(xid, tcon,
                                                        cifs_sb, full_path,
-                                                       &data->symlink_target,
-                                                       iov);
+                                                       &data->symlink_target);
                }
                break;
        case IO_REPARSE_TAG_MOUNT_POINT:
                cifs_create_junction_fattr(fattr, sb);
+               rc = 0;
                goto out;
+       default:
+               if (data->symlink_target) {
+                       rc = 0;
+               } else if (server->ops->parse_reparse_point) {
+                       rc = server->ops->parse_reparse_point(cifs_sb,
+                                                             iov, data);
+               }
+               break;
        }
 
        cifs_open_info_to_fattr(fattr, data, sb);
index 47fc22de8d20c79c6d03647cfad95c4b392bb584..d30ea2005eb361a9d9af8ba39568d4168409a3ee 100644 (file)
@@ -153,6 +153,10 @@ static bool reparse_file_needs_reval(const struct cifs_fattr *fattr)
 static void
 cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
 {
+       struct cifs_open_info_data data = {
+               .reparse = { .tag = fattr->cf_cifstag, },
+       };
+
        fattr->cf_uid = cifs_sb->ctx->linux_uid;
        fattr->cf_gid = cifs_sb->ctx->linux_gid;
 
@@ -165,7 +169,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
         * reasonably map some of them to directories vs. files vs. symlinks
         */
        if ((fattr->cf_cifsattrs & ATTR_REPARSE) &&
-           cifs_reparse_point_to_fattr(cifs_sb, fattr, fattr->cf_cifstag))
+           cifs_reparse_point_to_fattr(cifs_sb, fattr, &data))
                goto out_reparse;
 
        if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
index 0bb2ac9290617941619228caea487d7ba6f41ea6..816e01c5589b450a9a5b6887c99aec4f48a5f361 100644 (file)
@@ -322,28 +322,32 @@ cifs_disable_secondary_channels(struct cifs_ses *ses)
                iface = ses->chans[i].iface;
                server = ses->chans[i].server;
 
+               /*
+                * remove these references first, since we need to unlock
+                * the chan_lock here, since iface_lock is a higher lock
+                */
+               ses->chans[i].iface = NULL;
+               ses->chans[i].server = NULL;
+               spin_unlock(&ses->chan_lock);
+
                if (iface) {
                        spin_lock(&ses->iface_lock);
-                       kref_put(&iface->refcount, release_iface);
-                       ses->chans[i].iface = NULL;
                        iface->num_channels--;
                        if (iface->weight_fulfilled)
                                iface->weight_fulfilled--;
+                       kref_put(&iface->refcount, release_iface);
                        spin_unlock(&ses->iface_lock);
                }
 
-               spin_unlock(&ses->chan_lock);
-               if (server && !server->terminate) {
-                       server->terminate = true;
-                       cifs_signal_cifsd_for_reconnect(server, false);
-               }
-               spin_lock(&ses->chan_lock);
-
                if (server) {
-                       ses->chans[i].server = NULL;
+                       if (!server->terminate) {
+                               server->terminate = true;
+                               cifs_signal_cifsd_for_reconnect(server, false);
+                       }
                        cifs_put_tcp_session(server, false);
                }
 
+               spin_lock(&ses->chan_lock);
        }
 
 done:
index 9bf8735cdd1e8f61f737435fd82a0cd2a8bed73a..a9eaba8083b0d6b2745ebedb1d3d4705c0f4809d 100644 (file)
@@ -976,64 +976,37 @@ static int cifs_query_symlink(const unsigned int xid,
                              struct cifs_tcon *tcon,
                              struct cifs_sb_info *cifs_sb,
                              const char *full_path,
-                             char **target_path,
-                             struct kvec *rsp_iov)
+                             char **target_path)
 {
        int rc;
-       int oplock = 0;
-       bool is_reparse_point = !!rsp_iov;
-       struct cifs_fid fid;
-       struct cifs_open_parms oparms;
 
-       cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
+       cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path);
 
-       if (is_reparse_point) {
-               cifs_dbg(VFS, "reparse points not handled for SMB1 symlinks\n");
+       if (!cap_unix(tcon->ses))
                return -EOPNOTSUPP;
-       }
-
-       /* Check for unix extensions */
-       if (cap_unix(tcon->ses)) {
-               rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path,
-                                            cifs_sb->local_nls,
-                                            cifs_remap(cifs_sb));
-               if (rc == -EREMOTE)
-                       rc = cifs_unix_dfs_readlink(xid, tcon, full_path,
-                                                   target_path,
-                                                   cifs_sb->local_nls);
-
-               goto out;
-       }
-
-       oparms = (struct cifs_open_parms) {
-               .tcon = tcon,
-               .cifs_sb = cifs_sb,
-               .desired_access = FILE_READ_ATTRIBUTES,
-               .create_options = cifs_create_options(cifs_sb,
-                                                     OPEN_REPARSE_POINT),
-               .disposition = FILE_OPEN,
-               .path = full_path,
-               .fid = &fid,
-       };
-
-       rc = CIFS_open(xid, &oparms, &oplock, NULL);
-       if (rc)
-               goto out;
-
-       rc = CIFSSMBQuerySymLink(xid, tcon, fid.netfid, target_path,
-                                cifs_sb->local_nls);
-       if (rc)
-               goto out_close;
 
-       convert_delimiter(*target_path, '/');
-out_close:
-       CIFSSMBClose(xid, tcon, fid.netfid);
-out:
-       if (!rc)
-               cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
+       rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path,
+                                    cifs_sb->local_nls, cifs_remap(cifs_sb));
+       if (rc == -EREMOTE)
+               rc = cifs_unix_dfs_readlink(xid, tcon, full_path,
+                                           target_path, cifs_sb->local_nls);
        return rc;
 }
 
+static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb,
+                                   struct kvec *rsp_iov,
+                                   struct cifs_open_info_data *data)
+{
+       struct reparse_data_buffer *buf;
+       TRANSACT_IOCTL_RSP *io = rsp_iov->iov_base;
+       bool unicode = !!(io->hdr.Flags2 & SMBFLG2_UNICODE);
+       u32 plen = le16_to_cpu(io->ByteCount);
+
+       buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol +
+                                            le32_to_cpu(io->DataOffset));
+       return parse_reparse_point(buf, plen, cifs_sb, unicode, data);
+}
+
 static bool
 cifs_is_read_op(__u32 oplock)
 {
@@ -1068,15 +1041,7 @@ cifs_make_node(unsigned int xid, struct inode *inode,
 {
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct inode *newinode = NULL;
-       int rc = -EPERM;
-       struct cifs_open_info_data buf = {};
-       struct cifs_io_parms io_parms;
-       __u32 oplock = 0;
-       struct cifs_fid fid;
-       struct cifs_open_parms oparms;
-       unsigned int bytes_written;
-       struct win_dev *pdev;
-       struct kvec iov[2];
+       int rc;
 
        if (tcon->unix_ext) {
                /*
@@ -1110,74 +1075,18 @@ cifs_make_node(unsigned int xid, struct inode *inode,
                        d_instantiate(dentry, newinode);
                return rc;
        }
-
        /*
-        * SMB1 SFU emulation: should work with all servers, but only
-        * support block and char device (no socket & fifo)
+        * Check if mounted with mount parm 'sfu' mount parm.
+        * SFU emulation should work with all servers, but only
+        * supports block and char device (no socket & fifo),
+        * and was used by default in earlier versions of Windows
         */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
-               return rc;
-
-       if (!S_ISCHR(mode) && !S_ISBLK(mode))
-               return rc;
-
-       cifs_dbg(FYI, "sfu compat create special file\n");
-
-       oparms = (struct cifs_open_parms) {
-               .tcon = tcon,
-               .cifs_sb = cifs_sb,
-               .desired_access = GENERIC_WRITE,
-               .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR |
-                                                     CREATE_OPTION_SPECIAL),
-               .disposition = FILE_CREATE,
-               .path = full_path,
-               .fid = &fid,
-       };
-
-       if (tcon->ses->server->oplocks)
-               oplock = REQ_OPLOCK;
-       else
-               oplock = 0;
-       rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf);
-       if (rc)
-               return rc;
-
-       /*
-        * BB Do not bother to decode buf since no local inode yet to put
-        * timestamps in, but we can reuse it safely.
-        */
-
-       pdev = (struct win_dev *)&buf.fi;
-       io_parms.pid = current->tgid;
-       io_parms.tcon = tcon;
-       io_parms.offset = 0;
-       io_parms.length = sizeof(struct win_dev);
-       iov[1].iov_base = &buf.fi;
-       iov[1].iov_len = sizeof(struct win_dev);
-       if (S_ISCHR(mode)) {
-               memcpy(pdev->type, "IntxCHR", 8);
-               pdev->major = cpu_to_le64(MAJOR(dev));
-               pdev->minor = cpu_to_le64(MINOR(dev));
-               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
-                                                       &bytes_written, iov, 1);
-       } else if (S_ISBLK(mode)) {
-               memcpy(pdev->type, "IntxBLK", 8);
-               pdev->major = cpu_to_le64(MAJOR(dev));
-               pdev->minor = cpu_to_le64(MINOR(dev));
-               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
-                                                       &bytes_written, iov, 1);
-       }
-       tcon->ses->server->ops->close(xid, tcon, &fid);
-       d_drop(dentry);
-
-       /* FIXME: add code here to set EAs */
-
-       cifs_free_open_info(&buf);
-       return rc;
+               return -EPERM;
+       return cifs_sfu_make_node(xid, inode, dentry, tcon,
+                                 full_path, mode, dev);
 }
 
-
-
 struct smb_version_operations smb1_operations = {
        .send_cancel = send_nt_cancel,
        .compare_fids = cifs_compare_fids,
@@ -1214,6 +1123,7 @@ struct smb_version_operations smb1_operations = {
        .is_path_accessible = cifs_is_path_accessible,
        .can_echo = cifs_can_echo,
        .query_path_info = cifs_query_path_info,
+       .query_reparse_point = cifs_query_reparse_point,
        .query_file_info = cifs_query_file_info,
        .get_srv_inum = cifs_get_srv_inum,
        .set_path_size = CIFSSMBSetEOF,
@@ -1229,6 +1139,7 @@ struct smb_version_operations smb1_operations = {
        .rename = CIFSSMBRename,
        .create_hardlink = CIFSCreateHardLink,
        .query_symlink = cifs_query_symlink,
+       .parse_reparse_point = cifs_parse_reparse_point,
        .open = cifs_open_file,
        .set_fid = cifs_set_fid,
        .close = cifs_close_file,
index 0b89f7008ac0f429cc1a6b004c70117e19658837..c94940af5d4b8d79453ac766e4e46216b714b47a 100644 (file)
@@ -555,7 +555,7 @@ static int parse_create_response(struct cifs_open_info_data *data,
                break;
        }
        data->reparse_point = reparse_point;
-       data->reparse_tag = tag;
+       data->reparse.tag = tag;
        return rc;
 }
 
index a959ed2c9b22e44a59dc31950c373c2661cc425e..82ab62fd00404d76d11e34632b47f8d6ac182992 100644 (file)
@@ -2866,115 +2866,119 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
        return rc;
 }
 
-static int
-parse_reparse_posix(struct reparse_posix_data *symlink_buf,
-                     u32 plen, char **target_path,
-                     struct cifs_sb_info *cifs_sb)
+/* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */
+static int parse_reparse_posix(struct reparse_posix_data *buf,
+                              struct cifs_sb_info *cifs_sb,
+                              struct cifs_open_info_data *data)
 {
        unsigned int len;
-
-       /* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */
-       len = le16_to_cpu(symlink_buf->ReparseDataLength);
-
-       if (le64_to_cpu(symlink_buf->InodeType) != NFS_SPECFILE_LNK) {
-               cifs_dbg(VFS, "%lld not a supported symlink type\n",
-                       le64_to_cpu(symlink_buf->InodeType));
+       u64 type;
+
+       switch ((type = le64_to_cpu(buf->InodeType))) {
+       case NFS_SPECFILE_LNK:
+               len = le16_to_cpu(buf->ReparseDataLength);
+               data->symlink_target = cifs_strndup_from_utf16(buf->DataBuffer,
+                                                              len, true,
+                                                              cifs_sb->local_nls);
+               if (!data->symlink_target)
+                       return -ENOMEM;
+               convert_delimiter(data->symlink_target, '/');
+               cifs_dbg(FYI, "%s: target path: %s\n",
+                        __func__, data->symlink_target);
+               break;
+       case NFS_SPECFILE_CHR:
+       case NFS_SPECFILE_BLK:
+       case NFS_SPECFILE_FIFO:
+       case NFS_SPECFILE_SOCK:
+               break;
+       default:
+               cifs_dbg(VFS, "%s: unhandled inode type: 0x%llx\n",
+                        __func__, type);
                return -EOPNOTSUPP;
        }
-
-       *target_path = cifs_strndup_from_utf16(
-                               symlink_buf->PathBuffer,
-                               len, true, cifs_sb->local_nls);
-       if (!(*target_path))
-               return -ENOMEM;
-
-       convert_delimiter(*target_path, '/');
-       cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
-
        return 0;
 }
 
-static int
-parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf,
-                     u32 plen, char **target_path,
-                     struct cifs_sb_info *cifs_sb)
+static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym,
+                                u32 plen, bool unicode,
+                                struct cifs_sb_info *cifs_sb,
+                                struct cifs_open_info_data *data)
 {
-       unsigned int sub_len;
-       unsigned int sub_offset;
+       unsigned int len;
+       unsigned int offs;
 
        /* We handle Symbolic Link reparse tag here. See: MS-FSCC 2.1.2.4 */
 
-       sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset);
-       sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength);
-       if (sub_offset + 20 > plen ||
-           sub_offset + sub_len + 20 > plen) {
+       offs = le16_to_cpu(sym->SubstituteNameOffset);
+       len = le16_to_cpu(sym->SubstituteNameLength);
+       if (offs + 20 > plen || offs + len + 20 > plen) {
                cifs_dbg(VFS, "srv returned malformed symlink buffer\n");
                return -EIO;
        }
 
-       *target_path = cifs_strndup_from_utf16(
-                               symlink_buf->PathBuffer + sub_offset,
-                               sub_len, true, cifs_sb->local_nls);
-       if (!(*target_path))
+       data->symlink_target = cifs_strndup_from_utf16(sym->PathBuffer + offs,
+                                                      len, unicode,
+                                                      cifs_sb->local_nls);
+       if (!data->symlink_target)
                return -ENOMEM;
 
-       convert_delimiter(*target_path, '/');
-       cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
+       convert_delimiter(data->symlink_target, '/');
+       cifs_dbg(FYI, "%s: target path: %s\n", __func__, data->symlink_target);
 
        return 0;
 }
 
-static int
-parse_reparse_point(struct reparse_data_buffer *buf,
-                   u32 plen, char **target_path,
-                   struct cifs_sb_info *cifs_sb)
+int parse_reparse_point(struct reparse_data_buffer *buf,
+                       u32 plen, struct cifs_sb_info *cifs_sb,
+                       bool unicode, struct cifs_open_info_data *data)
 {
-       if (plen < sizeof(struct reparse_data_buffer)) {
-               cifs_dbg(VFS, "reparse buffer is too small. Must be at least 8 bytes but was %d\n",
-                        plen);
+       if (plen < sizeof(*buf)) {
+               cifs_dbg(VFS, "%s: reparse buffer is too small. Must be at least 8 bytes but was %d\n",
+                        __func__, plen);
                return -EIO;
        }
 
-       if (plen < le16_to_cpu(buf->ReparseDataLength) +
-           sizeof(struct reparse_data_buffer)) {
-               cifs_dbg(VFS, "srv returned invalid reparse buf length: %d\n",
-                        plen);
+       if (plen < le16_to_cpu(buf->ReparseDataLength) + sizeof(*buf)) {
+               cifs_dbg(VFS, "%s: invalid reparse buf length: %d\n",
+                        __func__, plen);
                return -EIO;
        }
 
+       data->reparse.buf = buf;
+
        /* See MS-FSCC 2.1.2 */
        switch (le32_to_cpu(buf->ReparseTag)) {
        case IO_REPARSE_TAG_NFS:
-               return parse_reparse_posix(
-                       (struct reparse_posix_data *)buf,
-                       plen, target_path, cifs_sb);
+               return parse_reparse_posix((struct reparse_posix_data *)buf,
+                                          cifs_sb, data);
        case IO_REPARSE_TAG_SYMLINK:
                return parse_reparse_symlink(
                        (struct reparse_symlink_data_buffer *)buf,
-                       plen, target_path, cifs_sb);
+                       plen, unicode, cifs_sb, data);
+       case IO_REPARSE_TAG_LX_SYMLINK:
+       case IO_REPARSE_TAG_AF_UNIX:
+       case IO_REPARSE_TAG_LX_FIFO:
+       case IO_REPARSE_TAG_LX_CHR:
+       case IO_REPARSE_TAG_LX_BLK:
+               return 0;
        default:
-               cifs_dbg(VFS, "srv returned unknown symlink buffer tag:0x%08x\n",
-                        le32_to_cpu(buf->ReparseTag));
+               cifs_dbg(VFS, "%s: unhandled reparse tag: 0x%08x\n",
+                        __func__, le32_to_cpu(buf->ReparseTag));
                return -EOPNOTSUPP;
        }
 }
 
-static int smb2_query_symlink(const unsigned int xid,
-                             struct cifs_tcon *tcon,
-                             struct cifs_sb_info *cifs_sb,
-                             const char *full_path,
-                             char **target_path,
-                             struct kvec *rsp_iov)
+static int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb,
+                                   struct kvec *rsp_iov,
+                                   struct cifs_open_info_data *data)
 {
        struct reparse_data_buffer *buf;
        struct smb2_ioctl_rsp *io = rsp_iov->iov_base;
        u32 plen = le32_to_cpu(io->OutputCount);
 
-       cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
-
        buf = (struct reparse_data_buffer *)((u8 *)io +
                                             le32_to_cpu(io->OutputOffset));
-       return parse_reparse_point(buf, plen, target_path, cifs_sb);
+       return parse_reparse_point(buf, plen, cifs_sb, true, data);
 }
 
 static int smb2_query_reparse_point(const unsigned int xid,
@@ -5064,41 +5068,24 @@ smb2_next_header(char *buf)
        return le32_to_cpu(hdr->NextCommand);
 }
 
-static int
-smb2_make_node(unsigned int xid, struct inode *inode,
-              struct dentry *dentry, struct cifs_tcon *tcon,
-              const char *full_path, umode_t mode, dev_t dev)
+int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+                      struct dentry *dentry, struct cifs_tcon *tcon,
+                      const char *full_path, umode_t mode, dev_t dev)
 {
-       struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
-       int rc = -EPERM;
        struct cifs_open_info_data buf = {};
-       struct cifs_io_parms io_parms = {0};
-       __u32 oplock = 0;
-       struct cifs_fid fid;
+       struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;
+       struct cifs_io_parms io_parms = {};
+       struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+       struct cifs_fid fid;
        unsigned int bytes_written;
        struct win_dev *pdev;
        struct kvec iov[2];
-
-       /*
-        * Check if mounted with mount parm 'sfu' mount parm.
-        * SFU emulation should work with all servers, but only
-        * supports block and char device (no socket & fifo),
-        * and was used by default in earlier versions of Windows
-        */
-       if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
-               return rc;
-
-       /*
-        * TODO: Add ability to create instead via reparse point. Windows (e.g.
-        * their current NFS server) uses this approach to expose special files
-        * over SMB2/SMB3 and Samba will do this with SMB3.1.1 POSIX Extensions
-        */
+       __u32 oplock = server->oplocks ? REQ_OPLOCK : 0;
+       int rc;
 
        if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode))
-               return rc;
-
-       cifs_dbg(FYI, "sfu compat create special file\n");
+               return -EPERM;
 
        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
@@ -5111,11 +5098,7 @@ smb2_make_node(unsigned int xid, struct inode *inode,
                .fid = &fid,
        };
 
-       if (tcon->ses->server->oplocks)
-               oplock = REQ_OPLOCK;
-       else
-               oplock = 0;
-       rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf);
+       rc = server->ops->open(xid, &oparms, &oplock, &buf);
        if (rc)
                return rc;
 
@@ -5123,42 +5106,56 @@ smb2_make_node(unsigned int xid, struct inode *inode,
         * BB Do not bother to decode buf since no local inode yet to put
         * timestamps in, but we can reuse it safely.
         */
-
        pdev = (struct win_dev *)&buf.fi;
        io_parms.pid = current->tgid;
        io_parms.tcon = tcon;
-       io_parms.offset = 0;
-       io_parms.length = sizeof(struct win_dev);
-       iov[1].iov_base = &buf.fi;
-       iov[1].iov_len = sizeof(struct win_dev);
+       io_parms.length = sizeof(*pdev);
+       iov[1].iov_base = pdev;
+       iov[1].iov_len = sizeof(*pdev);
        if (S_ISCHR(mode)) {
                memcpy(pdev->type, "IntxCHR", 8);
                pdev->major = cpu_to_le64(MAJOR(dev));
                pdev->minor = cpu_to_le64(MINOR(dev));
-               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
-                                                       &bytes_written, iov, 1);
        } else if (S_ISBLK(mode)) {
                memcpy(pdev->type, "IntxBLK", 8);
                pdev->major = cpu_to_le64(MAJOR(dev));
                pdev->minor = cpu_to_le64(MINOR(dev));
-               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
-                                                       &bytes_written, iov, 1);
        } else if (S_ISFIFO(mode)) {
                memcpy(pdev->type, "LnxFIFO", 8);
-               pdev->major = 0;
-               pdev->minor = 0;
-               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
-                                                       &bytes_written, iov, 1);
        }
-       tcon->ses->server->ops->close(xid, tcon, &fid);
-       d_drop(dentry);
 
+       rc = server->ops->sync_write(xid, &fid, &io_parms,
+                                    &bytes_written, iov, 1);
+       server->ops->close(xid, tcon, &fid);
+       d_drop(dentry);
        /* FIXME: add code here to set EAs */
-
        cifs_free_open_info(&buf);
        return rc;
 }
 
+static int smb2_make_node(unsigned int xid, struct inode *inode,
+                         struct dentry *dentry, struct cifs_tcon *tcon,
+                         const char *full_path, umode_t mode, dev_t dev)
+{
+       struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+       /*
+        * Check if mounted with mount parm 'sfu' mount parm.
+        * SFU emulation should work with all servers, but only
+        * supports block and char device (no socket & fifo),
+        * and was used by default in earlier versions of Windows
+        */
+       if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
+               return -EPERM;
+       /*
+        * TODO: Add ability to create instead via reparse point. Windows (e.g.
+        * their current NFS server) uses this approach to expose special files
+        * over SMB2/SMB3 and Samba will do this with SMB3.1.1 POSIX Extensions
+        */
+       return cifs_sfu_make_node(xid, inode, dentry, tcon,
+                                 full_path, mode, dev);
+}
+
 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
 struct smb_version_operations smb20_operations = {
        .compare_fids = smb2_compare_fids,
@@ -5209,7 +5206,7 @@ struct smb_version_operations smb20_operations = {
        .unlink = smb2_unlink,
        .rename = smb2_rename_path,
        .create_hardlink = smb2_create_hardlink,
-       .query_symlink = smb2_query_symlink,
+       .parse_reparse_point = smb2_parse_reparse_point,
        .query_mf_symlink = smb3_query_mf_symlink,
        .create_mf_symlink = smb3_create_mf_symlink,
        .open = smb2_open_file,
@@ -5311,7 +5308,7 @@ struct smb_version_operations smb21_operations = {
        .unlink = smb2_unlink,
        .rename = smb2_rename_path,
        .create_hardlink = smb2_create_hardlink,
-       .query_symlink = smb2_query_symlink,
+       .parse_reparse_point = smb2_parse_reparse_point,
        .query_mf_symlink = smb3_query_mf_symlink,
        .create_mf_symlink = smb3_create_mf_symlink,
        .open = smb2_open_file,
@@ -5416,7 +5413,7 @@ struct smb_version_operations smb30_operations = {
        .unlink = smb2_unlink,
        .rename = smb2_rename_path,
        .create_hardlink = smb2_create_hardlink,
-       .query_symlink = smb2_query_symlink,
+       .parse_reparse_point = smb2_parse_reparse_point,
        .query_mf_symlink = smb3_query_mf_symlink,
        .create_mf_symlink = smb3_create_mf_symlink,
        .open = smb2_open_file,
@@ -5530,7 +5527,7 @@ struct smb_version_operations smb311_operations = {
        .unlink = smb2_unlink,
        .rename = smb2_rename_path,
        .create_hardlink = smb2_create_hardlink,
-       .query_symlink = smb2_query_symlink,
+       .parse_reparse_point = smb2_parse_reparse_point,
        .query_mf_symlink = smb3_query_mf_symlink,
        .create_mf_symlink = smb3_create_mf_symlink,
        .open = smb2_open_file,
index 84ea67301303c45c805fafc9bb2f5648655ec728..5a3ca62d2f07f72584392975221cbc9b12276fe8 100644 (file)
@@ -458,6 +458,8 @@ generate_smb3signingkey(struct cifs_ses *ses,
                                  ptriplet->encryption.context,
                                  ses->smb3encryptionkey,
                                  SMB3_ENC_DEC_KEY_SIZE);
+               if (rc)
+                       return rc;
                rc = generate_key(ses, ptriplet->decryption.label,
                                  ptriplet->decryption.context,
                                  ses->smb3decryptionkey,
@@ -466,9 +468,6 @@ generate_smb3signingkey(struct cifs_ses *ses,
                        return rc;
        }
 
-       if (rc)
-               return rc;
-
 #ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS
        cifs_dbg(VFS, "%s: dumping generated AES session keys\n", __func__);
        /*
index a2ed441e837ae44827ef59aae7313d8ec7779c99..d7c676c151e209e89cb29b68eb9d14e7e3b9d70c 100644 (file)
@@ -56,6 +56,9 @@ void ksmbd_free_work_struct(struct ksmbd_work *work)
        kfree(work->tr_buf);
        kvfree(work->request_buf);
        kfree(work->iov);
+       if (!list_empty(&work->interim_entry))
+               list_del(&work->interim_entry);
+
        if (work->async_id)
                ksmbd_release_id(&work->conn->async_ida, work->async_id);
        kmem_cache_free(work_cache, work);
@@ -106,7 +109,7 @@ static inline void __ksmbd_iov_pin(struct ksmbd_work *work, void *ib,
 static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len,
                               void *aux_buf, unsigned int aux_size)
 {
-       struct aux_read *ar;
+       struct aux_read *ar = NULL;
        int need_iov_cnt = 1;
 
        if (aux_size) {
@@ -123,8 +126,11 @@ static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len,
                new = krealloc(work->iov,
                               sizeof(struct kvec) * work->iov_alloc_cnt,
                               GFP_KERNEL | __GFP_ZERO);
-               if (!new)
+               if (!new) {
+                       kfree(ar);
+                       work->iov_alloc_cnt -= 4;
                        return -ENOMEM;
+               }
                work->iov = new;
        }
 
index 9bc0103720f57c323846b7faa6a7f8bcd0756ad5..50c68beb71d6c49855b3f3dc8b8b2789e25e90c5 100644 (file)
@@ -833,7 +833,8 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo)
                                             interim_entry);
                        setup_async_work(in_work, NULL, NULL);
                        smb2_send_interim_resp(in_work, STATUS_PENDING);
-                       list_del(&in_work->interim_entry);
+                       list_del_init(&in_work->interim_entry);
+                       release_async_work(in_work);
                }
                INIT_WORK(&work->work, __smb2_lease_break_noti);
                ksmbd_queue_work(work);
index 658209839729ce665d6514fc1b59efd8d3d3294a..d369b98a6e10373bbc331beba1daa4414db619a9 100644 (file)
@@ -657,13 +657,9 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls)
 
 int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg)
 {
-       struct smb2_hdr *rsp_hdr;
        struct ksmbd_conn *conn = work->conn;
        int id;
 
-       rsp_hdr = ksmbd_resp_buf_next(work);
-       rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND;
-
        id = ksmbd_acquire_async_msg_id(&conn->async_ida);
        if (id < 0) {
                pr_err("Failed to alloc async message id\n");
@@ -671,7 +667,6 @@ int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg)
        }
        work->asynchronous = true;
        work->async_id = id;
-       rsp_hdr->Id.AsyncId = cpu_to_le64(id);
 
        ksmbd_debug(SMB,
                    "Send interim Response to inform async request id : %d\n",
@@ -723,6 +718,8 @@ void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status)
               __SMB2_HEADER_STRUCTURE_SIZE);
 
        rsp_hdr = smb2_get_msg(in_work->response_buf);
+       rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND;
+       rsp_hdr->Id.AsyncId = cpu_to_le64(work->async_id);
        smb2_set_err_rsp(in_work);
        rsp_hdr->Status = status;
 
@@ -2380,7 +2377,8 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
                        rc = 0;
                } else {
                        rc = ksmbd_vfs_setxattr(idmap, path, attr_name, value,
-                                               le16_to_cpu(eabuf->EaValueLength), 0);
+                                               le16_to_cpu(eabuf->EaValueLength),
+                                               0, true);
                        if (rc < 0) {
                                ksmbd_debug(SMB,
                                            "ksmbd_vfs_setxattr is failed(%d)\n",
@@ -2443,7 +2441,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path,
                return -EBADF;
        }
 
-       rc = ksmbd_vfs_setxattr(idmap, path, xattr_stream_name, NULL, 0, 0);
+       rc = ksmbd_vfs_setxattr(idmap, path, xattr_stream_name, NULL, 0, 0, false);
        if (rc < 0)
                pr_err("Failed to store XATTR stream name :%d\n", rc);
        return 0;
@@ -2518,7 +2516,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path *
        da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
                XATTR_DOSINFO_ITIME;
 
-       rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da);
+       rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da, false);
        if (rc)
                ksmbd_debug(SMB, "failed to store file attribute into xattr\n");
 }
@@ -2608,7 +2606,7 @@ static int smb2_create_sd_buffer(struct ksmbd_work *work,
            sizeof(struct create_sd_buf_req))
                return -EINVAL;
        return set_info_sec(work->conn, work->tcon, path, &sd_buf->ntsd,
-                           le32_to_cpu(sd_buf->ccontext.DataLength), true);
+                           le32_to_cpu(sd_buf->ccontext.DataLength), true, false);
 }
 
 static void ksmbd_acls_fattr(struct smb_fattr *fattr,
@@ -2690,7 +2688,7 @@ int smb2_open(struct ksmbd_work *work)
                    *(char *)req->Buffer == '\\') {
                        pr_err("not allow directory name included leading slash\n");
                        rc = -EINVAL;
-                       goto err_out1;
+                       goto err_out2;
                }
 
                name = smb2_get_name(req->Buffer,
@@ -2701,7 +2699,7 @@ int smb2_open(struct ksmbd_work *work)
                        if (rc != -ENOMEM)
                                rc = -ENOENT;
                        name = NULL;
-                       goto err_out1;
+                       goto err_out2;
                }
 
                ksmbd_debug(SMB, "converted name = %s\n", name);
@@ -2709,28 +2707,28 @@ int smb2_open(struct ksmbd_work *work)
                        if (!test_share_config_flag(work->tcon->share_conf,
                                                    KSMBD_SHARE_FLAG_STREAMS)) {
                                rc = -EBADF;
-                               goto err_out1;
+                               goto err_out2;
                        }
                        rc = parse_stream_name(name, &stream_name, &s_type);
                        if (rc < 0)
-                               goto err_out1;
+                               goto err_out2;
                }
 
                rc = ksmbd_validate_filename(name);
                if (rc < 0)
-                       goto err_out1;
+                       goto err_out2;
 
                if (ksmbd_share_veto_filename(share, name)) {
                        rc = -ENOENT;
                        ksmbd_debug(SMB, "Reject open(), vetoed file: %s\n",
                                    name);
-                       goto err_out1;
+                       goto err_out2;
                }
        } else {
                name = kstrdup("", GFP_KERNEL);
                if (!name) {
                        rc = -ENOMEM;
-                       goto err_out1;
+                       goto err_out2;
                }
        }
 
@@ -2743,14 +2741,14 @@ int smb2_open(struct ksmbd_work *work)
                       le32_to_cpu(req->ImpersonationLevel));
                rc = -EIO;
                rsp->hdr.Status = STATUS_BAD_IMPERSONATION_LEVEL;
-               goto err_out1;
+               goto err_out2;
        }
 
        if (req->CreateOptions && !(req->CreateOptions & CREATE_OPTIONS_MASK_LE)) {
                pr_err("Invalid create options : 0x%x\n",
                       le32_to_cpu(req->CreateOptions));
                rc = -EINVAL;
-               goto err_out1;
+               goto err_out2;
        } else {
                if (req->CreateOptions & FILE_SEQUENTIAL_ONLY_LE &&
                    req->CreateOptions & FILE_RANDOM_ACCESS_LE)
@@ -2760,13 +2758,13 @@ int smb2_open(struct ksmbd_work *work)
                    (FILE_OPEN_BY_FILE_ID_LE | CREATE_TREE_CONNECTION |
                     FILE_RESERVE_OPFILTER_LE)) {
                        rc = -EOPNOTSUPP;
-                       goto err_out1;
+                       goto err_out2;
                }
 
                if (req->CreateOptions & FILE_DIRECTORY_FILE_LE) {
                        if (req->CreateOptions & FILE_NON_DIRECTORY_FILE_LE) {
                                rc = -EINVAL;
-                               goto err_out1;
+                               goto err_out2;
                        } else if (req->CreateOptions & FILE_NO_COMPRESSION_LE) {
                                req->CreateOptions = ~(FILE_NO_COMPRESSION_LE);
                        }
@@ -2778,21 +2776,21 @@ int smb2_open(struct ksmbd_work *work)
                pr_err("Invalid create disposition : 0x%x\n",
                       le32_to_cpu(req->CreateDisposition));
                rc = -EINVAL;
-               goto err_out1;
+               goto err_out2;
        }
 
        if (!(req->DesiredAccess & DESIRED_ACCESS_MASK)) {
                pr_err("Invalid desired access : 0x%x\n",
                       le32_to_cpu(req->DesiredAccess));
                rc = -EACCES;
-               goto err_out1;
+               goto err_out2;
        }
 
        if (req->FileAttributes && !(req->FileAttributes & FILE_ATTRIBUTE_MASK_LE)) {
                pr_err("Invalid file attribute : 0x%x\n",
                       le32_to_cpu(req->FileAttributes));
                rc = -EINVAL;
-               goto err_out1;
+               goto err_out2;
        }
 
        if (req->CreateContextsOffset) {
@@ -2800,19 +2798,19 @@ int smb2_open(struct ksmbd_work *work)
                context = smb2_find_context_vals(req, SMB2_CREATE_EA_BUFFER, 4);
                if (IS_ERR(context)) {
                        rc = PTR_ERR(context);
-                       goto err_out1;
+                       goto err_out2;
                } else if (context) {
                        ea_buf = (struct create_ea_buf_req *)context;
                        if (le16_to_cpu(context->DataOffset) +
                            le32_to_cpu(context->DataLength) <
                            sizeof(struct create_ea_buf_req)) {
                                rc = -EINVAL;
-                               goto err_out1;
+                               goto err_out2;
                        }
                        if (req->CreateOptions & FILE_NO_EA_KNOWLEDGE_LE) {
                                rsp->hdr.Status = STATUS_ACCESS_DENIED;
                                rc = -EACCES;
-                               goto err_out1;
+                               goto err_out2;
                        }
                }
 
@@ -2820,7 +2818,7 @@ int smb2_open(struct ksmbd_work *work)
                                                 SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST, 4);
                if (IS_ERR(context)) {
                        rc = PTR_ERR(context);
-                       goto err_out1;
+                       goto err_out2;
                } else if (context) {
                        ksmbd_debug(SMB,
                                    "get query maximal access context\n");
@@ -2831,11 +2829,11 @@ int smb2_open(struct ksmbd_work *work)
                                                 SMB2_CREATE_TIMEWARP_REQUEST, 4);
                if (IS_ERR(context)) {
                        rc = PTR_ERR(context);
-                       goto err_out1;
+                       goto err_out2;
                } else if (context) {
                        ksmbd_debug(SMB, "get timewarp context\n");
                        rc = -EBADF;
-                       goto err_out1;
+                       goto err_out2;
                }
 
                if (tcon->posix_extensions) {
@@ -2843,7 +2841,7 @@ int smb2_open(struct ksmbd_work *work)
                                                         SMB2_CREATE_TAG_POSIX, 16);
                        if (IS_ERR(context)) {
                                rc = PTR_ERR(context);
-                               goto err_out1;
+                               goto err_out2;
                        } else if (context) {
                                struct create_posix *posix =
                                        (struct create_posix *)context;
@@ -2851,7 +2849,7 @@ int smb2_open(struct ksmbd_work *work)
                                    le32_to_cpu(context->DataLength) <
                                    sizeof(struct create_posix) - 4) {
                                        rc = -EINVAL;
-                                       goto err_out1;
+                                       goto err_out2;
                                }
                                ksmbd_debug(SMB, "get posix context\n");
 
@@ -2863,7 +2861,7 @@ int smb2_open(struct ksmbd_work *work)
 
        if (ksmbd_override_fsids(work)) {
                rc = -ENOMEM;
-               goto err_out1;
+               goto err_out2;
        }
 
        rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS,
@@ -3038,7 +3036,7 @@ int smb2_open(struct ksmbd_work *work)
                }
        }
 
-       rc = ksmbd_query_inode_status(d_inode(path.dentry->d_parent));
+       rc = ksmbd_query_inode_status(path.dentry->d_parent);
        if (rc == KSMBD_INODE_STATUS_PENDING_DELETE) {
                rc = -EBUSY;
                goto err_out;
@@ -3152,7 +3150,8 @@ int smb2_open(struct ksmbd_work *work)
                                                                    idmap,
                                                                    &path,
                                                                    pntsd,
-                                                                   pntsd_size);
+                                                                   pntsd_size,
+                                                                   false);
                                        kfree(pntsd);
                                        if (rc)
                                                pr_err("failed to store ntacl in xattr : %d\n",
@@ -3175,11 +3174,6 @@ int smb2_open(struct ksmbd_work *work)
 
        fp->attrib_only = !(req->DesiredAccess & ~(FILE_READ_ATTRIBUTES_LE |
                        FILE_WRITE_ATTRIBUTES_LE | FILE_SYNCHRONIZE_LE));
-       if (!S_ISDIR(file_inode(filp)->i_mode) && open_flags & O_TRUNC &&
-           !fp->attrib_only && !stream_name) {
-               smb_break_all_oplock(work, fp);
-               need_truncate = 1;
-       }
 
        /* fp should be searchable through ksmbd_inode.m_fp_list
         * after daccess, saccess, attrib_only, and stream are
@@ -3195,13 +3189,39 @@ int smb2_open(struct ksmbd_work *work)
                goto err_out;
        }
 
+       rc = ksmbd_vfs_getattr(&path, &stat);
+       if (rc)
+               goto err_out;
+
+       if (stat.result_mask & STATX_BTIME)
+               fp->create_time = ksmbd_UnixTimeToNT(stat.btime);
+       else
+               fp->create_time = ksmbd_UnixTimeToNT(stat.ctime);
+       if (req->FileAttributes || fp->f_ci->m_fattr == 0)
+               fp->f_ci->m_fattr =
+                       cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes)));
+
+       if (!created)
+               smb2_update_xattrs(tcon, &path, fp);
+       else
+               smb2_new_xattrs(tcon, &path, fp);
+
+       if (file_present || created)
+               ksmbd_vfs_kern_path_unlock(&parent_path, &path);
+
+       if (!S_ISDIR(file_inode(filp)->i_mode) && open_flags & O_TRUNC &&
+           !fp->attrib_only && !stream_name) {
+               smb_break_all_oplock(work, fp);
+               need_truncate = 1;
+       }
+
        share_ret = ksmbd_smb_check_shared_mode(fp->filp, fp);
        if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_OPLOCKS) ||
            (req_op_level == SMB2_OPLOCK_LEVEL_LEASE &&
             !(conn->vals->capabilities & SMB2_GLOBAL_CAP_LEASING))) {
                if (share_ret < 0 && !S_ISDIR(file_inode(fp->filp)->i_mode)) {
                        rc = share_ret;
-                       goto err_out;
+                       goto err_out1;
                }
        } else {
                if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) {
@@ -3211,7 +3231,7 @@ int smb2_open(struct ksmbd_work *work)
                                    name, req_op_level, lc->req_state);
                        rc = find_same_lease_key(sess, fp->f_ci, lc);
                        if (rc)
-                               goto err_out;
+                               goto err_out1;
                } else if (open_flags == O_RDONLY &&
                           (req_op_level == SMB2_OPLOCK_LEVEL_BATCH ||
                            req_op_level == SMB2_OPLOCK_LEVEL_EXCLUSIVE))
@@ -3222,16 +3242,16 @@ int smb2_open(struct ksmbd_work *work)
                                      le32_to_cpu(req->hdr.Id.SyncId.TreeId),
                                      lc, share_ret);
                if (rc < 0)
-                       goto err_out;
+                       goto err_out1;
        }
 
        if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)
                ksmbd_fd_set_delete_on_close(fp, file_info);
 
        if (need_truncate) {
-               rc = smb2_create_truncate(&path);
+               rc = smb2_create_truncate(&fp->filp->f_path);
                if (rc)
-                       goto err_out;
+                       goto err_out1;
        }
 
        if (req->CreateContextsOffset) {
@@ -3241,7 +3261,7 @@ int smb2_open(struct ksmbd_work *work)
                                        SMB2_CREATE_ALLOCATION_SIZE, 4);
                if (IS_ERR(az_req)) {
                        rc = PTR_ERR(az_req);
-                       goto err_out;
+                       goto err_out1;
                } else if (az_req) {
                        loff_t alloc_size;
                        int err;
@@ -3250,7 +3270,7 @@ int smb2_open(struct ksmbd_work *work)
                            le32_to_cpu(az_req->ccontext.DataLength) <
                            sizeof(struct create_alloc_size_req)) {
                                rc = -EINVAL;
-                               goto err_out;
+                               goto err_out1;
                        }
                        alloc_size = le64_to_cpu(az_req->AllocationSize);
                        ksmbd_debug(SMB,
@@ -3268,30 +3288,13 @@ int smb2_open(struct ksmbd_work *work)
                context = smb2_find_context_vals(req, SMB2_CREATE_QUERY_ON_DISK_ID, 4);
                if (IS_ERR(context)) {
                        rc = PTR_ERR(context);
-                       goto err_out;
+                       goto err_out1;
                } else if (context) {
                        ksmbd_debug(SMB, "get query on disk id context\n");
                        query_disk_id = 1;
                }
        }
 
-       rc = ksmbd_vfs_getattr(&path, &stat);
-       if (rc)
-               goto err_out;
-
-       if (stat.result_mask & STATX_BTIME)
-               fp->create_time = ksmbd_UnixTimeToNT(stat.btime);
-       else
-               fp->create_time = ksmbd_UnixTimeToNT(stat.ctime);
-       if (req->FileAttributes || fp->f_ci->m_fattr == 0)
-               fp->f_ci->m_fattr =
-                       cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes)));
-
-       if (!created)
-               smb2_update_xattrs(tcon, &path, fp);
-       else
-               smb2_new_xattrs(tcon, &path, fp);
-
        memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
 
        rsp->StructureSize = cpu_to_le16(89);
@@ -3398,13 +3401,13 @@ int smb2_open(struct ksmbd_work *work)
        }
 
 err_out:
-       if (file_present || created) {
-               inode_unlock(d_inode(parent_path.dentry));
-               path_put(&path);
-               path_put(&parent_path);
-       }
-       ksmbd_revert_fsids(work);
+       if (rc && (file_present || created))
+               ksmbd_vfs_kern_path_unlock(&parent_path, &path);
+
 err_out1:
+       ksmbd_revert_fsids(work);
+
+err_out2:
        if (!rc) {
                ksmbd_update_fstate(&work->sess->file_table, fp, FP_INITED);
                rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len);
@@ -5537,7 +5540,7 @@ static int smb2_rename(struct ksmbd_work *work,
                rc = ksmbd_vfs_setxattr(file_mnt_idmap(fp->filp),
                                        &fp->filp->f_path,
                                        xattr_stream_name,
-                                       NULL, 0, 0);
+                                       NULL, 0, 0, true);
                if (rc < 0) {
                        pr_err("failed to store stream name in xattr: %d\n",
                               rc);
@@ -5630,11 +5633,9 @@ static int smb2_create_link(struct ksmbd_work *work,
        if (rc)
                rc = -EINVAL;
 out:
-       if (file_present) {
-               inode_unlock(d_inode(parent_path.dentry));
-               path_put(&path);
-               path_put(&parent_path);
-       }
+       if (file_present)
+               ksmbd_vfs_kern_path_unlock(&parent_path, &path);
+
        if (!IS_ERR(link_name))
                kfree(link_name);
        kfree(pathname);
@@ -5701,7 +5702,8 @@ static int set_file_basic_info(struct ksmbd_file *fp,
                da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
                        XATTR_DOSINFO_ITIME;
 
-               rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, &filp->f_path, &da);
+               rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, &filp->f_path, &da,
+                               true);
                if (rc)
                        ksmbd_debug(SMB,
                                    "failed to restore file attribute in EA\n");
@@ -6013,7 +6015,7 @@ static int smb2_set_info_sec(struct ksmbd_file *fp, int addition_info,
        fp->saccess |= FILE_SHARE_DELETE_LE;
 
        return set_info_sec(fp->conn, fp->tcon, &fp->filp->f_path, pntsd,
-                       buf_len, false);
+                       buf_len, false, true);
 }
 
 /**
@@ -7582,7 +7584,8 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id,
 
                da.attr = le32_to_cpu(fp->f_ci->m_fattr);
                ret = ksmbd_vfs_set_dos_attrib_xattr(idmap,
-                                                    &fp->filp->f_path, &da);
+                                                    &fp->filp->f_path,
+                                                    &da, true);
                if (ret)
                        fp->f_ci->m_fattr = old_fattr;
        }
@@ -8231,7 +8234,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
                return;
 
 err_out:
-       opinfo->op_state = OPLOCK_STATE_NONE;
        wake_up_interruptible_all(&opinfo->oplock_q);
        atomic_dec(&opinfo->breaking_cnt);
        wake_up_interruptible_all(&opinfo->oplock_brk);
index 51b8bfab74813fb3f79bc85e2155eeebd84f2d79..1164365533f08957d80a082f17a10499ddec37ee 100644 (file)
@@ -1185,7 +1185,7 @@ pass:
                        pntsd_size += sizeof(struct smb_acl) + nt_size;
                }
 
-               ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, pntsd_size);
+               ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, pntsd_size, false);
                kfree(pntsd);
        }
 
@@ -1377,7 +1377,7 @@ err_out:
 
 int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
                 const struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
-                bool type_check)
+                bool type_check, bool get_write)
 {
        int rc;
        struct smb_fattr fattr = {{0}};
@@ -1437,7 +1437,8 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
        if (test_share_config_flag(tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) {
                /* Update WinACL in xattr */
                ksmbd_vfs_remove_sd_xattrs(idmap, path);
-               ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, ntsd_len);
+               ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, ntsd_len,
+                               get_write);
        }
 
 out:
index 49a8c292bd2e81c33a6e24434a0a91d4dfb5a2fc..2b52861707d8c11233a8c370f50cdf79493b7e46 100644 (file)
@@ -207,7 +207,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
                        __le32 *pdaccess, int uid);
 int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
                 const struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
-                bool type_check);
+                bool type_check, bool get_write);
 void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid);
 void ksmbd_init_domain(u32 *sub_auth);
 
index c53dea5598fc63718e5df7f42e2079d5ac00497a..9091dcd7a3102c82a24d04948847ed5edbb67d45 100644 (file)
@@ -97,6 +97,13 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
                return -ENOENT;
        }
 
+       err = mnt_want_write(parent_path->mnt);
+       if (err) {
+               path_put(parent_path);
+               putname(filename);
+               return -ENOENT;
+       }
+
        inode_lock_nested(parent_path->dentry->d_inode, I_MUTEX_PARENT);
        d = lookup_one_qstr_excl(&last, parent_path->dentry, 0);
        if (IS_ERR(d))
@@ -123,6 +130,7 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
 
 err_out:
        inode_unlock(d_inode(parent_path->dentry));
+       mnt_drop_write(parent_path->mnt);
        path_put(parent_path);
        putname(filename);
        return -ENOENT;
@@ -451,7 +459,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
                                 fp->stream.name,
                                 (void *)stream_buf,
                                 size,
-                                0);
+                                0,
+                                true);
        if (err < 0)
                goto out;
 
@@ -593,10 +602,6 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path)
                goto out_err;
        }
 
-       err = mnt_want_write(path->mnt);
-       if (err)
-               goto out_err;
-
        idmap = mnt_idmap(path->mnt);
        if (S_ISDIR(d_inode(path->dentry)->i_mode)) {
                err = vfs_rmdir(idmap, d_inode(parent), path->dentry);
@@ -607,7 +612,6 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path)
                if (err)
                        ksmbd_debug(VFS, "unlink failed, err %d\n", err);
        }
-       mnt_drop_write(path->mnt);
 
 out_err:
        ksmbd_revert_fsids(work);
@@ -715,7 +719,7 @@ retry:
                goto out3;
        }
 
-       parent_fp = ksmbd_lookup_fd_inode(d_inode(old_child->d_parent));
+       parent_fp = ksmbd_lookup_fd_inode(old_child->d_parent);
        if (parent_fp) {
                if (parent_fp->daccess & FILE_DELETE_LE) {
                        pr_err("parent dir is opened with delete access\n");
@@ -907,18 +911,22 @@ ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap,
  * @attr_value:        xattr value to set
  * @attr_size: size of xattr value
  * @flags:     destination buffer length
+ * @get_write: get write access to a mount
  *
  * Return:     0 on success, otherwise error
  */
 int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
                       const struct path *path, const char *attr_name,
-                      void *attr_value, size_t attr_size, int flags)
+                      void *attr_value, size_t attr_size, int flags,
+                      bool get_write)
 {
        int err;
 
-       err = mnt_want_write(path->mnt);
-       if (err)
-               return err;
+       if (get_write == true) {
+               err = mnt_want_write(path->mnt);
+               if (err)
+                       return err;
+       }
 
        err = vfs_setxattr(idmap,
                           path->dentry,
@@ -928,7 +936,8 @@ int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
                           flags);
        if (err)
                ksmbd_debug(VFS, "setxattr failed, err %d\n", err);
-       mnt_drop_write(path->mnt);
+       if (get_write == true)
+               mnt_drop_write(path->mnt);
        return err;
 }
 
@@ -1252,6 +1261,13 @@ out1:
        }
 
        if (!err) {
+               err = mnt_want_write(parent_path->mnt);
+               if (err) {
+                       path_put(path);
+                       path_put(parent_path);
+                       return err;
+               }
+
                err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry);
                if (err) {
                        path_put(path);
@@ -1261,6 +1277,14 @@ out1:
        return err;
 }
 
+void ksmbd_vfs_kern_path_unlock(struct path *parent_path, struct path *path)
+{
+       inode_unlock(d_inode(parent_path->dentry));
+       mnt_drop_write(parent_path->mnt);
+       path_put(path);
+       path_put(parent_path);
+}
+
 struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
                                          const char *name,
                                          unsigned int flags,
@@ -1415,7 +1439,8 @@ out:
 int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
                           struct mnt_idmap *idmap,
                           const struct path *path,
-                          struct smb_ntsd *pntsd, int len)
+                          struct smb_ntsd *pntsd, int len,
+                          bool get_write)
 {
        int rc;
        struct ndr sd_ndr = {0}, acl_ndr = {0};
@@ -1475,7 +1500,7 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
 
        rc = ksmbd_vfs_setxattr(idmap, path,
                                XATTR_NAME_SD, sd_ndr.data,
-                               sd_ndr.offset, 0);
+                               sd_ndr.offset, 0, get_write);
        if (rc < 0)
                pr_err("Failed to store XATTR ntacl :%d\n", rc);
 
@@ -1564,7 +1589,8 @@ free_n_data:
 
 int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
                                   const struct path *path,
-                                  struct xattr_dos_attrib *da)
+                                  struct xattr_dos_attrib *da,
+                                  bool get_write)
 {
        struct ndr n;
        int err;
@@ -1574,7 +1600,7 @@ int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
                return err;
 
        err = ksmbd_vfs_setxattr(idmap, path, XATTR_NAME_DOS_ATTRIBUTE,
-                                (void *)n.data, n.offset, 0);
+                                (void *)n.data, n.offset, 0, get_write);
        if (err)
                ksmbd_debug(SMB, "failed to store dos attribute in xattr\n");
        kfree(n.data);
@@ -1846,10 +1872,6 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
        }
        posix_state_to_acl(&acl_state, acls->a_entries);
 
-       rc = mnt_want_write(path->mnt);
-       if (rc)
-               goto out_err;
-
        rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
        if (rc < 0)
                ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
@@ -1861,9 +1883,7 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
                        ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
                                    rc);
        }
-       mnt_drop_write(path->mnt);
 
-out_err:
        free_acl_state(&acl_state);
        posix_acl_release(acls);
        return rc;
@@ -1893,10 +1913,6 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
                }
        }
 
-       rc = mnt_want_write(path->mnt);
-       if (rc)
-               goto out_err;
-
        rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
        if (rc < 0)
                ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
@@ -1908,9 +1924,7 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
                        ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
                                    rc);
        }
-       mnt_drop_write(path->mnt);
 
-out_err:
        posix_acl_release(acls);
        return rc;
 }
index 00968081856e38288953aeb927eb9700d1a2a348..cfe1c8092f2302f2cd0ea7cf5b42f1357160981f 100644 (file)
@@ -109,7 +109,8 @@ ssize_t ksmbd_vfs_casexattr_len(struct mnt_idmap *idmap,
                                int attr_name_len);
 int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
                       const struct path *path, const char *attr_name,
-                      void *attr_value, size_t attr_size, int flags);
+                      void *attr_value, size_t attr_size, int flags,
+                      bool get_write);
 int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
                                size_t *xattr_stream_name_size, int s_type);
 int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
@@ -117,6 +118,7 @@ int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
 int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
                               unsigned int flags, struct path *parent_path,
                               struct path *path, bool caseless);
+void ksmbd_vfs_kern_path_unlock(struct path *parent_path, struct path *path);
 struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
                                          const char *name,
                                          unsigned int flags,
@@ -144,14 +146,16 @@ int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path)
 int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
                           struct mnt_idmap *idmap,
                           const struct path *path,
-                          struct smb_ntsd *pntsd, int len);
+                          struct smb_ntsd *pntsd, int len,
+                          bool get_write);
 int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
                           struct mnt_idmap *idmap,
                           struct dentry *dentry,
                           struct smb_ntsd **pntsd);
 int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
                                   const struct path *path,
-                                  struct xattr_dos_attrib *da);
+                                  struct xattr_dos_attrib *da,
+                                  bool get_write);
 int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap,
                                   struct dentry *dentry,
                                   struct xattr_dos_attrib *da);
index c91eac6514dd95e732e6cdc74ee9a5422dc06eec..ddf233994ddbbf37c1657b925961a7f8be94f4f0 100644 (file)
@@ -66,14 +66,14 @@ static unsigned long inode_hash(struct super_block *sb, unsigned long hashval)
        return tmp & inode_hash_mask;
 }
 
-static struct ksmbd_inode *__ksmbd_inode_lookup(struct inode *inode)
+static struct ksmbd_inode *__ksmbd_inode_lookup(struct dentry *de)
 {
        struct hlist_head *head = inode_hashtable +
-               inode_hash(inode->i_sb, inode->i_ino);
+               inode_hash(d_inode(de)->i_sb, (unsigned long)de);
        struct ksmbd_inode *ci = NULL, *ret_ci = NULL;
 
        hlist_for_each_entry(ci, head, m_hash) {
-               if (ci->m_inode == inode) {
+               if (ci->m_de == de) {
                        if (atomic_inc_not_zero(&ci->m_count))
                                ret_ci = ci;
                        break;
@@ -84,26 +84,16 @@ static struct ksmbd_inode *__ksmbd_inode_lookup(struct inode *inode)
 
 static struct ksmbd_inode *ksmbd_inode_lookup(struct ksmbd_file *fp)
 {
-       return __ksmbd_inode_lookup(file_inode(fp->filp));
+       return __ksmbd_inode_lookup(fp->filp->f_path.dentry);
 }
 
-static struct ksmbd_inode *ksmbd_inode_lookup_by_vfsinode(struct inode *inode)
-{
-       struct ksmbd_inode *ci;
-
-       read_lock(&inode_hash_lock);
-       ci = __ksmbd_inode_lookup(inode);
-       read_unlock(&inode_hash_lock);
-       return ci;
-}
-
-int ksmbd_query_inode_status(struct inode *inode)
+int ksmbd_query_inode_status(struct dentry *dentry)
 {
        struct ksmbd_inode *ci;
        int ret = KSMBD_INODE_STATUS_UNKNOWN;
 
        read_lock(&inode_hash_lock);
-       ci = __ksmbd_inode_lookup(inode);
+       ci = __ksmbd_inode_lookup(dentry);
        if (ci) {
                ret = KSMBD_INODE_STATUS_OK;
                if (ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS))
@@ -143,7 +133,7 @@ void ksmbd_fd_set_delete_on_close(struct ksmbd_file *fp,
 static void ksmbd_inode_hash(struct ksmbd_inode *ci)
 {
        struct hlist_head *b = inode_hashtable +
-               inode_hash(ci->m_inode->i_sb, ci->m_inode->i_ino);
+               inode_hash(d_inode(ci->m_de)->i_sb, (unsigned long)ci->m_de);
 
        hlist_add_head(&ci->m_hash, b);
 }
@@ -157,7 +147,6 @@ static void ksmbd_inode_unhash(struct ksmbd_inode *ci)
 
 static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp)
 {
-       ci->m_inode = file_inode(fp->filp);
        atomic_set(&ci->m_count, 1);
        atomic_set(&ci->op_count, 0);
        atomic_set(&ci->sop_count, 0);
@@ -166,6 +155,7 @@ static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp)
        INIT_LIST_HEAD(&ci->m_fp_list);
        INIT_LIST_HEAD(&ci->m_op_list);
        rwlock_init(&ci->m_lock);
+       ci->m_de = fp->filp->f_path.dentry;
        return 0;
 }
 
@@ -488,12 +478,15 @@ struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid)
        return fp;
 }
 
-struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode)
+struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry)
 {
        struct ksmbd_file       *lfp;
        struct ksmbd_inode      *ci;
+       struct inode            *inode = d_inode(dentry);
 
-       ci = ksmbd_inode_lookup_by_vfsinode(inode);
+       read_lock(&inode_hash_lock);
+       ci = __ksmbd_inode_lookup(dentry);
+       read_unlock(&inode_hash_lock);
        if (!ci)
                return NULL;
 
index 03d0bf941216f8f5157e1d9f1dca76897a3c51c3..8325cf4527c464c7db83b772e145f01849814faf 100644 (file)
@@ -51,7 +51,7 @@ struct ksmbd_inode {
        atomic_t                        op_count;
        /* opinfo count for streams */
        atomic_t                        sop_count;
-       struct inode                    *m_inode;
+       struct dentry                   *m_de;
        unsigned int                    m_flags;
        struct hlist_node               m_hash;
        struct list_head                m_fp_list;
@@ -140,7 +140,7 @@ struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id,
 void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp);
 struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id);
 struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid);
-struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode);
+struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry);
 unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp);
 struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp);
 void ksmbd_close_tree_conn_fds(struct ksmbd_work *work);
@@ -164,7 +164,7 @@ enum KSMBD_INODE_STATUS {
        KSMBD_INODE_STATUS_PENDING_DELETE,
 };
 
-int ksmbd_query_inode_status(struct inode *inode);
+int ksmbd_query_inode_status(struct dentry *dentry);
 bool ksmbd_inode_pending_delete(struct ksmbd_file *fp);
 void ksmbd_set_inode_pending_delete(struct ksmbd_file *fp);
 void ksmbd_clear_inode_pending_delete(struct ksmbd_file *fp);
index 24bb0209e4599f934af06f6c0a9f984880b2fb34..f721d26ec3f7e535e85beb695a7330a479fbb851 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -133,7 +133,8 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
        idmap = mnt_idmap(path->mnt);
        if (inode->i_op->getattr)
                return inode->i_op->getattr(idmap, path, stat,
-                                           request_mask, query_flags);
+                                           request_mask,
+                                           query_flags | AT_GETATTR_NOSEC);
 
        generic_fillattr(idmap, request_mask, inode, stat);
        return 0;
@@ -166,6 +167,9 @@ int vfs_getattr(const struct path *path, struct kstat *stat,
 {
        int retval;
 
+       if (WARN_ON_ONCE(query_flags & AT_GETATTR_NOSEC))
+               return -EPERM;
+
        retval = security_inode_getattr(path);
        if (retval)
                return retval;
index f8a594a50ae628b3116f9918adafd7e7630b0f89..0b90869fd805cd62ab6db7cf924ee0cad8c86eaf 100644 (file)
 /*
  * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access
  * to the ei->dentry must be done under this mutex and after checking
- * if ei->is_freed is not set. The ei->dentry is released under the
- * mutex at the same time ei->is_freed is set. If ei->is_freed is set
- * then the ei->dentry is invalid.
+ * if ei->is_freed is not set. When ei->is_freed is set, the dentry
+ * is on its way to being freed after the last dput() is made on it.
  */
 static DEFINE_MUTEX(eventfs_mutex);
 
 /*
  * The eventfs_inode (ei) itself is protected by SRCU. It is released from
  * its parent's list and will have is_freed set (under eventfs_mutex).
- * After the SRCU grace period is over, the ei may be freed.
+ * After the SRCU grace period is over and the last dput() is called
+ * the ei is freed.
  */
 DEFINE_STATIC_SRCU(eventfs_srcu);
 
@@ -95,7 +95,7 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
        if (!(dentry->d_inode->i_mode & S_IFDIR)) {
                if (!ei->entry_attrs) {
                        ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
-                                                 GFP_KERNEL);
+                                                 GFP_NOFS);
                        if (!ei->entry_attrs) {
                                ret = -ENOMEM;
                                goto out;
@@ -326,7 +326,8 @@ create_file_dentry(struct eventfs_inode *ei, int idx,
        struct eventfs_attr *attr = NULL;
        struct dentry **e_dentry = &ei->d_children[idx];
        struct dentry *dentry;
-       bool invalidate = false;
+
+       WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
 
        mutex_lock(&eventfs_mutex);
        if (ei->is_freed) {
@@ -348,15 +349,8 @@ create_file_dentry(struct eventfs_inode *ei, int idx,
 
        mutex_unlock(&eventfs_mutex);
 
-       /* The lookup already has the parent->d_inode locked */
-       if (!lookup)
-               inode_lock(parent->d_inode);
-
        dentry = create_file(name, mode, attr, parent, data, fops);
 
-       if (!lookup)
-               inode_unlock(parent->d_inode);
-
        mutex_lock(&eventfs_mutex);
 
        if (IS_ERR_OR_NULL(dentry)) {
@@ -365,12 +359,14 @@ create_file_dentry(struct eventfs_inode *ei, int idx,
                 * created the dentry for this e_dentry. In which case
                 * use that one.
                 *
-                * Note, with the mutex held, the e_dentry cannot have content
-                * and the ei->is_freed be true at the same time.
+                * If ei->is_freed is set, the e_dentry is currently on its
+                * way to being freed, don't return it. If e_dentry is NULL
+                * it means it was already freed.
                 */
-               dentry = *e_dentry;
-               if (WARN_ON_ONCE(dentry && ei->is_freed))
+               if (ei->is_freed)
                        dentry = NULL;
+               else
+                       dentry = *e_dentry;
                /* The lookup does not need to up the dentry refcount */
                if (dentry && !lookup)
                        dget(dentry);
@@ -387,17 +383,14 @@ create_file_dentry(struct eventfs_inode *ei, int idx,
                 * Otherwise it means two dentries exist with the same name.
                 */
                WARN_ON_ONCE(!ei->is_freed);
-               invalidate = true;
+               dentry = NULL;
        }
        mutex_unlock(&eventfs_mutex);
 
-       if (invalidate)
-               d_invalidate(dentry);
-
-       if (lookup || invalidate)
+       if (lookup)
                dput(dentry);
 
-       return invalidate ? NULL : dentry;
+       return dentry;
 }
 
 /**
@@ -437,9 +430,10 @@ static struct dentry *
 create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
                  struct dentry *parent, bool lookup)
 {
-       bool invalidate = false;
        struct dentry *dentry = NULL;
 
+       WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
+
        mutex_lock(&eventfs_mutex);
        if (pei->is_freed || ei->is_freed) {
                mutex_unlock(&eventfs_mutex);
@@ -456,15 +450,8 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
        }
        mutex_unlock(&eventfs_mutex);
 
-       /* The lookup already has the parent->d_inode locked */
-       if (!lookup)
-               inode_lock(parent->d_inode);
-
        dentry = create_dir(ei, parent);
 
-       if (!lookup)
-               inode_unlock(parent->d_inode);
-
        mutex_lock(&eventfs_mutex);
 
        if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
@@ -473,8 +460,8 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
                 * created the dentry for this e_dentry. In which case
                 * use that one.
                 *
-                * Note, with the mutex held, the e_dentry cannot have content
-                * and the ei->is_freed be true at the same time.
+                * If ei->is_freed is set, the e_dentry is currently on its
+                * way to being freed.
                 */
                dentry = ei->dentry;
                if (dentry && !lookup)
@@ -493,16 +480,14 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
                 * Otherwise it means two dentries exist with the same name.
                 */
                WARN_ON_ONCE(!ei->is_freed);
-               invalidate = true;
+               dentry = NULL;
        }
        mutex_unlock(&eventfs_mutex);
-       if (invalidate)
-               d_invalidate(dentry);
 
-       if (lookup || invalidate)
+       if (lookup)
                dput(dentry);
 
-       return invalidate ? NULL : dentry;
+       return dentry;
 }
 
 /**
@@ -632,7 +617,7 @@ static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt)
 {
        struct dentry **tmp;
 
-       tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_KERNEL);
+       tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS);
        if (!tmp)
                return -1;
        tmp[cnt] = d;
@@ -698,6 +683,7 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
                return -ENOMEM;
        }
 
+       inode_lock(parent->d_inode);
        list_for_each_entry_srcu(ei_child, &ei->children, list,
                                 srcu_read_lock_held(&eventfs_srcu)) {
                d = create_dir_dentry(ei, ei_child, parent, false);
@@ -730,6 +716,7 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
                        cnt++;
                }
        }
+       inode_unlock(parent->d_inode);
        srcu_read_unlock(&eventfs_srcu, idx);
        ret = dcache_dir_open(inode, file);
 
index 5b54948514fe21de97b1fe0c35b3fa2a1d91de46..ae648deed019cc72e12bcbb3ca788eee78008d7b 100644 (file)
@@ -509,20 +509,15 @@ struct dentry *eventfs_start_creating(const char *name, struct dentry *parent)
        struct dentry *dentry;
        int error;
 
+       /* Must always have a parent. */
+       if (WARN_ON_ONCE(!parent))
+               return ERR_PTR(-EINVAL);
+
        error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
                              &tracefs_mount_count);
        if (error)
                return ERR_PTR(error);
 
-       /*
-        * If the parent is not specified, we create it in the root.
-        * We need the root dentry to do this, which is in the super
-        * block. A pointer to that is in the struct vfsmount that we
-        * have around.
-        */
-       if (!parent)
-               parent = tracefs_mount->mnt_root;
-
        if (unlikely(IS_DEADDIR(parent->d_inode)))
                dentry = ERR_PTR(-ENOENT);
        else
index ed0bc8cbc703d9c345f121f1226e13f419b511d4..567fb37274d35a796756ab86e7437bdb8fdf9479 100644 (file)
@@ -147,7 +147,7 @@ config XFS_ONLINE_SCRUB_STATS
        bool "XFS online metadata check usage data collection"
        default y
        depends on XFS_ONLINE_SCRUB
-       select XFS_DEBUG
+       select DEBUG_FS
        help
          If you say Y here, the kernel will gather usage data about
          the online metadata check subsystem.  This includes the number
index 3069194527dd06791d54cb74f865a663bb0b1624..100ab5931b3132e8b3de4daab94de0dc16f10e07 100644 (file)
@@ -2275,16 +2275,37 @@ xfs_alloc_min_freelist(
 
        ASSERT(mp->m_alloc_maxlevels > 0);
 
+       /*
+        * For a btree shorter than the maximum height, the worst case is that
+        * every level gets split and a new level is added, then while inserting
+        * another entry to refill the AGFL, every level under the old root gets
+        * split again. This is:
+        *
+        *   (full height split reservation) + (AGFL refill split height)
+        * = (current height + 1) + (current height - 1)
+        * = (new height) + (new height - 2)
+        * = 2 * new height - 2
+        *
+        * For a btree of maximum height, the worst case is that every level
+        * under the root gets split, then while inserting another entry to
+        * refill the AGFL, every level under the root gets split again. This is
+        * also:
+        *
+        *   2 * (current height - 1)
+        * = 2 * (new height - 1)
+        * = 2 * new height - 2
+        */
+
        /* space needed by-bno freespace btree */
        min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
-                                      mp->m_alloc_maxlevels);
+                                      mp->m_alloc_maxlevels) * 2 - 2;
        /* space needed by-size freespace btree */
        min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
-                                      mp->m_alloc_maxlevels);
+                                      mp->m_alloc_maxlevels) * 2 - 2;
        /* space needed reverse mapping used space btree */
        if (xfs_has_rmapbt(mp))
                min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
-                                               mp->m_rmap_maxlevels);
+                                               mp->m_rmap_maxlevels) * 2 - 2;
 
        return min_free;
 }
index bcfb6a4203cdd9cb00bc7a25e0ec112df214e228..f71679ce23b95da4d07536581d15711eafa7fd46 100644 (file)
@@ -245,21 +245,18 @@ xfs_defer_create_intents(
        return ret;
 }
 
-/* Abort all the intents that were committed. */
 STATIC void
-xfs_defer_trans_abort(
-       struct xfs_trans                *tp,
-       struct list_head                *dop_pending)
+xfs_defer_pending_abort(
+       struct xfs_mount                *mp,
+       struct list_head                *dop_list)
 {
        struct xfs_defer_pending        *dfp;
        const struct xfs_defer_op_type  *ops;
 
-       trace_xfs_defer_trans_abort(tp, _RET_IP_);
-
        /* Abort intent items that don't have a done item. */
-       list_for_each_entry(dfp, dop_pending, dfp_list) {
+       list_for_each_entry(dfp, dop_list, dfp_list) {
                ops = defer_op_types[dfp->dfp_type];
-               trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
+               trace_xfs_defer_pending_abort(mp, dfp);
                if (dfp->dfp_intent && !dfp->dfp_done) {
                        ops->abort_intent(dfp->dfp_intent);
                        dfp->dfp_intent = NULL;
@@ -267,6 +264,16 @@ xfs_defer_trans_abort(
        }
 }
 
+/* Abort all the intents that were committed. */
+STATIC void
+xfs_defer_trans_abort(
+       struct xfs_trans                *tp,
+       struct list_head                *dop_pending)
+{
+       trace_xfs_defer_trans_abort(tp, _RET_IP_);
+       xfs_defer_pending_abort(tp->t_mountp, dop_pending);
+}
+
 /*
  * Capture resources that the caller said not to release ("held") when the
  * transaction commits.  Caller is responsible for zero-initializing @dres.
@@ -756,12 +763,13 @@ xfs_defer_ops_capture(
 
 /* Release all resources that we used to capture deferred ops. */
 void
-xfs_defer_ops_capture_free(
+xfs_defer_ops_capture_abort(
        struct xfs_mount                *mp,
        struct xfs_defer_capture        *dfc)
 {
        unsigned short                  i;
 
+       xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
        xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
 
        for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
@@ -802,7 +810,7 @@ xfs_defer_ops_capture_and_commit(
        /* Commit the transaction and add the capture structure to the list. */
        error = xfs_trans_commit(tp);
        if (error) {
-               xfs_defer_ops_capture_free(mp, dfc);
+               xfs_defer_ops_capture_abort(mp, dfc);
                return error;
        }
 
index 114a3a4930a3c47080a1e3e38bf06f3b17cb1bda..8788ad5f6a731fbe0fa20faa641eeaa8679a0a98 100644 (file)
@@ -121,7 +121,7 @@ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp,
                struct list_head *capture_list);
 void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
                struct xfs_defer_resources *dres);
-void xfs_defer_ops_capture_free(struct xfs_mount *mp,
+void xfs_defer_ops_capture_abort(struct xfs_mount *mp,
                struct xfs_defer_capture *d);
 void xfs_defer_resources_rele(struct xfs_defer_resources *dres);
 
index 543f3748c2a35334c7c95fda0bbea1d4f5d2bbdf..137a65bda95dc1922d5ecab62ec9c7d358ac1b4a 100644 (file)
@@ -510,6 +510,9 @@ xfs_dinode_verify(
        if (mode && nextents + naextents > nblocks)
                return __this_address;
 
+       if (nextents + naextents == 0 && nblocks != 0)
+               return __this_address;
+
        if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
                return __this_address;
 
index ac6ba646624df5e2a52a16750ced6c3b7da4ca08..a013b87ab8d5e5f7ffcada368e8e7d4c4cb5c0f7 100644 (file)
@@ -562,7 +562,8 @@ xfs_dquot_from_disk(
        struct xfs_dquot        *dqp,
        struct xfs_buf          *bp)
 {
-       struct xfs_disk_dquot   *ddqp = bp->b_addr + dqp->q_bufoffset;
+       struct xfs_dqblk        *dqb = xfs_buf_offset(bp, dqp->q_bufoffset);
+       struct xfs_disk_dquot   *ddqp = &dqb->dd_diskdq;
 
        /*
         * Ensure that we got the type and ID we were looking for.
@@ -1250,7 +1251,7 @@ xfs_qm_dqflush(
        }
 
        /* Flush the incore dquot to the ondisk buffer. */
-       dqblk = bp->b_addr + dqp->q_bufoffset;
+       dqblk = xfs_buf_offset(bp, dqp->q_bufoffset);
        xfs_dquot_to_disk(&dqblk->dd_diskdq, dqp);
 
        /*
index 8966ba842395bfddfcf5fc903600b0bcbf6263aa..2c2720ce692382d3f4a3658e7190fc0f0840e275 100644 (file)
@@ -19,6 +19,7 @@
 #include "xfs_log.h"
 #include "xfs_log_priv.h"
 #include "xfs_log_recover.h"
+#include "xfs_error.h"
 
 STATIC void
 xlog_recover_dquot_ra_pass2(
@@ -65,6 +66,7 @@ xlog_recover_dquot_commit_pass2(
 {
        struct xfs_mount                *mp = log->l_mp;
        struct xfs_buf                  *bp;
+       struct xfs_dqblk                *dqb;
        struct xfs_disk_dquot           *ddq, *recddq;
        struct xfs_dq_logformat         *dq_f;
        xfs_failaddr_t                  fa;
@@ -130,14 +132,14 @@ xlog_recover_dquot_commit_pass2(
                return error;
 
        ASSERT(bp);
-       ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
+       dqb = xfs_buf_offset(bp, dq_f->qlf_boffset);
+       ddq = &dqb->dd_diskdq;
 
        /*
         * If the dquot has an LSN in it, recover the dquot only if it's less
         * than the lsn of the transaction we are replaying.
         */
        if (xfs_has_crc(mp)) {
-               struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
                xfs_lsn_t       lsn = be64_to_cpu(dqb->dd_lsn);
 
                if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
@@ -147,10 +149,23 @@ xlog_recover_dquot_commit_pass2(
 
        memcpy(ddq, recddq, item->ri_buf[1].i_len);
        if (xfs_has_crc(mp)) {
-               xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
+               xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
                                 XFS_DQUOT_CRC_OFF);
        }
 
+       /* Validate the recovered dquot. */
+       fa = xfs_dqblk_verify(log->l_mp, dqb, dq_f->qlf_id);
+       if (fa) {
+               XFS_CORRUPTION_ERROR("Bad dquot after recovery",
+                               XFS_ERRLEVEL_LOW, mp, dqb,
+                               sizeof(struct xfs_dqblk));
+               xfs_alert(mp,
+ "Metadata corruption detected at %pS, dquot 0x%x",
+                               fa, dq_f->qlf_id);
+               error = -EFSCORRUPTED;
+               goto out_release;
+       }
+
        ASSERT(dq_f->qlf_size == 2);
        ASSERT(bp->b_mount == mp);
        bp->b_flags |= _XBF_LOGRECOVERY;
index 3dc47937da5d17d81e46fd590435f574d11558dc..3beb470f18920d6730b32e5d1edcf5530b93b327 100644 (file)
@@ -569,6 +569,14 @@ extern void xfs_setup_inode(struct xfs_inode *ip);
 extern void xfs_setup_iops(struct xfs_inode *ip);
 extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init);
 
+static inline void xfs_update_stable_writes(struct xfs_inode *ip)
+{
+       if (bdev_stable_writes(xfs_inode_buftarg(ip)->bt_bdev))
+               mapping_set_stable_writes(VFS_I(ip)->i_mapping);
+       else
+               mapping_clear_stable_writes(VFS_I(ip)->i_mapping);
+}
+
 /*
  * When setting up a newly allocated inode, we need to call
  * xfs_finish_inode_setup() once the inode is fully instantiated at
index 0e5dba2343ea13e2e0451178097f6ad247371724..144198a6b2702c9f825bd9ad22fe10cd1085ea65 100644 (file)
@@ -286,6 +286,7 @@ xlog_recover_inode_commit_pass2(
        struct xfs_log_dinode           *ldip;
        uint                            isize;
        int                             need_free = 0;
+       xfs_failaddr_t                  fa;
 
        if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
                in_f = item->ri_buf[0].i_addr;
@@ -369,24 +370,26 @@ xlog_recover_inode_commit_pass2(
         * superblock flag to determine whether we need to look at di_flushiter
         * to skip replay when the on disk inode is newer than the log one
         */
-       if (!xfs_has_v3inodes(mp) &&
-           ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
-               /*
-                * Deal with the wrap case, DI_MAX_FLUSH is less
-                * than smaller numbers
-                */
-               if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
-                   ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
-                       /* do nothing */
-               } else {
-                       trace_xfs_log_recover_inode_skip(log, in_f);
-                       error = 0;
-                       goto out_release;
+       if (!xfs_has_v3inodes(mp)) {
+               if (ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+                       /*
+                        * Deal with the wrap case, DI_MAX_FLUSH is less
+                        * than smaller numbers
+                        */
+                       if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
+                           ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
+                               /* do nothing */
+                       } else {
+                               trace_xfs_log_recover_inode_skip(log, in_f);
+                               error = 0;
+                               goto out_release;
+                       }
                }
+
+               /* Take the opportunity to reset the flush iteration count */
+               ldip->di_flushiter = 0;
        }
 
-       /* Take the opportunity to reset the flush iteration count */
-       ldip->di_flushiter = 0;
 
        if (unlikely(S_ISREG(ldip->di_mode))) {
                if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
@@ -528,8 +531,19 @@ out_owner_change:
            (dip->di_mode != 0))
                error = xfs_recover_inode_owner_change(mp, dip, in_f,
                                                       buffer_list);
-       /* re-generate the checksum. */
+       /* re-generate the checksum and validate the recovered inode. */
        xfs_dinode_calc_crc(log->l_mp, dip);
+       fa = xfs_dinode_verify(log->l_mp, in_f->ilf_ino, dip);
+       if (fa) {
+               XFS_CORRUPTION_ERROR(
+                       "Bad dinode after recovery",
+                               XFS_ERRLEVEL_LOW, mp, dip, sizeof(*dip));
+               xfs_alert(mp,
+                       "Metadata corruption detected at %pS, inode 0x%llx",
+                       fa, in_f->ilf_ino);
+               error = -EFSCORRUPTED;
+               goto out_release;
+       }
 
        ASSERT(bp->b_mount == mp);
        bp->b_flags |= _XBF_LOGRECOVERY;
index a82470e027f7278ac4b3271cd67a27ed97235c77..6c3919687ea6b306585ea2f32f125c59a4b53a60 100644 (file)
@@ -1121,23 +1121,25 @@ xfs_ioctl_setattr_xflags(
        struct fileattr         *fa)
 {
        struct xfs_mount        *mp = ip->i_mount;
+       bool                    rtflag = (fa->fsx_xflags & FS_XFLAG_REALTIME);
        uint64_t                i_flags2;
 
-       /* Can't change realtime flag if any extents are allocated. */
-       if ((ip->i_df.if_nextents || ip->i_delayed_blks) &&
-           XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME))
-               return -EINVAL;
+       if (rtflag != XFS_IS_REALTIME_INODE(ip)) {
+               /* Can't change realtime flag if any extents are allocated. */
+               if (ip->i_df.if_nextents || ip->i_delayed_blks)
+                       return -EINVAL;
+       }
 
-       /* If realtime flag is set then must have realtime device */
-       if (fa->fsx_xflags & FS_XFLAG_REALTIME) {
+       if (rtflag) {
+               /* If realtime flag is set then must have realtime device */
                if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 ||
                    xfs_extlen_to_rtxmod(mp, ip->i_extsize))
                        return -EINVAL;
-       }
 
-       /* Clear reflink if we are actually able to set the rt flag. */
-       if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip))
-               ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
+               /* Clear reflink if we are actually able to set the rt flag. */
+               if (xfs_is_reflink_inode(ip))
+                       ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
+       }
 
        /* diflags2 only valid for v3 inodes. */
        i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
@@ -1148,6 +1150,14 @@ xfs_ioctl_setattr_xflags(
        ip->i_diflags2 = i_flags2;
 
        xfs_diflags_to_iflags(ip, false);
+
+       /*
+        * Make the stable writes flag match that of the device the inode
+        * resides on when flipping the RT flag.
+        */
+       if (rtflag != XFS_IS_REALTIME_INODE(ip) && S_ISREG(VFS_I(ip)->i_mode))
+               xfs_update_stable_writes(ip);
+
        xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
        XFS_STATS_INC(mp, xs_ig_attrchg);
index fdfda4fba12b1e9afd3b8631ec8587ac7295ce13..a0d77f5f512e2412c0e4c89aefacdd5c7c52c00c 100644 (file)
@@ -1298,6 +1298,13 @@ xfs_setup_inode(
        gfp_mask = mapping_gfp_mask(inode->i_mapping);
        mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS)));
 
+       /*
+        * For real-time inodes update the stable write flags to that of the RT
+        * device instead of the data device.
+        */
+       if (S_ISREG(inode->i_mode) && XFS_IS_REALTIME_INODE(ip))
+               xfs_update_stable_writes(ip);
+
        /*
         * If there is no attribute fork no ACL can exist on this inode,
         * and it can't have any file capabilities attached to it either.
index 51c100c861770f619776a8e3efb921d0b2e55bd8..ee206facf0dc065d4328007f7f32c089989c9c11 100644 (file)
@@ -1893,9 +1893,7 @@ xlog_write_iclog(
                 * the buffer manually, the code needs to be kept in sync
                 * with the I/O completion path.
                 */
-               xlog_state_done_syncing(iclog);
-               up(&iclog->ic_sema);
-               return;
+               goto sync;
        }
 
        /*
@@ -1925,20 +1923,17 @@ xlog_write_iclog(
                 * avoid shutdown re-entering this path and erroring out again.
                 */
                if (log->l_targ != log->l_mp->m_ddev_targp &&
-                   blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
-                       xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
-                       return;
-               }
+                   blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev))
+                       goto shutdown;
        }
        if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
                iclog->ic_bio.bi_opf |= REQ_FUA;
 
        iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
 
-       if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
-               xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
-               return;
-       }
+       if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count))
+               goto shutdown;
+
        if (is_vmalloc_addr(iclog->ic_data))
                flush_kernel_vmap_range(iclog->ic_data, count);
 
@@ -1959,6 +1954,12 @@ xlog_write_iclog(
        }
 
        submit_bio(&iclog->ic_bio);
+       return;
+shutdown:
+       xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
+sync:
+       xlog_state_done_syncing(iclog);
+       up(&iclog->ic_sema);
 }
 
 /*
index 13b94d2e605bd9ddb852f76eeeea0bd956a595e0..a1e18b24971a28eedcf79b4d43b16778b42133f8 100644 (file)
@@ -2511,7 +2511,7 @@ xlog_abort_defer_ops(
 
        list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
                list_del_init(&dfc->dfc_list);
-               xfs_defer_ops_capture_free(mp, dfc);
+               xfs_defer_ops_capture_abort(mp, dfc);
        }
 }
 
index 658edee8381dcdca656135a74a9b5a1b6e13a8cb..e5b62dc2846644c142b04423fbec10e57d81c34d 100644 (file)
@@ -784,6 +784,7 @@ xfs_reflink_end_cow_extent(
                }
        }
        del = got;
+       xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb);
 
        /* Grab the corresponding mapping in the data fork. */
        nmaps = 1;
index afeed6e72049e470f836d79753971aa392359662..1216d72c650faee69165b6b9f1545f41b3b9954c 100644 (file)
@@ -542,6 +542,7 @@ int acpi_device_set_power(struct acpi_device *device, int state);
 int acpi_bus_init_power(struct acpi_device *device);
 int acpi_device_fix_up_power(struct acpi_device *device);
 void acpi_device_fix_up_power_extended(struct acpi_device *adev);
+void acpi_device_fix_up_power_children(struct acpi_device *adev);
 int acpi_bus_update_power(acpi_handle handle, int *state_p);
 int acpi_device_update_power(struct acpi_device *device, int *state_p);
 bool acpi_bus_power_manageable(acpi_handle handle);
index 995513fa26904afa113d70e8b0b408af13ee9c65..0655aa5b57b29066286b9c94f477e7d6593f88b8 100644 (file)
@@ -70,7 +70,7 @@ static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
  */
 static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
 {
-       return !atomic_read(&lock.val);
+       return !lock.val.counter;
 }
 
 /**
index b4825d3cdb292304bb256b40c54ac254dddb1e2d..6762dac3ef76153fe96bbd05a1050b9a31d1a43d 100644 (file)
@@ -56,7 +56,7 @@ extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
 extern struct kobject *btf_kobj;
 extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
-extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;
+extern bool bpf_global_ma_set;
 
 typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
@@ -909,10 +909,14 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
        aux->ctx_field_size = size;
 }
 
+static bool bpf_is_ldimm64(const struct bpf_insn *insn)
+{
+       return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
 static inline bool bpf_pseudo_func(const struct bpf_insn *insn)
 {
-       return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
-              insn->src_reg == BPF_PSEUDO_FUNC;
+       return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
 }
 
 struct bpf_prog_ops {
index 24213a99cc79db4c41ad17f8b28d23bcc7a8be9f..aa4d19d0bc94bb41da7ef238b3b3e7ebc98d7ced 100644 (file)
@@ -301,6 +301,17 @@ struct bpf_func_state {
        struct tnum callback_ret_range;
        bool in_async_callback_fn;
        bool in_exception_callback_fn;
+       /* For callback calling functions that limit number of possible
+        * callback executions (e.g. bpf_loop), this keeps track of the current
+        * simulated iteration number.
+        * Value in frame N refers to number of times callback with frame
+        * N+1 was simulated, e.g. for the following call:
+        *
+        *   bpf_loop(..., fn, ...); | suppose current frame is N
+        *                           | fn would be simulated in frame N+1
+        *                           | number of simulations is tracked in frame N
+        */
+       u32 callback_depth;
 
        /* The following fields should be last. See copy_func_state() */
        int acquired_refs;
@@ -400,6 +411,7 @@ struct bpf_verifier_state {
        struct bpf_idx_pair *jmp_history;
        u32 jmp_history_cnt;
        u32 dfs_depth;
+       u32 callback_unroll_depth;
 };
 
 #define bpf_get_spilled_reg(slot, frame, mask)                         \
@@ -511,6 +523,10 @@ struct bpf_insn_aux_data {
         * this instruction, regardless of any heuristics
         */
        bool force_checkpoint;
+       /* true if instruction is a call to a helper function that
+        * accepts callback function as a parameter.
+        */
+       bool calls_callback;
 };
 
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
index de7bb47d8a46ace38d95a81ed6df231d91ac725b..c554c6a08768ad60cdf529a65cf962095363a4a9 100644 (file)
 
 struct closure;
 struct closure_syncer;
-typedef void (closure_fn) (struct closure *);
+typedef void (closure_fn) (struct work_struct *);
 extern struct dentry *bcache_debug;
 
 struct closure_waitlist {
@@ -254,7 +254,7 @@ static inline void closure_queue(struct closure *cl)
                INIT_WORK(&cl->work, cl->work.func);
                BUG_ON(!queue_work(wq, &cl->work));
        } else
-               cl->fn(cl);
+               cl->fn(&cl->work);
 }
 
 /**
@@ -309,6 +309,11 @@ static inline void closure_wake_up(struct closure_waitlist *list)
        __closure_wake_up(list);
 }
 
+#define CLOSURE_CALLBACK(name) void name(struct work_struct *ws)
+#define closure_type(name, type, member)                               \
+       struct closure *cl = container_of(ws, struct closure, work);    \
+       type *name = container_of(cl, type, member)
+
 /**
  * continue_at - jump to another function with barrier
  *
index d305db70674bb539564fda04b226be40ac6acbbd..efc0c0b07efb41b7d4856ab290f8a836319a34d1 100644 (file)
@@ -195,6 +195,7 @@ enum cpuhp_state {
        CPUHP_AP_ARM_CORESIGHT_CTI_STARTING,
        CPUHP_AP_ARM64_ISNDEP_STARTING,
        CPUHP_AP_SMPCFD_DYING,
+       CPUHP_AP_HRTIMERS_DYING,
        CPUHP_AP_X86_TBOOT_DYING,
        CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
        CPUHP_AP_ONLINE,
index ea2d919fd9c7990061ba4f469f92bbe5880a7b45..c9c65b132c0fd7fcf12c95c0ed50281bb0e66efa 100644 (file)
@@ -171,6 +171,25 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf,
 ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf,
                              size_t count, loff_t *ppos);
 
+/**
+ * struct debugfs_cancellation - cancellation data
+ * @list: internal, for keeping track
+ * @cancel: callback to call
+ * @cancel_data: extra data for the callback to call
+ */
+struct debugfs_cancellation {
+       struct list_head list;
+       void (*cancel)(struct dentry *, void *);
+       void *cancel_data;
+};
+
+void __acquires(cancellation)
+debugfs_enter_cancellation(struct file *file,
+                          struct debugfs_cancellation *cancellation);
+void __releases(cancellation)
+debugfs_leave_cancellation(struct file *file,
+                          struct debugfs_cancellation *cancellation);
+
 #else
 
 #include <linux/err.h>
index 45fca09b231943ea38f56f5079ba7e431074f8a9..69501e0ec239f93d7008b2446c2ac23e2c7e11d2 100644 (file)
@@ -50,9 +50,7 @@
            "   .previous"                                              "\n"    \
        )
 
-#ifdef CONFIG_IA64
-#define KSYM_FUNC(name)                @fptr(name)
-#elif defined(CONFIG_PARISC) && defined(CONFIG_64BIT)
+#if defined(CONFIG_PARISC) && defined(CONFIG_64BIT)
 #define KSYM_FUNC(name)                P%name
 #else
 #define KSYM_FUNC(name)                name
index 5a8387a4a7126303adcee9553cce3cfb7599e1b2..bf43f3ff666400fb8a5d0fcef9e1c20773dcde1e 100644 (file)
@@ -679,6 +679,7 @@ struct hid_device {                                                 /* device report descriptor */
        struct list_head debug_list;
        spinlock_t  debug_list_lock;
        wait_queue_head_t debug_wait;
+       struct kref                     ref;
 
        unsigned int id;                                                /* system unique id */
 
@@ -687,6 +688,8 @@ struct hid_device {                                                 /* device report descriptor */
 #endif /* CONFIG_BPF */
 };
 
+void hiddev_free(struct kref *ref);
+
 #define to_hid_device(pdev) \
        container_of(pdev, struct hid_device, dev)
 
index 0ee140176f102f4eed0986f6eebb41c9ec839870..f2044d5a652b5c6eed1652f2a7f1c23050d854a6 100644 (file)
@@ -531,9 +531,9 @@ extern void sysrq_timer_list_show(void);
 
 int hrtimers_prepare_cpu(unsigned int cpu);
 #ifdef CONFIG_HOTPLUG_CPU
-int hrtimers_dead_cpu(unsigned int cpu);
+int hrtimers_cpu_dying(unsigned int cpu);
 #else
-#define hrtimers_dead_cpu      NULL
+#define hrtimers_cpu_dying     NULL
 #endif
 
 #endif
index 958771bac9c0295f2a7ee57f56cac436933626fd..c2ac9e9e7ee9a8bc80ebaa6465875418bdabe397 100644 (file)
@@ -2830,12 +2830,14 @@ ieee80211_he_oper_size(const u8 *he_oper_ie)
 static inline const struct ieee80211_he_6ghz_oper *
 ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper)
 {
-       const u8 *ret = (const void *)&he_oper->optional;
+       const u8 *ret;
        u32 he_oper_params;
 
        if (!he_oper)
                return NULL;
 
+       ret = (const void *)&he_oper->optional;
+
        he_oper_params = le32_to_cpu(he_oper->he_oper_params);
 
        if (!(he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO))
index 8fa23bdcedbf98f442ec37379bb88211c43ee33f..007fd9c3e4b62cc93a9bf54a1c6502c95feab7b7 100644 (file)
@@ -420,7 +420,7 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising)
  * A function that translates value of following registers to the linkmode:
  * IEEE 802.3-2018 45.2.3.10 "EEE control and capability 1" register (3.20)
  * IEEE 802.3-2018 45.2.7.13 "EEE advertisement 1" register (7.60)
- * IEEE 802.3-2018 45.2.7.14 "EEE "link partner ability 1 register (7.61)
+ * IEEE 802.3-2018 45.2.7.14 "EEE link partner ability 1" register (7.61)
  */
 static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val)
 {
index a16c9cc063fe0efbaefc7d4ce678846abbd1046e..2564e209465ea8c1cac107402e51c8562eff047e 100644 (file)
@@ -1797,6 +1797,13 @@ enum netdev_ml_priv_type {
        ML_PRIV_CAN,
 };
 
+enum netdev_stat_type {
+       NETDEV_PCPU_STAT_NONE,
+       NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */
+       NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */
+       NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
+};
+
 /**
  *     struct net_device - The DEVICE structure.
  *
@@ -1991,10 +1998,14 @@ enum netdev_ml_priv_type {
  *
  *     @ml_priv:       Mid-layer private
  *     @ml_priv_type:  Mid-layer private type
- *     @lstats:        Loopback statistics
- *     @tstats:        Tunnel statistics
- *     @dstats:        Dummy statistics
- *     @vstats:        Virtual ethernet statistics
+ *
+ *     @pcpu_stat_type:        Type of device statistics which the core should
+ *                             allocate/free: none, lstats, tstats, dstats. none
+ *                             means the driver is handling statistics allocation/
+ *                             freeing internally.
+ *     @lstats:                Loopback statistics: packets, bytes
+ *     @tstats:                Tunnel statistics: RX/TX packets, RX/TX bytes
+ *     @dstats:                Dummy statistics: RX/TX/drop packets, RX/TX bytes
  *
  *     @garp_port:     GARP
  *     @mrp_port:      MRP
@@ -2354,6 +2365,7 @@ struct net_device {
        void                            *ml_priv;
        enum netdev_ml_priv_type        ml_priv_type;
 
+       enum netdev_stat_type           pcpu_stat_type:8;
        union {
                struct pcpu_lstats __percpu             *lstats;
                struct pcpu_sw_netstats __percpu        *tstats;
@@ -2755,6 +2767,16 @@ struct pcpu_sw_netstats {
        struct u64_stats_sync   syncp;
 } __aligned(4 * sizeof(u64));
 
+struct pcpu_dstats {
+       u64                     rx_packets;
+       u64                     rx_bytes;
+       u64                     rx_drops;
+       u64                     tx_packets;
+       u64                     tx_bytes;
+       u64                     tx_drops;
+       struct u64_stats_sync   syncp;
+} __aligned(8 * sizeof(u64));
+
 struct pcpu_lstats {
        u64_stats_t packets;
        u64_stats_t bytes;
index bcc1ea44b4e8541aea3cfe5fc601d2a9bf8c4295..06142ff7f9ce0ef0c600c3aa24f68b1bd5450bdb 100644 (file)
@@ -204,6 +204,8 @@ enum mapping_flags {
        AS_NO_WRITEBACK_TAGS = 5,
        AS_LARGE_FOLIO_SUPPORT = 6,
        AS_RELEASE_ALWAYS,      /* Call ->release_folio(), even if no private data */
+       AS_STABLE_WRITES,       /* must wait for writeback before modifying
+                                  folio contents */
 };
 
 /**
@@ -289,6 +291,21 @@ static inline void mapping_clear_release_always(struct address_space *mapping)
        clear_bit(AS_RELEASE_ALWAYS, &mapping->flags);
 }
 
+static inline bool mapping_stable_writes(const struct address_space *mapping)
+{
+       return test_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
+static inline void mapping_set_stable_writes(struct address_space *mapping)
+{
+       set_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
+static inline void mapping_clear_stable_writes(struct address_space *mapping)
+{
+       clear_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 {
        return mapping->gfp_mask;
index afb028c54f3397c50dd2163674a24cae61c47997..5547ba68e6e47d851915d1671b0a2a49bfefa4a1 100644 (file)
@@ -843,11 +843,11 @@ struct perf_event {
 };
 
 /*
- *           ,-----------------------[1:n]----------------------.
- *           V                                                  V
- * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event
- *           ^                      ^     |                     |
- *           `--------[1:n]---------'     `-[n:1]-> pmu <-[1:n]-'
+ *           ,-----------------------[1:n]------------------------.
+ *           V                                                    V
+ * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event
+ *                                        |                       |
+ *                                        `--[n:1]-> pmu <-[1:n]--'
  *
  *
  * struct perf_event_pmu_context  lifetime is refcount based and RCU freed
@@ -865,6 +865,9 @@ struct perf_event {
  * ctx->mutex pinning the configuration. Since we hold a reference on
  * group_leader (through the filedesc) it can't go away, therefore it's
  * associated pmu_ctx must exist and cannot change due to ctx->mutex.
+ *
+ * perf_event holds a refcount on perf_event_context
+ * perf_event holds a refcount on perf_event_pmu_context
  */
 struct perf_event_pmu_context {
        struct pmu                      *pmu;
index c1637515a8a41613580eb6006fd17f9ad0a8733b..c953b8c0d2f4339a647b93ef0c2b8796010181b1 100644 (file)
@@ -106,6 +106,7 @@ struct sk_psock {
        struct mutex                    work_mutex;
        struct sk_psock_work_state      work_state;
        struct delayed_work             work;
+       struct sock                     *sk_pair;
        struct rcu_work                 rwork;
 };
 
index c36e7a3b45e7e73fc24dcf1a836d568b0dcb85d1..3be2cb564710b5a7be3de43903c5786e15f704ad 100644 (file)
@@ -14,6 +14,7 @@
 
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 #include <asm/stacktrace.h>
+#include <linux/linkage.h>
 
 /*
  * The lowest address on tsk's stack which we can plausibly erase.
@@ -76,6 +77,11 @@ static inline void stackleak_task_init(struct task_struct *t)
 # endif
 }
 
+asmlinkage void noinstr stackleak_erase(void);
+asmlinkage void noinstr stackleak_erase_on_task_stack(void);
+asmlinkage void noinstr stackleak_erase_off_task_stack(void);
+void __no_caller_saved_registers noinstr stackleak_track_stack(void);
+
 #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
 static inline void stackleak_task_init(struct task_struct *t) { }
 #endif
index b513749582d775a5f6ee0e87cb0bf98fcd988959..e4de6bc1f69b6287cb49882c3235b824bb474d13 100644 (file)
@@ -144,10 +144,6 @@ struct usb_phy {
         */
        int     (*set_wakeup)(struct usb_phy *x, bool enabled);
 
-       /* notify phy port status change */
-       int     (*notify_port_status)(struct usb_phy *x, int port,
-                                     u16 portstatus, u16 portchange);
-
        /* notify phy connect status change */
        int     (*notify_connect)(struct usb_phy *x,
                        enum usb_device_speed speed);
@@ -320,15 +316,6 @@ usb_phy_set_wakeup(struct usb_phy *x, bool enabled)
                return 0;
 }
 
-static inline int
-usb_phy_notify_port_status(struct usb_phy *x, int port, u16 portstatus, u16 portchange)
-{
-       if (x && x->notify_port_status)
-               return x->notify_port_status(x, port, portstatus, portchange);
-       else
-               return 0;
-}
-
 static inline int
 usb_phy_notify_connect(struct usb_phy *x, enum usb_device_speed speed)
 {
index d0f2797420f7044616c7c7ef9faccc956acf5a7c..a09e13a577a99a0f91916ad121329dde11f42b1f 100644 (file)
@@ -5,13 +5,6 @@
 #include <linux/pci.h>
 #include <linux/virtio_pci.h>
 
-struct virtio_pci_modern_common_cfg {
-       struct virtio_pci_common_cfg cfg;
-
-       __le16 queue_notify_data;       /* read-write */
-       __le16 queue_reset;             /* read-write */
-};
-
 /**
  * struct virtio_pci_modern_device - info for modern PCI virtio
  * @pci_dev:       Ptr to the PCI device struct
index 824c258143a3ab360b870fda38ba684b70068eee..49c4640027d8a6b93e903a6238d21e8541e31da4 100644 (file)
@@ -75,6 +75,7 @@ struct unix_sock {
 };
 
 #define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk)
+#define unix_peer(sk) (unix_sk(sk)->peer)
 
 #define peer_wait peer_wq.wait
 
index b137a33a1b6892190dfa4e9067cf1045b82cf4b4..4ecfb06c413dbfd468467840dc5d1a178c6f5869 100644 (file)
@@ -9299,4 +9299,50 @@ bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap,
  */
 void cfg80211_links_removed(struct net_device *dev, u16 link_mask);
 
+#ifdef CONFIG_CFG80211_DEBUGFS
+/**
+ * wiphy_locked_debugfs_read - do a locked read in debugfs
+ * @wiphy: the wiphy to use
+ * @file: the file being read
+ * @buf: the buffer to fill and then read from
+ * @bufsize: size of the buffer
+ * @userbuf: the user buffer to copy to
+ * @count: read count
+ * @ppos: read position
+ * @handler: the read handler to call (under wiphy lock)
+ * @data: additional data to pass to the read handler
+ */
+ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file,
+                                 char *buf, size_t bufsize,
+                                 char __user *userbuf, size_t count,
+                                 loff_t *ppos,
+                                 ssize_t (*handler)(struct wiphy *wiphy,
+                                                    struct file *file,
+                                                    char *buf,
+                                                    size_t bufsize,
+                                                    void *data),
+                                 void *data);
+
+/**
+ * wiphy_locked_debugfs_write - do a locked write in debugfs
+ * @wiphy: the wiphy to use
+ * @file: the file being written to
+ * @buf: the buffer to copy the user data to
+ * @bufsize: size of the buffer
+ * @userbuf: the user buffer to copy from
+ * @count: read count
+ * @handler: the write handler to call (under wiphy lock)
+ * @data: additional data to pass to the write handler
+ */
+ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file,
+                                  char *buf, size_t bufsize,
+                                  const char __user *userbuf, size_t count,
+                                  ssize_t (*handler)(struct wiphy *wiphy,
+                                                     struct file *file,
+                                                     char *buf,
+                                                     size_t count,
+                                                     void *data),
+                                  void *data);
+#endif
+
 #endif /* __NET_CFG80211_H */
index 07022bb0d44d4b5eef5812cc86e042833cf3a337..0d28172193fa634ec08c85007ca851da7f56d7ee 100644 (file)
@@ -162,7 +162,7 @@ struct neighbour {
        struct rcu_head         rcu;
        struct net_device       *dev;
        netdevice_tracker       dev_tracker;
-       u8                      primary_key[0];
+       u8                      primary_key[];
 } __randomize_layout;
 
 struct neigh_ops {
index 3bbd13ab1ecf590bf8d8f040b072653d736b40b8..b157c5cafd14cfe307f3d36ad533d528f142eea6 100644 (file)
@@ -178,9 +178,9 @@ static inline __be32 nft_reg_load_be32(const u32 *sreg)
        return *(__force __be32 *)sreg;
 }
 
-static inline void nft_reg_store64(u32 *dreg, u64 val)
+static inline void nft_reg_store64(u64 *dreg, u64 val)
 {
-       put_unaligned(val, (u64 *)dreg);
+       put_unaligned(val, dreg);
 }
 
 static inline u64 nft_reg_load64(const u32 *sreg)
index 0ba2e6b847ca53de6d2bcb480c0d615cc8f2dcc0..9ec0163739f45156657cb0d1b0f895514dd022a9 100644 (file)
@@ -10,6 +10,7 @@ int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog);
 int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
+INDIRECT_CALLABLE_DECLARE(struct net_device *netkit_peer_dev(struct net_device *dev));
 #else
 static inline int netkit_prog_attach(const union bpf_attr *attr,
                                     struct bpf_prog *prog)
@@ -34,5 +35,10 @@ static inline int netkit_prog_query(const union bpf_attr *attr,
 {
        return -EINVAL;
 }
+
+static inline struct net_device *netkit_peer_dev(struct net_device *dev)
+{
+       return NULL;
+}
 #endif /* CONFIG_NETKIT */
 #endif /* __NET_NETKIT_H */
index 8a6dbfb233362b2ef0a85e5738d5790ccc0d0a4b..77f87c622a2ef4c2ab7d19277fc95264ed28c9fb 100644 (file)
@@ -58,6 +58,11 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a)
        return to_ct_params(a)->nf_ft;
 }
 
+static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a)
+{
+       return to_ct_params(a)->helper;
+}
+
 #else
 static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; }
 static inline int tcf_ct_action(const struct tc_action *a) { return 0; }
@@ -65,6 +70,10 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a)
 {
        return NULL;
 }
+static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a)
+{
+       return NULL;
+}
 #endif /* CONFIG_NF_CONNTRACK */
 
 #if IS_ENABLED(CONFIG_NET_ACT_CT)
index 10480eb582b2a6dd3503112be4e4b0c4731f4f51..5ec1e71a09de7698616dff799a935da15083deef 100644 (file)
@@ -167,19 +167,25 @@ struct scsi_device {
         * power state for system suspend/resume (suspend to RAM and
         * hibernation) operations.
         */
-       bool manage_system_start_stop;
+       unsigned manage_system_start_stop:1;
 
        /*
         * If true, let the high-level device driver (sd) manage the device
         * power state for runtime device suspand and resume operations.
         */
-       bool manage_runtime_start_stop;
+       unsigned manage_runtime_start_stop:1;
 
        /*
         * If true, let the high-level device driver (sd) manage the device
         * power state for system shutdown (power off) operations.
         */
-       bool manage_shutdown;
+       unsigned manage_shutdown:1;
+
+       /*
+        * If set and if the device is runtime suspended, ask the high-level
+        * device driver (sd) to force a runtime resume of the device.
+        */
+       unsigned force_runtime_start_on_system_start:1;
 
        unsigned removable:1;
        unsigned changed:1;     /* Data invalid due to media change */
index 4c53a5ef6257b8fcbc6ff87163bd9eebd5d5a685..f7e537f64db457f8cb6004dc90c0b84e216b62ab 100644 (file)
        E_(rxrpc_rtt_tx_ping,                   "PING")
 
 #define rxrpc_rtt_rx_traces \
-       EM(rxrpc_rtt_rx_cancel,                 "CNCL") \
+       EM(rxrpc_rtt_rx_other_ack,              "OACK") \
        EM(rxrpc_rtt_rx_obsolete,               "OBSL") \
        EM(rxrpc_rtt_rx_lost,                   "LOST") \
        EM(rxrpc_rtt_rx_ping_response,          "PONG") \
index c25fc96145947ed3468aebfbad81d22015f5f239..d24e8e121507bb691e3fa272b63b44f3194f6776 100644 (file)
  */
 #define BTRFS_METADATA_ITEM_KEY        169
 
+/*
+ * Special inline ref key which stores the id of the subvolume which originally
+ * created the extent. This subvolume owns the extent permanently from the
+ * perspective of simple quotas. Needed to know which subvolume to free quota
+ * usage from when the extent is deleted.
+ *
+ * Stored as an inline ref to avoid wasting space on a separate item on
+ * top of the existing extent item. However, unlike the other inline refs,
+ * there is only one owner ref per extent rather than one per extent ref.
+ *
+ * Because of this, it goes at the front of the list of inline refs, and thus
+ * must have a lower type value than any other inline ref type (to satisfy the
+ * disk format rule that inline refs have non-decreasing type).
+ */
+#define BTRFS_EXTENT_OWNER_REF_KEY     172
+
 #define BTRFS_TREE_BLOCK_REF_KEY       176
 
 #define BTRFS_EXTENT_DATA_REF_KEY      178
 
 #define BTRFS_SHARED_DATA_REF_KEY      184
 
-/*
- * Special inline ref key which stores the id of the subvolume which originally
- * created the extent. This subvolume owns the extent permanently from the
- * perspective of simple quotas. Needed to know which subvolume to free quota
- * usage from when the extent is deleted.
- */
-#define BTRFS_EXTENT_OWNER_REF_KEY     188
-
 /*
  * block groups give us hints into the extent allocation trees.  Which
  * blocks are free etc etc
index 6c80f96049bd07d1aa527c103acb07fe52bfd617..282e90aeb163c0288590995b38fe011b19e85111 100644 (file)
 #define AT_HANDLE_FID          AT_REMOVEDIR    /* file handle is needed to
                                        compare object identity and may not
                                        be usable to open_by_handle_at(2) */
+#if defined(__KERNEL__)
+#define AT_GETATTR_NOSEC       0x80000000
+#endif
 
 #endif /* _UAPI_LINUX_FCNTL_H */
index 5c6c4269f7efe4d649366907191adfbcffef41e4..2ec6f35cda32e9e61afbbe8708f4bbb5bf1e39f7 100644 (file)
@@ -27,7 +27,7 @@
        union { \
                struct { MEMBERS } ATTRS; \
                struct TAG { MEMBERS } ATTRS NAME; \
-       }
+       } ATTRS
 
 #ifdef __cplusplus
 /* sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */
index 4a195b68f28f6d788b17c962c19277ca4d822a1f..b383c2fe0cf3540ac5b1b8403d1f05421d7418f0 100644 (file)
@@ -239,7 +239,7 @@ struct v4l2_subdev_routing {
  * set (which is the default), the 'stream' fields will be forced to 0 by the
  * kernel.
  */
- #define V4L2_SUBDEV_CLIENT_CAP_STREAMS                (1U << 0)
+ #define V4L2_SUBDEV_CLIENT_CAP_STREAMS                (1ULL << 0)
 
 /**
  * struct v4l2_subdev_client_capability - Capabilities of the client accessing
index f703afc7ad31ba0791101585fd95b9a10a48f3ce..44f4dd2add188090ff3b03d859fb4d27009d5479 100644 (file)
@@ -166,6 +166,17 @@ struct virtio_pci_common_cfg {
        __le32 queue_used_hi;           /* read-write */
 };
 
+/*
+ * Warning: do not use sizeof on this: use offsetofend for
+ * specific fields you need.
+ */
+struct virtio_pci_modern_common_cfg {
+       struct virtio_pci_common_cfg cfg;
+
+       __le16 queue_notify_data;       /* read-write */
+       __le16 queue_reset;             /* read-write */
+};
+
 /* Fields in VIRTIO_PCI_CAP_PCI_CFG: */
 struct virtio_pci_cfg_cap {
        struct virtio_pci_cap cap;
index 23932b0673dc7459037c16e97642598a16f08877..3b07409f80320e3571ed32fe0877eea631af5655 100644 (file)
@@ -88,7 +88,6 @@ void xen_irq_resume(void);
 
 /* Clear an irq's pending state, in preparation for polling on it */
 void xen_clear_irq_pending(int irq);
-void xen_set_irq_pending(int irq);
 bool xen_test_irq_pending(int irq);
 
 /* Poll waiting for an irq to become pending.  In the usual case, the
@@ -101,8 +100,8 @@ void xen_poll_irq_timeout(int irq, u64 timeout);
 
 /* Determine the IRQ which is bound to an event channel */
 unsigned int irq_from_evtchn(evtchn_port_t evtchn);
-int irq_from_virq(unsigned int cpu, unsigned int virq);
-evtchn_port_t evtchn_from_irq(unsigned irq);
+int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq,
+                        evtchn_port_t *evtchn);
 
 int xen_set_callback_via(uint64_t via);
 int xen_evtchn_do_upcall(void);
@@ -122,9 +121,6 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 /* De-allocates the above mentioned physical interrupt. */
 int xen_destroy_irq(int irq);
 
-/* Return irq from pirq */
-int xen_irq_from_pirq(unsigned pirq);
-
 /* Return the pirq allocated to the irq. */
 int xen_pirq_from_irq(unsigned irq);
 
index f04a43044d917ceef45fc5a2acfb1910978dd0f3..976e9500f6518cbc121d212af5e80334ef3e2ace 100644 (file)
@@ -145,13 +145,8 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
        if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
                struct io_sq_data *sq = ctx->sq_data;
 
-               if (mutex_trylock(&sq->lock)) {
-                       if (sq->thread) {
-                               sq_pid = task_pid_nr(sq->thread);
-                               sq_cpu = task_cpu(sq->thread);
-                       }
-                       mutex_unlock(&sq->lock);
-               }
+               sq_pid = sq->task_pid;
+               sq_cpu = sq->sq_cpu;
        }
 
        seq_printf(m, "SqThread:\t%d\n", sq_pid);
index 08e3b175469c685d64186595da74c778210df88a..eccea851dd5a2858936f8f0d9acc6c417614358a 100644 (file)
@@ -254,7 +254,7 @@ int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        newf = u64_to_user_ptr(READ_ONCE(sqe->addr2));
        lnk->flags = READ_ONCE(sqe->hardlink_flags);
 
-       lnk->oldpath = getname(oldf);
+       lnk->oldpath = getname_uflags(oldf, lnk->flags);
        if (IS_ERR(lnk->oldpath))
                return PTR_ERR(lnk->oldpath);
 
index 7034be555334d2fe51c17a56ca6bb3b1e3981ac0..f521c5965a9331db5375ecfb1c67f5cda4f0f29c 100644 (file)
@@ -1258,7 +1258,7 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
                 */
                const struct bio_vec *bvec = imu->bvec;
 
-               if (offset <= bvec->bv_len) {
+               if (offset < bvec->bv_len) {
                        /*
                         * Note, huge pages buffers consists of one large
                         * bvec entry and should always go this way. The other
index bd6c2c7959a5bf26c7a394bcd9cbf1b0e94fe595..65b5dbe3c850ed564432c76f17e64739d430f2fe 100644 (file)
@@ -214,6 +214,7 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
                        did_sig = get_signal(&ksig);
                cond_resched();
                mutex_lock(&sqd->lock);
+               sqd->sq_cpu = raw_smp_processor_id();
        }
        return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
 }
@@ -229,10 +230,15 @@ static int io_sq_thread(void *data)
        snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
        set_task_comm(current, buf);
 
-       if (sqd->sq_cpu != -1)
+       /* reset to our pid after we've set task_comm, for fdinfo */
+       sqd->task_pid = current->pid;
+
+       if (sqd->sq_cpu != -1) {
                set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
-       else
+       } else {
                set_cpus_allowed_ptr(current, cpu_online_mask);
+               sqd->sq_cpu = raw_smp_processor_id();
+       }
 
        mutex_lock(&sqd->lock);
        while (1) {
@@ -261,6 +267,7 @@ static int io_sq_thread(void *data)
                                mutex_unlock(&sqd->lock);
                                cond_resched();
                                mutex_lock(&sqd->lock);
+                               sqd->sq_cpu = raw_smp_processor_id();
                        }
                        continue;
                }
@@ -294,6 +301,7 @@ static int io_sq_thread(void *data)
                                mutex_unlock(&sqd->lock);
                                schedule();
                                mutex_lock(&sqd->lock);
+                               sqd->sq_cpu = raw_smp_processor_id();
                        }
                        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
                                atomic_andnot(IORING_SQ_NEED_WAKEUP,
index 91e82e34b51e328eed868e025a5873fd6f557534..7a98cd176a127d2ef1726024a34ad9674b54c28f 100644 (file)
@@ -531,7 +531,7 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
        if (tsk != current)
                return 0;
 
-       if (WARN_ON_ONCE(!current->mm))
+       if (!current->mm)
                return 0;
        exe_file = get_mm_exe_file(current->mm);
        if (!exe_file)
index 08626b519ce23fce3cefb585e9aa246e9e43025b..cd3afe57ece3cc9a5a52c20243bdafd7fa987f4f 100644 (file)
@@ -64,8 +64,8 @@
 #define OFF    insn->off
 #define IMM    insn->imm
 
-struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
-bool bpf_global_ma_set, bpf_global_percpu_ma_set;
+struct bpf_mem_alloc bpf_global_ma;
+bool bpf_global_ma_set;
 
 /* No hurry in this branch
  *
@@ -2934,9 +2934,7 @@ static int __init bpf_global_ma_init(void)
 
        ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
        bpf_global_ma_set = !ret;
-       ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
-       bpf_global_percpu_ma_set = !ret;
-       return !bpf_global_ma_set || !bpf_global_percpu_ma_set;
+       return ret;
 }
 late_initcall(bpf_global_ma_init);
 #endif
index 63b909d277d47925c70215adbbc4b11b4e5ad558..6a51cfe4c2d63f573542fd918ff8276b7d3abd33 100644 (file)
@@ -978,6 +978,8 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
                memcg = get_memcg(c);
                old_memcg = set_active_memcg(memcg);
                ret = __alloc(c, NUMA_NO_NODE, GFP_KERNEL | __GFP_NOWARN | __GFP_ACCOUNT);
+               if (ret)
+                       *(struct bpf_mem_cache **)ret = c;
                set_active_memcg(old_memcg);
                mem_cgroup_put(memcg);
        }
index bd1c42eb540f1f7565d1d0c10457884c10f7a37a..af2819d5c8ee7ceb10e267abae6b7f3fee4ea089 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/poison.h>
 #include <linux/module.h>
 #include <linux/cpumask.h>
+#include <linux/bpf_mem_alloc.h>
 #include <net/xdp.h>
 
 #include "disasm.h"
@@ -41,6 +42,9 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
 #undef BPF_LINK_TYPE
 };
 
+struct bpf_mem_alloc bpf_global_percpu_ma;
+static bool bpf_global_percpu_ma_set;
+
 /* bpf_check() is a static code analyzer that walks eBPF program
  * instruction by instruction and updates register/stack state.
  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
@@ -336,6 +340,7 @@ struct bpf_kfunc_call_arg_meta {
 struct btf *btf_vmlinux;
 
 static DEFINE_MUTEX(bpf_verifier_lock);
+static DEFINE_MUTEX(bpf_percpu_ma_lock);
 
 static const struct bpf_line_info *
 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
@@ -542,13 +547,12 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id)
        return func_id == BPF_FUNC_dynptr_data;
 }
 
-static bool is_callback_calling_kfunc(u32 btf_id);
+static bool is_sync_callback_calling_kfunc(u32 btf_id);
 static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
 
-static bool is_callback_calling_function(enum bpf_func_id func_id)
+static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
 {
        return func_id == BPF_FUNC_for_each_map_elem ||
-              func_id == BPF_FUNC_timer_set_callback ||
               func_id == BPF_FUNC_find_vma ||
               func_id == BPF_FUNC_loop ||
               func_id == BPF_FUNC_user_ringbuf_drain;
@@ -559,6 +563,18 @@ static bool is_async_callback_calling_function(enum bpf_func_id func_id)
        return func_id == BPF_FUNC_timer_set_callback;
 }
 
+static bool is_callback_calling_function(enum bpf_func_id func_id)
+{
+       return is_sync_callback_calling_function(func_id) ||
+              is_async_callback_calling_function(func_id);
+}
+
+static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
+{
+       return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
+              (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
+}
+
 static bool is_storage_get_function(enum bpf_func_id func_id)
 {
        return func_id == BPF_FUNC_sk_storage_get ||
@@ -1803,6 +1819,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
        dst_state->first_insn_idx = src->first_insn_idx;
        dst_state->last_insn_idx = src->last_insn_idx;
        dst_state->dfs_depth = src->dfs_depth;
+       dst_state->callback_unroll_depth = src->callback_unroll_depth;
        dst_state->used_as_loop_entry = src->used_as_loop_entry;
        for (i = 0; i <= src->curframe; i++) {
                dst = dst_state->frame[i];
@@ -3434,13 +3451,11 @@ static void mark_insn_zext(struct bpf_verifier_env *env,
        reg->subreg_def = DEF_NOT_SUBREG;
 }
 
-static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
-                        enum reg_arg_type t)
+static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
+                          enum reg_arg_type t)
 {
-       struct bpf_verifier_state *vstate = env->cur_state;
-       struct bpf_func_state *state = vstate->frame[vstate->curframe];
        struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
-       struct bpf_reg_state *reg, *regs = state->regs;
+       struct bpf_reg_state *reg;
        bool rw64;
 
        if (regno >= MAX_BPF_REG) {
@@ -3481,6 +3496,15 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
        return 0;
 }
 
+static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
+                        enum reg_arg_type t)
+{
+       struct bpf_verifier_state *vstate = env->cur_state;
+       struct bpf_func_state *state = vstate->frame[vstate->curframe];
+
+       return __check_reg_arg(env, state->regs, regno, t);
+}
+
 static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
 {
        env->insn_aux_data[idx].jmp_point = true;
@@ -3516,12 +3540,29 @@ static int push_jmp_history(struct bpf_verifier_env *env,
 
 /* Backtrack one insn at a time. If idx is not at the top of recorded
  * history then previous instruction came from straight line execution.
+ * Return -ENOENT if we exhausted all instructions within given state.
+ *
+ * It's legal to have a bit of a looping with the same starting and ending
+ * insn index within the same state, e.g.: 3->4->5->3, so just because current
+ * instruction index is the same as state's first_idx doesn't mean we are
+ * done. If there is still some jump history left, we should keep going. We
+ * need to take into account that we might have a jump history between given
+ * state's parent and itself, due to checkpointing. In this case, we'll have
+ * history entry recording a jump from last instruction of parent state and
+ * first instruction of given state.
  */
 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
                             u32 *history)
 {
        u32 cnt = *history;
 
+       if (i == st->first_insn_idx) {
+               if (cnt == 0)
+                       return -ENOENT;
+               if (cnt == 1 && st->jmp_history[0].idx == i)
+                       return -ENOENT;
+       }
+
        if (cnt && st->jmp_history[cnt - 1].idx == i) {
                i = st->jmp_history[cnt - 1].prev_idx;
                (*history)--;
@@ -3702,6 +3743,8 @@ static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
        }
 }
 
+static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
+
 /* For given verifier state backtrack_insn() is called from the last insn to
  * the first insn. Its purpose is to compute a bitmask of registers and
  * stack slots that needs precision in the parent verifier state.
@@ -3877,16 +3920,13 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
                                        return -EFAULT;
                                return 0;
                        }
-               } else if ((bpf_helper_call(insn) &&
-                           is_callback_calling_function(insn->imm) &&
-                           !is_async_callback_calling_function(insn->imm)) ||
-                          (bpf_pseudo_kfunc_call(insn) && is_callback_calling_kfunc(insn->imm))) {
-                       /* callback-calling helper or kfunc call, which means
-                        * we are exiting from subprog, but unlike the subprog
-                        * call handling above, we shouldn't propagate
-                        * precision of r1-r5 (if any requested), as they are
-                        * not actually arguments passed directly to callback
-                        * subprogs
+               } else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
+                       /* exit from callback subprog to callback-calling helper or
+                        * kfunc call. Use idx/subseq_idx check to discern it from
+                        * straight line code backtracking.
+                        * Unlike the subprog call handling above, we shouldn't
+                        * propagate precision of r1-r5 (if any requested), as they are
+                        * not actually arguments passed directly to callback subprogs
                         */
                        if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
                                verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
@@ -3921,10 +3961,18 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
                } else if (opcode == BPF_EXIT) {
                        bool r0_precise;
 
+                       /* Backtracking to a nested function call, 'idx' is a part of
+                        * the inner frame 'subseq_idx' is a part of the outer frame.
+                        * In case of a regular function call, instructions giving
+                        * precision to registers R1-R5 should have been found already.
+                        * In case of a callback, it is ok to have R1-R5 marked for
+                        * backtracking, as these registers are set by the function
+                        * invoking callback.
+                        */
+                       if (subseq_idx >= 0 && calls_callback(env, subseq_idx))
+                               for (i = BPF_REG_1; i <= BPF_REG_5; i++)
+                                       bt_clear_reg(bt, i);
                        if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
-                               /* if backtracing was looking for registers R1-R5
-                                * they should have been found already.
-                                */
                                verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
                                WARN_ONCE(1, "verifier backtracking bug");
                                return -EFAULT;
@@ -4401,10 +4449,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
                                 * Nothing to be tracked further in the parent state.
                                 */
                                return 0;
-                       if (i == first_idx)
-                               break;
                        subseq_idx = i;
                        i = get_prev_insn_idx(st, i, &history);
+                       if (i == -ENOENT)
+                               break;
                        if (i >= env->prog->len) {
                                /* This can happen if backtracking reached insn 0
                                 * and there are still reg_mask or stack_mask
@@ -9328,7 +9376,7 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env,
        /* after the call registers r0 - r5 were scratched */
        for (i = 0; i < CALLER_SAVED_REGS; i++) {
                mark_reg_not_init(env, regs, caller_saved[i]);
-               check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
+               __check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
        }
 }
 
@@ -9341,11 +9389,10 @@ static int set_callee_state(struct bpf_verifier_env *env,
                            struct bpf_func_state *caller,
                            struct bpf_func_state *callee, int insn_idx);
 
-static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
-                            int *insn_idx, int subprog,
-                            set_callee_state_fn set_callee_state_cb)
+static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
+                           set_callee_state_fn set_callee_state_cb,
+                           struct bpf_verifier_state *state)
 {
-       struct bpf_verifier_state *state = env->cur_state;
        struct bpf_func_state *caller, *callee;
        int err;
 
@@ -9355,54 +9402,72 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                return -E2BIG;
        }
 
-       caller = state->frame[state->curframe];
        if (state->frame[state->curframe + 1]) {
                verbose(env, "verifier bug. Frame %d already allocated\n",
                        state->curframe + 1);
                return -EFAULT;
        }
 
+       caller = state->frame[state->curframe];
+       callee = kzalloc(sizeof(*callee), GFP_KERNEL);
+       if (!callee)
+               return -ENOMEM;
+       state->frame[state->curframe + 1] = callee;
+
+       /* callee cannot access r0, r6 - r9 for reading and has to write
+        * into its own stack before reading from it.
+        * callee can read/write into caller's stack
+        */
+       init_func_state(env, callee,
+                       /* remember the callsite, it will be used by bpf_exit */
+                       callsite,
+                       state->curframe + 1 /* frameno within this callchain */,
+                       subprog /* subprog number within this prog */);
+       /* Transfer references to the callee */
+       err = copy_reference_state(callee, caller);
+       err = err ?: set_callee_state_cb(env, caller, callee, callsite);
+       if (err)
+               goto err_out;
+
+       /* only increment it after check_reg_arg() finished */
+       state->curframe++;
+
+       return 0;
+
+err_out:
+       free_func_state(callee);
+       state->frame[state->curframe + 1] = NULL;
+       return err;
+}
+
+static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+                             int insn_idx, int subprog,
+                             set_callee_state_fn set_callee_state_cb)
+{
+       struct bpf_verifier_state *state = env->cur_state, *callback_state;
+       struct bpf_func_state *caller, *callee;
+       int err;
+
+       caller = state->frame[state->curframe];
        err = btf_check_subprog_call(env, subprog, caller->regs);
        if (err == -EFAULT)
                return err;
-       if (subprog_is_global(env, subprog)) {
-               if (err) {
-                       verbose(env, "Caller passes invalid args into func#%d\n",
-                               subprog);
-                       return err;
-               } else {
-                       if (env->log.level & BPF_LOG_LEVEL)
-                               verbose(env,
-                                       "Func#%d is global and valid. Skipping.\n",
-                                       subprog);
-                       clear_caller_saved_regs(env, caller->regs);
-
-                       /* All global functions return a 64-bit SCALAR_VALUE */
-                       mark_reg_unknown(env, caller->regs, BPF_REG_0);
-                       caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
-
-                       /* continue with next insn after call */
-                       return 0;
-               }
-       }
 
        /* set_callee_state is used for direct subprog calls, but we are
         * interested in validating only BPF helpers that can call subprogs as
         * callbacks
         */
-       if (set_callee_state_cb != set_callee_state) {
-               env->subprog_info[subprog].is_cb = true;
-               if (bpf_pseudo_kfunc_call(insn) &&
-                   !is_callback_calling_kfunc(insn->imm)) {
-                       verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
-                               func_id_name(insn->imm), insn->imm);
-                       return -EFAULT;
-               } else if (!bpf_pseudo_kfunc_call(insn) &&
-                          !is_callback_calling_function(insn->imm)) { /* helper */
-                       verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
-                               func_id_name(insn->imm), insn->imm);
-                       return -EFAULT;
-               }
+       env->subprog_info[subprog].is_cb = true;
+       if (bpf_pseudo_kfunc_call(insn) &&
+           !is_sync_callback_calling_kfunc(insn->imm)) {
+               verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
+                       func_id_name(insn->imm), insn->imm);
+               return -EFAULT;
+       } else if (!bpf_pseudo_kfunc_call(insn) &&
+                  !is_callback_calling_function(insn->imm)) { /* helper */
+               verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
+                       func_id_name(insn->imm), insn->imm);
+               return -EFAULT;
        }
 
        if (insn->code == (BPF_JMP | BPF_CALL) &&
@@ -9413,53 +9478,83 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                /* there is no real recursion here. timer callbacks are async */
                env->subprog_info[subprog].is_async_cb = true;
                async_cb = push_async_cb(env, env->subprog_info[subprog].start,
-                                        *insn_idx, subprog);
+                                        insn_idx, subprog);
                if (!async_cb)
                        return -EFAULT;
                callee = async_cb->frame[0];
                callee->async_entry_cnt = caller->async_entry_cnt + 1;
 
                /* Convert bpf_timer_set_callback() args into timer callback args */
-               err = set_callee_state_cb(env, caller, callee, *insn_idx);
+               err = set_callee_state_cb(env, caller, callee, insn_idx);
                if (err)
                        return err;
 
+               return 0;
+       }
+
+       /* for callback functions enqueue entry to callback and
+        * proceed with next instruction within current frame.
+        */
+       callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false);
+       if (!callback_state)
+               return -ENOMEM;
+
+       err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb,
+                              callback_state);
+       if (err)
+               return err;
+
+       callback_state->callback_unroll_depth++;
+       callback_state->frame[callback_state->curframe - 1]->callback_depth++;
+       caller->callback_depth = 0;
+       return 0;
+}
+
+static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+                          int *insn_idx)
+{
+       struct bpf_verifier_state *state = env->cur_state;
+       struct bpf_func_state *caller;
+       int err, subprog, target_insn;
+
+       target_insn = *insn_idx + insn->imm + 1;
+       subprog = find_subprog(env, target_insn);
+       if (subprog < 0) {
+               verbose(env, "verifier bug. No program starts at insn %d\n", target_insn);
+               return -EFAULT;
+       }
+
+       caller = state->frame[state->curframe];
+       err = btf_check_subprog_call(env, subprog, caller->regs);
+       if (err == -EFAULT)
+               return err;
+       if (subprog_is_global(env, subprog)) {
+               if (err) {
+                       verbose(env, "Caller passes invalid args into func#%d\n", subprog);
+                       return err;
+               }
+
+               if (env->log.level & BPF_LOG_LEVEL)
+                       verbose(env, "Func#%d is global and valid. Skipping.\n", subprog);
                clear_caller_saved_regs(env, caller->regs);
+
+               /* All global functions return a 64-bit SCALAR_VALUE */
                mark_reg_unknown(env, caller->regs, BPF_REG_0);
                caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
+
                /* continue with next insn after call */
                return 0;
        }
 
-       callee = kzalloc(sizeof(*callee), GFP_KERNEL);
-       if (!callee)
-               return -ENOMEM;
-       state->frame[state->curframe + 1] = callee;
-
-       /* callee cannot access r0, r6 - r9 for reading and has to write
-        * into its own stack before reading from it.
-        * callee can read/write into caller's stack
+       /* for regular function entry setup new frame and continue
+        * from that frame.
         */
-       init_func_state(env, callee,
-                       /* remember the callsite, it will be used by bpf_exit */
-                       *insn_idx /* callsite */,
-                       state->curframe + 1 /* frameno within this callchain */,
-                       subprog /* subprog number within this prog */);
-
-       /* Transfer references to the callee */
-       err = copy_reference_state(callee, caller);
-       if (err)
-               goto err_out;
-
-       err = set_callee_state_cb(env, caller, callee, *insn_idx);
+       err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state);
        if (err)
-               goto err_out;
+               return err;
 
        clear_caller_saved_regs(env, caller->regs);
 
-       /* only increment it after check_reg_arg() finished */
-       state->curframe++;
-
        /* and go analyze first insn of the callee */
        *insn_idx = env->subprog_info[subprog].start - 1;
 
@@ -9467,14 +9562,10 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                verbose(env, "caller:\n");
                print_verifier_state(env, caller, true);
                verbose(env, "callee:\n");
-               print_verifier_state(env, callee, true);
+               print_verifier_state(env, state->frame[state->curframe], true);
        }
-       return 0;
 
-err_out:
-       free_func_state(callee);
-       state->frame[state->curframe + 1] = NULL;
-       return err;
+       return 0;
 }
 
 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
@@ -9518,22 +9609,6 @@ static int set_callee_state(struct bpf_verifier_env *env,
        return 0;
 }
 
-static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
-                          int *insn_idx)
-{
-       int subprog, target_insn;
-
-       target_insn = *insn_idx + insn->imm + 1;
-       subprog = find_subprog(env, target_insn);
-       if (subprog < 0) {
-               verbose(env, "verifier bug. No program starts at insn %d\n",
-                       target_insn);
-               return -EFAULT;
-       }
-
-       return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
-}
-
 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
                                       struct bpf_func_state *caller,
                                       struct bpf_func_state *callee,
@@ -9726,9 +9801,10 @@ static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
 
 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 {
-       struct bpf_verifier_state *state = env->cur_state;
+       struct bpf_verifier_state *state = env->cur_state, *prev_st;
        struct bpf_func_state *caller, *callee;
        struct bpf_reg_state *r0;
+       bool in_callback_fn;
        int err;
 
        callee = state->frame[state->curframe];
@@ -9757,6 +9833,11 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
                        verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
                        return -EINVAL;
                }
+               if (!calls_callback(env, callee->callsite)) {
+                       verbose(env, "BUG: in callback at %d, callsite %d !calls_callback\n",
+                               *insn_idx, callee->callsite);
+                       return -EFAULT;
+               }
        } else {
                /* return to the caller whatever r0 had in the callee */
                caller->regs[BPF_REG_0] = *r0;
@@ -9774,7 +9855,16 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
                        return err;
        }
 
-       *insn_idx = callee->callsite + 1;
+       /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite,
+        * there function call logic would reschedule callback visit. If iteration
+        * converges is_state_visited() would prune that visit eventually.
+        */
+       in_callback_fn = callee->in_callback_fn;
+       if (in_callback_fn)
+               *insn_idx = callee->callsite;
+       else
+               *insn_idx = callee->callsite + 1;
+
        if (env->log.level & BPF_LOG_LEVEL) {
                verbose(env, "returning from callee:\n");
                print_verifier_state(env, callee, true);
@@ -9785,6 +9875,24 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
         * bpf_throw, this will be done by copy_verifier_state for extra frames. */
        free_func_state(callee);
        state->frame[state->curframe--] = NULL;
+
+       /* for callbacks widen imprecise scalars to make programs like below verify:
+        *
+        *   struct ctx { int i; }
+        *   void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
+        *   ...
+        *   struct ctx = { .i = 0; }
+        *   bpf_loop(100, cb, &ctx, 0);
+        *
+        * This is similar to what is done in process_iter_next_call() for open
+        * coded iterators.
+        */
+       prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
+       if (prev_st) {
+               err = widen_imprecise_scalars(env, prev_st, state);
+               if (err)
+                       return err;
+       }
        return 0;
 }
 
@@ -10187,24 +10295,37 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                }
                break;
        case BPF_FUNC_for_each_map_elem:
-               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
-                                       set_map_elem_callback_state);
+               err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+                                        set_map_elem_callback_state);
                break;
        case BPF_FUNC_timer_set_callback:
-               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
-                                       set_timer_callback_state);
+               err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+                                        set_timer_callback_state);
                break;
        case BPF_FUNC_find_vma:
-               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
-                                       set_find_vma_callback_state);
+               err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+                                        set_find_vma_callback_state);
                break;
        case BPF_FUNC_snprintf:
                err = check_bpf_snprintf_call(env, regs);
                break;
        case BPF_FUNC_loop:
                update_loop_inline_state(env, meta.subprogno);
-               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
-                                       set_loop_callback_state);
+               /* Verifier relies on R1 value to determine if bpf_loop() iteration
+                * is finished, thus mark it precise.
+                */
+               err = mark_chain_precision(env, BPF_REG_1);
+               if (err)
+                       return err;
+               if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) {
+                       err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+                                                set_loop_callback_state);
+               } else {
+                       cur_func(env)->callback_depth = 0;
+                       if (env->log.level & BPF_LOG_LEVEL2)
+                               verbose(env, "frame%d bpf_loop iteration limit reached\n",
+                                       env->cur_state->curframe);
+               }
                break;
        case BPF_FUNC_dynptr_from_mem:
                if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
@@ -10300,8 +10421,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                break;
        }
        case BPF_FUNC_user_ringbuf_drain:
-               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
-                                       set_user_ringbuf_callback_state);
+               err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+                                        set_user_ringbuf_callback_state);
                break;
        }
 
@@ -11189,7 +11310,7 @@ static bool is_bpf_graph_api_kfunc(u32 btf_id)
               btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
 }
 
-static bool is_callback_calling_kfunc(u32 btf_id)
+static bool is_sync_callback_calling_kfunc(u32 btf_id)
 {
        return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
 }
@@ -11941,6 +12062,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                return -EACCES;
        }
 
+       /* Check the arguments */
+       err = check_kfunc_args(env, &meta, insn_idx);
+       if (err < 0)
+               return err;
+
+       if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+               err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+                                        set_rbtree_add_callback_state);
+               if (err) {
+                       verbose(env, "kfunc %s#%d failed callback verification\n",
+                               func_name, meta.func_id);
+                       return err;
+               }
+       }
+
        rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
        rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
 
@@ -11976,10 +12112,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                return -EINVAL;
        }
 
-       /* Check the arguments */
-       err = check_kfunc_args(env, &meta, insn_idx);
-       if (err < 0)
-               return err;
        /* In case of release function, we get register number of refcounted
         * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
         */
@@ -12013,16 +12145,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                }
        }
 
-       if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
-               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
-                                       set_rbtree_add_callback_state);
-               if (err) {
-                       verbose(env, "kfunc %s#%d failed callback verification\n",
-                               func_name, meta.func_id);
-                       return err;
-               }
-       }
-
        if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
                if (!bpf_jit_supports_exceptions()) {
                        verbose(env, "JIT does not support calling kfunc %s#%d\n",
@@ -12074,8 +12196,19 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                                if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
                                        return -ENOMEM;
 
-                               if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set)
-                                       return -ENOMEM;
+                               if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+                                       if (!bpf_global_percpu_ma_set) {
+                                               mutex_lock(&bpf_percpu_ma_lock);
+                                               if (!bpf_global_percpu_ma_set) {
+                                                       err = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
+                                                       if (!err)
+                                                               bpf_global_percpu_ma_set = true;
+                                               }
+                                               mutex_unlock(&bpf_percpu_ma_lock);
+                                               if (err)
+                                                       return err;
+                                       }
+                               }
 
                                if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
                                        verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
@@ -15375,6 +15508,15 @@ static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
        return env->insn_aux_data[insn_idx].force_checkpoint;
 }
 
+static void mark_calls_callback(struct bpf_verifier_env *env, int idx)
+{
+       env->insn_aux_data[idx].calls_callback = true;
+}
+
+static bool calls_callback(struct bpf_verifier_env *env, int insn_idx)
+{
+       return env->insn_aux_data[insn_idx].calls_callback;
+}
 
 enum {
        DONE_EXPLORING = 0,
@@ -15386,8 +15528,7 @@ enum {
  * w - next instruction
  * e - edge
  */
-static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
-                    bool loop_ok)
+static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
 {
        int *insn_stack = env->cfg.insn_stack;
        int *insn_state = env->cfg.insn_state;
@@ -15419,7 +15560,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
                insn_stack[env->cfg.cur_stack++] = w;
                return KEEP_EXPLORING;
        } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
-               if (loop_ok && env->bpf_capable)
+               if (env->bpf_capable)
                        return DONE_EXPLORING;
                verbose_linfo(env, t, "%d: ", t);
                verbose_linfo(env, w, "%d: ", w);
@@ -15439,24 +15580,20 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
                                struct bpf_verifier_env *env,
                                bool visit_callee)
 {
-       int ret;
+       int ret, insn_sz;
 
-       ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
+       insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
+       ret = push_insn(t, t + insn_sz, FALLTHROUGH, env);
        if (ret)
                return ret;
 
-       mark_prune_point(env, t + 1);
+       mark_prune_point(env, t + insn_sz);
        /* when we exit from subprog, we need to record non-linear history */
-       mark_jmp_point(env, t + 1);
+       mark_jmp_point(env, t + insn_sz);
 
        if (visit_callee) {
                mark_prune_point(env, t);
-               ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
-                               /* It's ok to allow recursion from CFG point of
-                                * view. __check_func_call() will do the actual
-                                * check.
-                                */
-                               bpf_pseudo_func(insns + t));
+               ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
        }
        return ret;
 }
@@ -15469,15 +15606,17 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
 static int visit_insn(int t, struct bpf_verifier_env *env)
 {
        struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
-       int ret, off;
+       int ret, off, insn_sz;
 
        if (bpf_pseudo_func(insn))
                return visit_func_call_insn(t, insns, env, true);
 
        /* All non-branch instructions have a single fall-through edge. */
        if (BPF_CLASS(insn->code) != BPF_JMP &&
-           BPF_CLASS(insn->code) != BPF_JMP32)
-               return push_insn(t, t + 1, FALLTHROUGH, env, false);
+           BPF_CLASS(insn->code) != BPF_JMP32) {
+               insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
+               return push_insn(t, t + insn_sz, FALLTHROUGH, env);
+       }
 
        switch (BPF_OP(insn->code)) {
        case BPF_EXIT:
@@ -15491,6 +15630,21 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
                         * async state will be pushed for further exploration.
                         */
                        mark_prune_point(env, t);
+               /* For functions that invoke callbacks it is not known how many times
+                * callback would be called. Verifier models callback calling functions
+                * by repeatedly visiting callback bodies and returning to origin call
+                * instruction.
+                * In order to stop such iteration verifier needs to identify when a
+                * state identical some state from a previous iteration is reached.
+                * Check below forces creation of checkpoint before callback calling
+                * instruction to allow search for such identical states.
+                */
+               if (is_sync_callback_calling_insn(insn)) {
+                       mark_calls_callback(env, t);
+                       mark_force_checkpoint(env, t);
+                       mark_prune_point(env, t);
+                       mark_jmp_point(env, t);
+               }
                if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
                        struct bpf_kfunc_call_arg_meta meta;
 
@@ -15523,8 +15677,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
                        off = insn->imm;
 
                /* unconditional jump with single edge */
-               ret = push_insn(t, t + off + 1, FALLTHROUGH, env,
-                               true);
+               ret = push_insn(t, t + off + 1, FALLTHROUGH, env);
                if (ret)
                        return ret;
 
@@ -15537,11 +15690,11 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
                /* conditional jump with two edges */
                mark_prune_point(env, t);
 
-               ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
+               ret = push_insn(t, t + 1, FALLTHROUGH, env);
                if (ret)
                        return ret;
 
-               return push_insn(t, t + insn->off + 1, BRANCH, env, true);
+               return push_insn(t, t + insn->off + 1, BRANCH, env);
        }
 }
 
@@ -15607,11 +15760,21 @@ walk_cfg:
        }
 
        for (i = 0; i < insn_cnt; i++) {
+               struct bpf_insn *insn = &env->prog->insnsi[i];
+
                if (insn_state[i] != EXPLORED) {
                        verbose(env, "unreachable insn %d\n", i);
                        ret = -EINVAL;
                        goto err_free;
                }
+               if (bpf_is_ldimm64(insn)) {
+                       if (insn_state[i + 1] != 0) {
+                               verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
+                               ret = -EINVAL;
+                               goto err_free;
+                       }
+                       i++; /* skip second half of ldimm64 */
+               }
        }
        ret = 0; /* cfg looks good */
 
@@ -16951,10 +17114,16 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
                                }
                                goto skip_inf_loop_check;
                        }
+                       if (calls_callback(env, insn_idx)) {
+                               if (states_equal(env, &sl->state, cur, true))
+                                       goto hit;
+                               goto skip_inf_loop_check;
+                       }
                        /* attempt to detect infinite loop to avoid unnecessary doomed work */
                        if (states_maybe_looping(&sl->state, cur) &&
                            states_equal(env, &sl->state, cur, false) &&
-                           !iter_active_depths_differ(&sl->state, cur)) {
+                           !iter_active_depths_differ(&sl->state, cur) &&
+                           sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
                                verbose_linfo(env, insn_idx, "; ");
                                verbose(env, "infinite loop detected at insn %d\n", insn_idx);
                                verbose(env, "cur state:");
index 1d5b9de3b1b9d01791b1222bf2fcbb4e46c852ee..4b9ff41ca603a3a9952d079b3180b43474b9f892 100644 (file)
@@ -3885,14 +3885,6 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
        return psi_trigger_poll(&ctx->psi.trigger, of->file, pt);
 }
 
-static int cgroup_pressure_open(struct kernfs_open_file *of)
-{
-       if (of->file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
-               return -EPERM;
-
-       return 0;
-}
-
 static void cgroup_pressure_release(struct kernfs_open_file *of)
 {
        struct cgroup_file_ctx *ctx = of->priv;
@@ -5299,7 +5291,6 @@ static struct cftype cgroup_psi_files[] = {
        {
                .name = "io.pressure",
                .file_offset = offsetof(struct cgroup, psi_files[PSI_IO]),
-               .open = cgroup_pressure_open,
                .seq_show = cgroup_io_pressure_show,
                .write = cgroup_io_pressure_write,
                .poll = cgroup_pressure_poll,
@@ -5308,7 +5299,6 @@ static struct cftype cgroup_psi_files[] = {
        {
                .name = "memory.pressure",
                .file_offset = offsetof(struct cgroup, psi_files[PSI_MEM]),
-               .open = cgroup_pressure_open,
                .seq_show = cgroup_memory_pressure_show,
                .write = cgroup_memory_pressure_write,
                .poll = cgroup_pressure_poll,
@@ -5317,7 +5307,6 @@ static struct cftype cgroup_psi_files[] = {
        {
                .name = "cpu.pressure",
                .file_offset = offsetof(struct cgroup, psi_files[PSI_CPU]),
-               .open = cgroup_pressure_open,
                .seq_show = cgroup_cpu_pressure_show,
                .write = cgroup_cpu_pressure_write,
                .poll = cgroup_pressure_poll,
@@ -5327,7 +5316,6 @@ static struct cftype cgroup_psi_files[] = {
        {
                .name = "irq.pressure",
                .file_offset = offsetof(struct cgroup, psi_files[PSI_IRQ]),
-               .open = cgroup_pressure_open,
                .seq_show = cgroup_irq_pressure_show,
                .write = cgroup_irq_pressure_write,
                .poll = cgroup_pressure_poll,
index 9e4c6780adde8f71c9ad14ba3c312e865ac3485d..a86972a91991580b2edcf205b4b6dfd96deeec69 100644 (file)
@@ -2113,7 +2113,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
        [CPUHP_HRTIMERS_PREPARE] = {
                .name                   = "hrtimers:prepare",
                .startup.single         = hrtimers_prepare_cpu,
-               .teardown.single        = hrtimers_dead_cpu,
+               .teardown.single        = NULL,
        },
        [CPUHP_SMPCFD_PREPARE] = {
                .name                   = "smpcfd:prepare",
@@ -2205,6 +2205,12 @@ static struct cpuhp_step cpuhp_hp_states[] = {
                .startup.single         = NULL,
                .teardown.single        = smpcfd_dying_cpu,
        },
+       [CPUHP_AP_HRTIMERS_DYING] = {
+               .name                   = "hrtimers:dying",
+               .startup.single         = NULL,
+               .teardown.single        = hrtimers_cpu_dying,
+       },
+
        /* Entry state on starting. Interrupts enabled from here on. Transient
         * state for synchronsization */
        [CPUHP_AP_ONLINE] = {
index 683dc086ef10a53123c8aa175b436c556a51a410..b704d83a28b29bace1741616eeff55a1cf03f413 100644 (file)
@@ -4828,6 +4828,11 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
        void *task_ctx_data = NULL;
 
        if (!ctx->task) {
+               /*
+                * perf_pmu_migrate_context() / __perf_pmu_install_event()
+                * relies on the fact that find_get_pmu_context() cannot fail
+                * for CPU contexts.
+                */
                struct perf_cpu_pmu_context *cpc;
 
                cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
@@ -12889,6 +12894,9 @@ static void __perf_pmu_install_event(struct pmu *pmu,
                                     int cpu, struct perf_event *event)
 {
        struct perf_event_pmu_context *epc;
+       struct perf_event_context *old_ctx = event->ctx;
+
+       get_ctx(ctx); /* normally find_get_context() */
 
        event->cpu = cpu;
        epc = find_get_pmu_context(pmu, ctx, event);
@@ -12897,6 +12905,11 @@ static void __perf_pmu_install_event(struct pmu *pmu,
        if (event->state >= PERF_EVENT_STATE_OFF)
                event->state = PERF_EVENT_STATE_INACTIVE;
        perf_install_in_context(ctx, event, cpu);
+
+       /*
+        * Now that event->ctx is updated and visible, put the old ctx.
+        */
+       put_ctx(old_ctx);
 }
 
 static void __perf_pmu_install(struct perf_event_context *ctx,
@@ -12935,6 +12948,10 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
        struct perf_event_context *src_ctx, *dst_ctx;
        LIST_HEAD(events);
 
+       /*
+        * Since per-cpu context is persistent, no need to grab an extra
+        * reference.
+        */
        src_ctx = &per_cpu_ptr(&perf_cpu_context, src_cpu)->ctx;
        dst_ctx = &per_cpu_ptr(&perf_cpu_context, dst_cpu)->ctx;
 
index 52695c59d04114350d29f1bf0403c19b873663d6..dad981a865b841c954deed1934674f514832ff7e 100644 (file)
@@ -700,7 +700,8 @@ retry:
        owner = uval & FUTEX_TID_MASK;
 
        if (pending_op && !pi && !owner) {
-               futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+               futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1,
+                          FUTEX_BITSET_MATCH_ANY);
                return 0;
        }
 
@@ -752,8 +753,10 @@ retry:
         * Wake robust non-PI futexes here. The wakeup of
         * PI futexes happens in exit_pi_state():
         */
-       if (!pi && (uval & FUTEX_WAITERS))
-               futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+       if (!pi && (uval & FUTEX_WAITERS)) {
+               futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1,
+                          FUTEX_BITSET_MATCH_ANY);
+       }
 
        return 0;
 }
index e85b5ad3e206987c1a4d82e81f0386627b01b81e..151bd3de59363a6b67a3a51274fe568bae4b30dd 100644 (file)
@@ -3497,7 +3497,8 @@ static int alloc_chain_hlocks(int req)
                size = chain_block_size(curr);
                if (likely(size >= req)) {
                        del_chain_block(0, size, chain_block_next(curr));
-                       add_chain_block(curr + req, size - req);
+                       if (size > req)
+                               add_chain_block(curr + req, size - req);
                        return curr;
                }
        }
index 2048138ce54b574a3ba56b9f6bf7b1cefac1fd32..d7a3c63a2171a8569abc8fe7c3997dc1d22e001e 100644 (file)
@@ -3666,41 +3666,140 @@ static inline void
 dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
 #endif
 
+static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
+                          unsigned long weight)
+{
+       unsigned long old_weight = se->load.weight;
+       u64 avruntime = avg_vruntime(cfs_rq);
+       s64 vlag, vslice;
+
+       /*
+        * VRUNTIME
+        * ========
+        *
+        * COROLLARY #1: The virtual runtime of the entity needs to be
+        * adjusted if re-weight at !0-lag point.
+        *
+        * Proof: For contradiction assume this is not true, so we can
+        * re-weight without changing vruntime at !0-lag point.
+        *
+        *             Weight   VRuntime   Avg-VRuntime
+        *     before    w          v            V
+        *      after    w'         v'           V'
+        *
+        * Since lag needs to be preserved through re-weight:
+        *
+        *      lag = (V - v)*w = (V'- v')*w', where v = v'
+        *      ==>     V' = (V - v)*w/w' + v           (1)
+        *
+        * Let W be the total weight of the entities before reweight,
+        * since V' is the new weighted average of entities:
+        *
+        *      V' = (WV + w'v - wv) / (W + w' - w)     (2)
+        *
+        * by using (1) & (2) we obtain:
+        *
+        *      (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v
+        *      ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v
+        *      ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v
+        *      ==>     (V - v)*W/(W + w' - w) = (V - v)*w/w' (3)
+        *
+        * Since we are doing at !0-lag point which means V != v, we
+        * can simplify (3):
+        *
+        *      ==>     W / (W + w' - w) = w / w'
+        *      ==>     Ww' = Ww + ww' - ww
+        *      ==>     W * (w' - w) = w * (w' - w)
+        *      ==>     W = w   (re-weight indicates w' != w)
+        *
+        * So the cfs_rq contains only one entity, hence vruntime of
+        * the entity @v should always equal to the cfs_rq's weighted
+        * average vruntime @V, which means we will always re-weight
+        * at 0-lag point, thus breach assumption. Proof completed.
+        *
+        *
+        * COROLLARY #2: Re-weight does NOT affect weighted average
+        * vruntime of all the entities.
+        *
+        * Proof: According to corollary #1, Eq. (1) should be:
+        *
+        *      (V - v)*w = (V' - v')*w'
+        *      ==>    v' = V' - (V - v)*w/w'           (4)
+        *
+        * According to the weighted average formula, we have:
+        *
+        *      V' = (WV - wv + w'v') / (W - w + w')
+        *         = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w')
+        *         = (WV - wv + w'V' - Vw + wv) / (W - w + w')
+        *         = (WV + w'V' - Vw) / (W - w + w')
+        *
+        *      ==>  V'*(W - w + w') = WV + w'V' - Vw
+        *      ==>     V' * (W - w) = (W - w) * V      (5)
+        *
+        * If the entity is the only one in the cfs_rq, then reweight
+        * always occurs at 0-lag point, so V won't change. Or else
+        * there are other entities, hence W != w, then Eq. (5) turns
+        * into V' = V. So V won't change in either case, proof done.
+        *
+        *
+        * So according to corollary #1 & #2, the effect of re-weight
+        * on vruntime should be:
+        *
+        *      v' = V' - (V - v) * w / w'              (4)
+        *         = V  - (V - v) * w / w'
+        *         = V  - vl * w / w'
+        *         = V  - vl'
+        */
+       if (avruntime != se->vruntime) {
+               vlag = (s64)(avruntime - se->vruntime);
+               vlag = div_s64(vlag * old_weight, weight);
+               se->vruntime = avruntime - vlag;
+       }
+
+       /*
+        * DEADLINE
+        * ========
+        *
+        * When the weight changes, the virtual time slope changes and
+        * we should adjust the relative virtual deadline accordingly.
+        *
+        *      d' = v' + (d - v)*w/w'
+        *         = V' - (V - v)*w/w' + (d - v)*w/w'
+        *         = V  - (V - v)*w/w' + (d - v)*w/w'
+        *         = V  + (d - V)*w/w'
+        */
+       vslice = (s64)(se->deadline - avruntime);
+       vslice = div_s64(vslice * old_weight, weight);
+       se->deadline = avruntime + vslice;
+}
+
 static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
                            unsigned long weight)
 {
-       unsigned long old_weight = se->load.weight;
+       bool curr = cfs_rq->curr == se;
 
        if (se->on_rq) {
                /* commit outstanding execution time */
-               if (cfs_rq->curr == se)
+               if (curr)
                        update_curr(cfs_rq);
                else
-                       avg_vruntime_sub(cfs_rq, se);
+                       __dequeue_entity(cfs_rq, se);
                update_load_sub(&cfs_rq->load, se->load.weight);
        }
        dequeue_load_avg(cfs_rq, se);
 
-       update_load_set(&se->load, weight);
-
        if (!se->on_rq) {
                /*
                 * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
                 * we need to scale se->vlag when w_i changes.
                 */
-               se->vlag = div_s64(se->vlag * old_weight, weight);
+               se->vlag = div_s64(se->vlag * se->load.weight, weight);
        } else {
-               s64 deadline = se->deadline - se->vruntime;
-               /*
-                * When the weight changes, the virtual time slope changes and
-                * we should adjust the relative virtual deadline accordingly.
-                */
-               deadline = div_s64(deadline * old_weight, weight);
-               se->deadline = se->vruntime + deadline;
-               if (se != cfs_rq->curr)
-                       min_deadline_cb_propagate(&se->run_node, NULL);
+               reweight_eevdf(cfs_rq, se, weight);
        }
 
+       update_load_set(&se->load, weight);
+
 #ifdef CONFIG_SMP
        do {
                u32 divider = get_pelt_divider(&se->avg);
@@ -3712,8 +3811,17 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
        enqueue_load_avg(cfs_rq, se);
        if (se->on_rq) {
                update_load_add(&cfs_rq->load, se->load.weight);
-               if (cfs_rq->curr != se)
-                       avg_vruntime_add(cfs_rq, se);
+               if (!curr) {
+                       /*
+                        * The entity's vruntime has been adjusted, so let's check
+                        * whether the rq-wide min_vruntime needs updated too. Since
+                        * the calculations above require stable min_vruntime rather
+                        * than up-to-date one, we do the update at the end of the
+                        * reweight process.
+                        */
+                       __enqueue_entity(cfs_rq, se);
+                       update_min_vruntime(cfs_rq);
+               }
        }
 }
 
@@ -3857,14 +3965,11 @@ static void update_cfs_group(struct sched_entity *se)
 
 #ifndef CONFIG_SMP
        shares = READ_ONCE(gcfs_rq->tg->shares);
-
-       if (likely(se->load.weight == shares))
-               return;
 #else
-       shares   = calc_group_shares(gcfs_rq);
+       shares = calc_group_shares(gcfs_rq);
 #endif
-
-       reweight_entity(cfs_rq_of(se), se, shares);
+       if (unlikely(se->load.weight != shares))
+               reweight_entity(cfs_rq_of(se), se, shares);
 }
 
 #else /* CONFIG_FAIR_GROUP_SCHED */
@@ -11079,12 +11184,16 @@ static int should_we_balance(struct lb_env *env)
                        continue;
                }
 
-               /* Are we the first idle CPU? */
+               /*
+                * Are we the first idle core in a non-SMT domain or higher,
+                * or the first idle CPU in a SMT domain?
+                */
                return cpu == env->dst_cpu;
        }
 
-       if (idle_smt == env->dst_cpu)
-               return true;
+       /* Are we the first idle CPU with busy siblings? */
+       if (idle_smt != -1)
+               return idle_smt == env->dst_cpu;
 
        /* Are we the first CPU of this group ? */
        return group_balance_cpu(sg) == env->dst_cpu;
index 420d9cb9cc8e203f50014bb2ec564f6598d9869c..e219fcfa112d863eeef58381d04fd4bab16a1e32 100644 (file)
@@ -2394,6 +2394,10 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
        if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
                return -EINVAL;
 
+       /* PARISC cannot allow mdwe as it needs writable stacks */
+       if (IS_ENABLED(CONFIG_PARISC))
+               return -EINVAL;
+
        current_bits = get_current_mdwe();
        if (current_bits && current_bits != bits)
                return -EPERM; /* Cannot unset the flags */
index 238262e4aba7e2887f2766cc17b61aa0eb546308..760793998cdd703a387c64a792a7b7f7dab552d5 100644 (file)
@@ -2219,29 +2219,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
        }
 }
 
-int hrtimers_dead_cpu(unsigned int scpu)
+int hrtimers_cpu_dying(unsigned int dying_cpu)
 {
        struct hrtimer_cpu_base *old_base, *new_base;
-       int i;
+       int i, ncpu = cpumask_first(cpu_active_mask);
 
-       BUG_ON(cpu_online(scpu));
-       tick_cancel_sched_timer(scpu);
+       tick_cancel_sched_timer(dying_cpu);
+
+       old_base = this_cpu_ptr(&hrtimer_bases);
+       new_base = &per_cpu(hrtimer_bases, ncpu);
 
-       /*
-        * this BH disable ensures that raise_softirq_irqoff() does
-        * not wakeup ksoftirqd (and acquire the pi-lock) while
-        * holding the cpu_base lock
-        */
-       local_bh_disable();
-       local_irq_disable();
-       old_base = &per_cpu(hrtimer_bases, scpu);
-       new_base = this_cpu_ptr(&hrtimer_bases);
        /*
         * The caller is globally serialized and nobody else
         * takes two locks at once, deadlock is not possible.
         */
-       raw_spin_lock(&new_base->lock);
-       raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+       raw_spin_lock(&old_base->lock);
+       raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);
 
        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
                migrate_hrtimer_list(&old_base->clock_base[i],
@@ -2252,15 +2245,13 @@ int hrtimers_dead_cpu(unsigned int scpu)
         * The migration might have changed the first expiring softirq
         * timer on this CPU. Update it.
         */
-       hrtimer_update_softirq_timer(new_base, false);
+       __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
+       /* Tell the other CPU to retrigger the next event */
+       smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
 
-       raw_spin_unlock(&old_base->lock);
        raw_spin_unlock(&new_base->lock);
+       raw_spin_unlock(&old_base->lock);
 
-       /* Check, if we got expired work to do */
-       __hrtimer_peek_ahead_timers();
-       local_irq_enable();
-       local_bh_enable();
        return 0;
 }
 
index f86c9eeafb35ad9da21ebddda8a182ea27970ff8..c16540552d61bc14121b034a9d6e302045ff0dc5 100644 (file)
@@ -36,7 +36,7 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
                        closure_debug_destroy(cl);
 
                        if (destructor)
-                               destructor(cl);
+                               destructor(&cl->work);
 
                        if (parent)
                                closure_put(parent);
@@ -108,8 +108,9 @@ struct closure_syncer {
        int                     done;
 };
 
-static void closure_sync_fn(struct closure *cl)
+static CLOSURE_CALLBACK(closure_sync_fn)
 {
+       struct closure *cl = container_of(ws, struct closure, work);
        struct closure_syncer *s = cl->s;
        struct task_struct *p;
 
index dd1b998552cd943748c76db5c31c2948f40834e2..4f9112b38f3ad67dc02c4dedb539f13b6e0830e1 100644 (file)
@@ -111,9 +111,6 @@ static const char *names_0[] = {
        E(ENOSPC),
        E(ENOSR),
        E(ENOSTR),
-#ifdef ENOSYM
-       E(ENOSYM),
-#endif
        E(ENOSYS),
        E(ENOTBLK),
        E(ENOTCONN),
@@ -144,9 +141,6 @@ static const char *names_0[] = {
 #endif
        E(EREMOTE),
        E(EREMOTEIO),
-#ifdef EREMOTERELEASE
-       E(EREMOTERELEASE),
-#endif
        E(ERESTART),
        E(ERFKILL),
        E(EROFS),
index de7d11cf4c6356deccc37f180fa992dbe4d4c7b0..8ff6824a100539a7894db06edf342c3c362099b8 100644 (file)
@@ -409,7 +409,7 @@ size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t byte
                void *kaddr = kmap_local_page(page);
                size_t n = min(bytes, (size_t)PAGE_SIZE - offset);
 
-               n = iterate_and_advance(i, bytes, kaddr,
+               n = iterate_and_advance(i, n, kaddr + offset,
                                        copy_to_user_iter_nofault,
                                        memcpy_to_iter);
                kunmap_local(kaddr);
index 99d2a3a528e1ed31f118fb1ec799ce197f42fb1d..de2113a58fa034f4da20e4ee2b9734c3092b1017 100644 (file)
@@ -562,7 +562,7 @@ static void kunit_log_test(struct kunit *test)
        KUNIT_EXPECT_TRUE(test, test->log->append_newlines);
 
        full_log = string_stream_get_string(test->log);
-       kunit_add_action(test, (kunit_action_t *)kfree, full_log);
+       kunit_add_action(test, kfree_wrapper, full_log);
        KUNIT_EXPECT_NOT_ERR_OR_NULL(test,
                                     strstr(full_log, "put this in log."));
        KUNIT_EXPECT_NOT_ERR_OR_NULL(test,
index f2eb71f1a66c1d774b3ac38460d6cc71a59ff74e..7aceb07a1af9f64ab619cf7bb9a061e1b3276962 100644 (file)
@@ -338,6 +338,36 @@ void kunit_init_test(struct kunit *test, const char *name, struct string_stream
 }
 EXPORT_SYMBOL_GPL(kunit_init_test);
 
+/* Only warn when a test takes more than twice the threshold */
+#define KUNIT_SPEED_WARNING_MULTIPLIER 2
+
+/* Slow tests are defined as taking more than 1s */
+#define KUNIT_SPEED_SLOW_THRESHOLD_S   1
+
+#define KUNIT_SPEED_SLOW_WARNING_THRESHOLD_S   \
+       (KUNIT_SPEED_WARNING_MULTIPLIER * KUNIT_SPEED_SLOW_THRESHOLD_S)
+
+#define s_to_timespec64(s) ns_to_timespec64((s) * NSEC_PER_SEC)
+
+static void kunit_run_case_check_speed(struct kunit *test,
+                                      struct kunit_case *test_case,
+                                      struct timespec64 duration)
+{
+       struct timespec64 slow_thr =
+               s_to_timespec64(KUNIT_SPEED_SLOW_WARNING_THRESHOLD_S);
+       enum kunit_speed speed = test_case->attr.speed;
+
+       if (timespec64_compare(&duration, &slow_thr) < 0)
+               return;
+
+       if (speed == KUNIT_SPEED_VERY_SLOW || speed == KUNIT_SPEED_SLOW)
+               return;
+
+       kunit_warn(test,
+                  "Test should be marked slow (runtime: %lld.%09lds)",
+                  duration.tv_sec, duration.tv_nsec);
+}
+
 /*
  * Initializes and runs test case. Does not clean up or do post validations.
  */
@@ -345,6 +375,8 @@ static void kunit_run_case_internal(struct kunit *test,
                                    struct kunit_suite *suite,
                                    struct kunit_case *test_case)
 {
+       struct timespec64 start, end;
+
        if (suite->init) {
                int ret;
 
@@ -356,7 +388,13 @@ static void kunit_run_case_internal(struct kunit *test,
                }
        }
 
+       ktime_get_ts64(&start);
+
        test_case->run_case(test);
+
+       ktime_get_ts64(&end);
+
+       kunit_run_case_check_speed(test, test_case, timespec64_sub(end, start));
 }
 
 static void kunit_case_internal_cleanup(struct kunit *test)
@@ -670,6 +708,8 @@ int __kunit_test_suites_init(struct kunit_suite * const * const suites, int num_
                return 0;
        }
 
+       kunit_suite_counter = 1;
+
        static_branch_inc(&kunit_running);
 
        for (i = 0; i < num_suites; i++) {
@@ -696,8 +736,6 @@ void __kunit_test_suites_exit(struct kunit_suite **suites, int num_suites)
 
        for (i = 0; i < num_suites; i++)
                kunit_exit_suite(suites[i]);
-
-       kunit_suite_counter = 1;
 }
 EXPORT_SYMBOL_GPL(__kunit_test_suites_exit);
 
index a0d06095be83de601e292f3154235bfcde946dae..8dcb8ca39767c8dfbf63eaf5e82ce46c4b848381 100644 (file)
@@ -312,7 +312,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size
 
 typedef struct {
     short ncount[FSE_MAX_SYMBOL_VALUE + 1];
-    FSE_DTable dtable[1]; /* Dynamically sized */
+    FSE_DTable dtable[]; /* Dynamically sized */
 } FSE_DecompressWksp;
 
 
index 630077d95dc60721015ea4b195c85e1c73f484ce..6262d55904e744a4a41431127266971eb5ee3d8b 100644 (file)
@@ -924,7 +924,7 @@ static bool __damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
                matched = true;
                break;
        default:
-               break;
+               return false;
        }
 
        return matched == filter->matching;
index 45bd0fd4a8b1616d6336d27175fc33a372db41e7..be667236b8e6e30713a205a4987d50c3b48ab463 100644 (file)
@@ -162,6 +162,9 @@ damon_sysfs_scheme_regions_alloc(void)
        struct damon_sysfs_scheme_regions *regions = kmalloc(sizeof(*regions),
                        GFP_KERNEL);
 
+       if (!regions)
+               return NULL;
+
        regions->kobj = (struct kobject){};
        INIT_LIST_HEAD(&regions->regions_list);
        regions->nr_regions = 0;
@@ -1823,6 +1826,8 @@ static int damon_sysfs_before_damos_apply(struct damon_ctx *ctx,
                return 0;
 
        region = damon_sysfs_scheme_region_alloc(r);
+       if (!region)
+               return 0;
        list_add_tail(&region->list, &sysfs_regions->regions_list);
        sysfs_regions->nr_regions++;
        if (kobject_init_and_add(&region->kobj,
index e27846708b5a248b9632d662004a0d49a56844b7..7472404456aa812e302cbee3b7b9d0fc490699b7 100644 (file)
@@ -1172,7 +1172,7 @@ static int damon_sysfs_update_target(struct damon_target *target,
                struct damon_ctx *ctx,
                struct damon_sysfs_target *sys_target)
 {
-       int err;
+       int err = 0;
 
        if (damon_target_has_pid(ctx)) {
                err = damon_sysfs_update_target_pid(target, sys_target->pid);
@@ -1203,8 +1203,10 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx,
 
        damon_for_each_target_safe(t, next, ctx) {
                if (i < sysfs_targets->nr) {
-                       damon_sysfs_update_target(t, ctx,
+                       err = damon_sysfs_update_target(t, ctx,
                                        sysfs_targets->targets_arr[i]);
+                       if (err)
+                               return err;
                } else {
                        if (damon_target_has_pid(ctx))
                                put_pid(t->pid);
index 9710f43a89acd3ade2c8289b6e9352fcf11cf9e5..32eedf3afd45883a7c34920c4a097906b743621e 100644 (file)
@@ -3443,7 +3443,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
                 * handled in the specific fault path, and it'll prohibit the
                 * fault-around logic.
                 */
-               if (!pte_none(vmf->pte[count]))
+               if (!pte_none(ptep_get(&vmf->pte[count])))
                        goto skip;
 
                count++;
index f31f02472396e7671132e965a93664a62238fca5..4f542444a91f2a30df8381693fa1f28686a81534 100644 (file)
@@ -2769,13 +2769,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                        int nr = folio_nr_pages(folio);
 
                        xas_split(&xas, folio, folio_order(folio));
-                       if (folio_test_swapbacked(folio)) {
-                               __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS,
-                                                       -nr);
-                       } else {
-                               __lruvec_stat_mod_folio(folio, NR_FILE_THPS,
-                                                       -nr);
-                               filemap_nr_thps_dec(mapping);
+                       if (folio_test_pmd_mappable(folio)) {
+                               if (folio_test_swapbacked(folio)) {
+                                       __lruvec_stat_mod_folio(folio,
+                                                       NR_SHMEM_THPS, -nr);
+                               } else {
+                                       __lruvec_stat_mod_folio(folio,
+                                                       NR_FILE_THPS, -nr);
+                                       filemap_nr_thps_dec(mapping);
+                               }
                        }
                }
 
index 7efcc68ccc6eaeaa910474a745f835c7f7b04d66..6a831009b4cbf9e191707571aacf0a6d4c8db7e1 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -468,7 +468,7 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
                        page = pfn_swap_entry_to_page(entry);
        }
        /* return 1 if the page is an normal ksm page or KSM-placed zero page */
-       ret = (page && PageKsm(page)) || is_ksm_zero_pte(*pte);
+       ret = (page && PageKsm(page)) || is_ksm_zero_pte(ptent);
        pte_unmap_unlock(pte, ptl);
        return ret;
 }
index 774bd6e21e2788ac1ee094c84176b488543fd6c3..1c1061df9cd17cb664d5d6b9faf1ac79db2cef6a 100644 (file)
@@ -2936,7 +2936,8 @@ void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
  * Moreover, it should not come from DMA buffer and is not readily
  * reclaimable. So those GFP bits should be masked off.
  */
-#define OBJCGS_CLEAR_MASK      (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT)
+#define OBJCGS_CLEAR_MASK      (__GFP_DMA | __GFP_RECLAIMABLE | \
+                                __GFP_ACCOUNT | __GFP_NOFAIL)
 
 /*
  * mod_objcg_mlstate() may be called with irq enabled, so
index 46f2f5d3d183b586e9936a92ed08ba08f6bf9755..ee2fd6a6af40728b3990773b464f362222be0621 100644 (file)
@@ -3107,7 +3107,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
  */
 void folio_wait_stable(struct folio *folio)
 {
-       if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES)
+       if (mapping_stable_writes(folio_mapping(folio)))
                folio_wait_writeback(folio);
 }
 EXPORT_SYMBOL_GPL(folio_wait_stable);
index 96d9eae5c7cc8e21e2fd7d6c8dacb172ab6e7e90..0b6ca553bebec523580e0fb1d43ede8646134342 100644 (file)
@@ -312,7 +312,7 @@ static int mfill_atomic_pte_poison(pmd_t *dst_pmd,
 
        ret = -EEXIST;
        /* Refuse to overwrite any PTE, even a PTE marker (e.g. UFFD WP). */
-       if (!pte_none(*dst_pte))
+       if (!pte_none(ptep_get(dst_pte)))
                goto out_unlock;
 
        set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
index aa01f6ea5a75b7add33836dbe1d66c98dbd2c2e6..744b4d7e3fae2d2f60a34599ea4a1e43d7412315 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -414,6 +414,15 @@ static int mmap_is_legacy(struct rlimit *rlim_stack)
 
 static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
 {
+#ifdef CONFIG_STACK_GROWSUP
+       /*
+        * For an upwards growing stack the calculation is much simpler.
+        * Memory for the maximum stack size is reserved at the top of the
+        * task. mmap_base starts directly below the stack and grows
+        * downwards.
+        */
+       return PAGE_ALIGN_DOWN(mmap_upper_limit(rlim_stack) - rnd);
+#else
        unsigned long gap = rlim_stack->rlim_cur;
        unsigned long pad = stack_guard_gap;
 
@@ -431,6 +440,7 @@ static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
                gap = MAX_GAP;
 
        return PAGE_ALIGN(STACK_TOP - gap - rnd);
+#endif
 }
 
 void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
index b5c406a6e7654f95fd323936219f0abd3dc66d65..abb090f94ed2609eeb9cd54b4e5faed1c3cb7bfe 100644 (file)
@@ -37,7 +37,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
        ktime_t tstamp = skb->tstamp;
        struct ip_frag_state state;
        struct iphdr *iph;
-       int err;
+       int err = 0;
 
        /* for offloaded checksums cleanup checksum before fragmentation */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
index 0d548431f3fadfd37af7c58664d3c6a706e8ff80..c879246be48d82f83072a48b19ec073df8efe27b 100644 (file)
@@ -1119,7 +1119,9 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res)
        if (i == max_netdevices)
                return -ENFILE;
 
-       snprintf(res, IFNAMSIZ, name, i);
+       /* 'res' and 'name' could overlap, use 'buf' as an intermediate buffer */
+       strscpy(buf, name, IFNAMSIZ);
+       snprintf(res, IFNAMSIZ, buf, i);
        return i;
 }
 
@@ -10049,6 +10051,54 @@ void netif_tx_stop_all_queues(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_tx_stop_all_queues);
 
+static int netdev_do_alloc_pcpu_stats(struct net_device *dev)
+{
+       void __percpu *v;
+
+       /* Drivers implementing ndo_get_peer_dev must support tstat
+        * accounting, so that skb_do_redirect() can bump the dev's
+        * RX stats upon network namespace switch.
+        */
+       if (dev->netdev_ops->ndo_get_peer_dev &&
+           dev->pcpu_stat_type != NETDEV_PCPU_STAT_TSTATS)
+               return -EOPNOTSUPP;
+
+       switch (dev->pcpu_stat_type) {
+       case NETDEV_PCPU_STAT_NONE:
+               return 0;
+       case NETDEV_PCPU_STAT_LSTATS:
+               v = dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
+               break;
+       case NETDEV_PCPU_STAT_TSTATS:
+               v = dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+               break;
+       case NETDEV_PCPU_STAT_DSTATS:
+               v = dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return v ? 0 : -ENOMEM;
+}
+
+static void netdev_do_free_pcpu_stats(struct net_device *dev)
+{
+       switch (dev->pcpu_stat_type) {
+       case NETDEV_PCPU_STAT_NONE:
+               return;
+       case NETDEV_PCPU_STAT_LSTATS:
+               free_percpu(dev->lstats);
+               break;
+       case NETDEV_PCPU_STAT_TSTATS:
+               free_percpu(dev->tstats);
+               break;
+       case NETDEV_PCPU_STAT_DSTATS:
+               free_percpu(dev->dstats);
+               break;
+       }
+}
+
 /**
  * register_netdevice() - register a network device
  * @dev: device to register
@@ -10109,9 +10159,13 @@ int register_netdevice(struct net_device *dev)
                goto err_uninit;
        }
 
+       ret = netdev_do_alloc_pcpu_stats(dev);
+       if (ret)
+               goto err_uninit;
+
        ret = dev_index_reserve(net, dev->ifindex);
        if (ret < 0)
-               goto err_uninit;
+               goto err_free_pcpu;
        dev->ifindex = ret;
 
        /* Transfer changeable features to wanted_features and enable
@@ -10217,6 +10271,8 @@ err_uninit_notify:
        call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
 err_ifindex_release:
        dev_index_release(net, dev->ifindex);
+err_free_pcpu:
+       netdev_do_free_pcpu_stats(dev);
 err_uninit:
        if (dev->netdev_ops->ndo_uninit)
                dev->netdev_ops->ndo_uninit(dev);
@@ -10469,6 +10525,7 @@ void netdev_run_todo(void)
                WARN_ON(rcu_access_pointer(dev->ip_ptr));
                WARN_ON(rcu_access_pointer(dev->ip6_ptr));
 
+               netdev_do_free_pcpu_stats(dev);
                if (dev->priv_destructor)
                        dev->priv_destructor(dev);
                if (dev->needs_free_netdev)
index 383f96b0a1c78026629ef4e6b8c172019d29d35c..7e4d7c3bcc849a9211eca4246cda7fa76af13c36 100644 (file)
@@ -81,6 +81,7 @@
 #include <net/xdp.h>
 #include <net/mptcp.h>
 #include <net/netfilter/nf_conntrack_bpf.h>
+#include <net/netkit.h>
 #include <linux/un.h>
 
 #include "dev.h"
@@ -2468,6 +2469,16 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
 DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
 EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
 
+static struct net_device *skb_get_peer_dev(struct net_device *dev)
+{
+       const struct net_device_ops *ops = dev->netdev_ops;
+
+       if (likely(ops->ndo_get_peer_dev))
+               return INDIRECT_CALL_1(ops->ndo_get_peer_dev,
+                                      netkit_peer_dev, dev);
+       return NULL;
+}
+
 int skb_do_redirect(struct sk_buff *skb)
 {
        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
@@ -2481,17 +2492,15 @@ int skb_do_redirect(struct sk_buff *skb)
        if (unlikely(!dev))
                goto out_drop;
        if (flags & BPF_F_PEER) {
-               const struct net_device_ops *ops = dev->netdev_ops;
-
-               if (unlikely(!ops->ndo_get_peer_dev ||
-                            !skb_at_tc_ingress(skb)))
+               if (unlikely(!skb_at_tc_ingress(skb)))
                        goto out_drop;
-               dev = ops->ndo_get_peer_dev(dev);
+               dev = skb_get_peer_dev(dev);
                if (unlikely(!dev ||
                             !(dev->flags & IFF_UP) ||
                             net_eq(net, dev_net(dev))))
                        goto out_drop;
                skb->dev = dev;
+               dev_sw_netstats_rx_add(dev, skb->len);
                return -EAGAIN;
        }
        return flags & BPF_F_NEIGH ?
index ceb684be4cbf862e25cabb4c46a6f2dcdb679994..4c2e77bd12f4b17f57f115ce4f9b99fb1da0a875 100644 (file)
@@ -180,18 +180,17 @@ static void gso_test_func(struct kunit *test)
        }
 
        if (tcase->frag_skbs) {
-               unsigned int total_size = 0, total_true_size = 0, alloc_size = 0;
+               unsigned int total_size = 0, total_true_size = 0;
                struct sk_buff *frag_skb, *prev = NULL;
 
-               page = alloc_page(GFP_KERNEL);
-               KUNIT_ASSERT_NOT_NULL(test, page);
-               page_ref_add(page, tcase->nr_frag_skbs - 1);
-
                for (i = 0; i < tcase->nr_frag_skbs; i++) {
                        unsigned int frag_size;
 
+                       page = alloc_page(GFP_KERNEL);
+                       KUNIT_ASSERT_NOT_NULL(test, page);
+
                        frag_size = tcase->frag_skbs[i];
-                       frag_skb = build_skb(page_address(page) + alloc_size,
+                       frag_skb = build_skb(page_address(page),
                                             frag_size + shinfo_size);
                        KUNIT_ASSERT_NOT_NULL(test, frag_skb);
                        __skb_put(frag_skb, frag_size);
@@ -204,11 +203,8 @@ static void gso_test_func(struct kunit *test)
 
                        total_size += frag_size;
                        total_true_size += frag_skb->truesize;
-                       alloc_size += frag_size + shinfo_size;
                }
 
-               KUNIT_ASSERT_LE(test, alloc_size, PAGE_SIZE);
-
                skb->len += total_size;
                skb->data_len += total_size;
                skb->truesize += total_true_size;
index 6c31eefbd77786ba3651f79313af867883210654..93ecfceac1bc49bd843728518215ade5ced374a5 100644 (file)
@@ -826,6 +826,8 @@ static void sk_psock_destroy(struct work_struct *work)
 
        if (psock->sk_redir)
                sock_put(psock->sk_redir);
+       if (psock->sk_pair)
+               sock_put(psock->sk_pair);
        sock_put(psock->sk);
        kfree(psock);
 }
index 3bbd5afb7b31cf000f3e4e6ade6542c39456b9e9..fe3553f60bf39e64602d932505a0851e692348a0 100644 (file)
@@ -505,6 +505,7 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
                                ret = skb->len;
                        break;
                }
+               ret = 0;
        }
        rtnl_unlock();
 
index 76c3ea75b8ddc03f5d5e7d2494e12de80250bfdc..efeeca2b13285a3149645ab945b0364391f6721b 100644 (file)
@@ -216,8 +216,10 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
        int tv = get_random_u32_below(max_delay);
 
        im->tm_running = 1;
-       if (!mod_timer(&im->timer, jiffies+tv+2))
-               refcount_inc(&im->refcnt);
+       if (refcount_inc_not_zero(&im->refcnt)) {
+               if (mod_timer(&im->timer, jiffies + tv + 2))
+                       ip_ma_put(im);
+       }
 }
 
 static void igmp_gq_start_timer(struct in_device *in_dev)
index f01aee832aab2f97c88808aafdf0f5b6ee0b57c6..7d0e7aaa71e0a1ff79f9e7c622f0f999e0139f18 100644 (file)
@@ -1481,5 +1481,6 @@ static void __exit inet_diag_exit(void)
 module_init(inet_diag_init);
 module_exit(inet_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("INET/INET6: socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */);
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */);
index 598c1b114d2c2256fa06735b823b38cef70a8a34..a532f749e47781cc951f2003f621cec4387a2384 100644 (file)
@@ -751,12 +751,12 @@ int __inet_hash(struct sock *sk, struct sock *osk)
                if (err)
                        goto unlock;
        }
+       sock_set_flag(sk, SOCK_RCU_FREE);
        if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
                sk->sk_family == AF_INET6)
                __sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head);
        else
                __sk_nulls_add_node_rcu(sk, &ilb2->nulls_head);
-       sock_set_flag(sk, SOCK_RCU_FREE);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 unlock:
        spin_unlock(&ilb2->lock);
index 63a40e4b678f59e29aa4bcbc8fe15116f1b4c568..fe2140c8375c8ebcc69880142c42655233007900 100644 (file)
@@ -257,5 +257,6 @@ static void __exit raw_diag_exit(void)
 module_init(raw_diag_init);
 module_exit(raw_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RAW socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */);
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */);
index 3290a4442b4ac746a0ace35f909b95aa6cb9c06c..16615d107cf06f3da7988c8e3ab889456d74dd88 100644 (file)
@@ -780,7 +780,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
                        goto reject_redirect;
        }
 
-       n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
+       n = __ipv4_neigh_lookup(rt->dst.dev, (__force u32)new_gw);
        if (!n)
                n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
        if (!IS_ERR(n)) {
index 01b50fa791898831dc777bb3a64ad93ad57c1bfd..4cbe4b44425a6a5daf55abe348c167932ca07222 100644 (file)
@@ -247,4 +247,5 @@ static void __exit tcp_diag_exit(void)
 module_init(tcp_diag_init);
 module_exit(tcp_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("TCP socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */);
index de3f2d31f510a9f1c41ec8c9a830df2e07d768fc..dc41a22ee80e829582349e8e644f204eff07df0e 100644 (file)
@@ -296,5 +296,6 @@ static void __exit udp_diag_exit(void)
 module_init(udp_diag_init);
 module_exit(udp_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("UDP socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-17 /* AF_INET - IPPROTO_UDP */);
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-136 /* AF_INET - IPPROTO_UDPLITE */);
index 037ab74f5ade5b1961ff7292c4113f7aae4ef2d2..cb0291decf2e56c7d4111e649f41d28577af987e 100644 (file)
@@ -88,7 +88,7 @@ config MAC80211_LEDS
 
 config MAC80211_DEBUGFS
        bool "Export mac80211 internals in DebugFS"
-       depends on MAC80211 && DEBUG_FS
+       depends on MAC80211 && CFG80211_DEBUGFS
        help
          Select this to see extensive information about
          the internal state of mac80211 in debugfs.
index ec91e131b29e5aa8e5e2baf177819fcf6d973fe0..80aeb25f1b68d1e63e2aca0049258a2524848675 100644 (file)
 #include "debugfs_netdev.h"
 #include "driver-ops.h"
 
+struct ieee80211_if_read_sdata_data {
+       ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int);
+       struct ieee80211_sub_if_data *sdata;
+};
+
+static ssize_t ieee80211_if_read_sdata_handler(struct wiphy *wiphy,
+                                              struct file *file,
+                                              char *buf,
+                                              size_t bufsize,
+                                              void *data)
+{
+       struct ieee80211_if_read_sdata_data *d = data;
+
+       return d->format(d->sdata, buf, bufsize);
+}
+
 static ssize_t ieee80211_if_read_sdata(
-       struct ieee80211_sub_if_data *sdata,
+       struct file *file,
        char __user *userbuf,
        size_t count, loff_t *ppos,
        ssize_t (*format)(const struct ieee80211_sub_if_data *sdata, char *, int))
 {
+       struct ieee80211_sub_if_data *sdata = file->private_data;
+       struct ieee80211_if_read_sdata_data data = {
+               .format = format,
+               .sdata = sdata,
+       };
        char buf[200];
-       ssize_t ret = -EINVAL;
 
-       wiphy_lock(sdata->local->hw.wiphy);
-       ret = (*format)(sdata, buf, sizeof(buf));
-       wiphy_unlock(sdata->local->hw.wiphy);
+       return wiphy_locked_debugfs_read(sdata->local->hw.wiphy,
+                                        file, buf, sizeof(buf),
+                                        userbuf, count, ppos,
+                                        ieee80211_if_read_sdata_handler,
+                                        &data);
+}
 
-       if (ret >= 0)
-               ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret);
+struct ieee80211_if_write_sdata_data {
+       ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int);
+       struct ieee80211_sub_if_data *sdata;
+};
+
+static ssize_t ieee80211_if_write_sdata_handler(struct wiphy *wiphy,
+                                               struct file *file,
+                                               char *buf,
+                                               size_t count,
+                                               void *data)
+{
+       struct ieee80211_if_write_sdata_data *d = data;
 
-       return ret;
+       return d->write(d->sdata, buf, count);
 }
 
 static ssize_t ieee80211_if_write_sdata(
-       struct ieee80211_sub_if_data *sdata,
+       struct file *file,
        const char __user *userbuf,
        size_t count, loff_t *ppos,
        ssize_t (*write)(struct ieee80211_sub_if_data *sdata, const char *, int))
 {
+       struct ieee80211_sub_if_data *sdata = file->private_data;
+       struct ieee80211_if_write_sdata_data data = {
+               .write = write,
+               .sdata = sdata,
+       };
        char buf[64];
-       ssize_t ret;
 
-       if (count >= sizeof(buf))
-               return -E2BIG;
+       return wiphy_locked_debugfs_write(sdata->local->hw.wiphy,
+                                         file, buf, sizeof(buf),
+                                         userbuf, count,
+                                         ieee80211_if_write_sdata_handler,
+                                         &data);
+}
 
-       if (copy_from_user(buf, userbuf, count))
-               return -EFAULT;
-       buf[count] = '\0';
+struct ieee80211_if_read_link_data {
+       ssize_t (*format)(const struct ieee80211_link_data *, char *, int);
+       struct ieee80211_link_data *link;
+};
 
-       wiphy_lock(sdata->local->hw.wiphy);
-       ret = (*write)(sdata, buf, count);
-       wiphy_unlock(sdata->local->hw.wiphy);
+static ssize_t ieee80211_if_read_link_handler(struct wiphy *wiphy,
+                                             struct file *file,
+                                             char *buf,
+                                             size_t bufsize,
+                                             void *data)
+{
+       struct ieee80211_if_read_link_data *d = data;
 
-       return ret;
+       return d->format(d->link, buf, bufsize);
 }
 
 static ssize_t ieee80211_if_read_link(
-       struct ieee80211_link_data *link,
+       struct file *file,
        char __user *userbuf,
        size_t count, loff_t *ppos,
        ssize_t (*format)(const struct ieee80211_link_data *link, char *, int))
 {
+       struct ieee80211_link_data *link = file->private_data;
+       struct ieee80211_if_read_link_data data = {
+               .format = format,
+               .link = link,
+       };
        char buf[200];
-       ssize_t ret = -EINVAL;
 
-       wiphy_lock(link->sdata->local->hw.wiphy);
-       ret = (*format)(link, buf, sizeof(buf));
-       wiphy_unlock(link->sdata->local->hw.wiphy);
+       return wiphy_locked_debugfs_read(link->sdata->local->hw.wiphy,
+                                        file, buf, sizeof(buf),
+                                        userbuf, count, ppos,
+                                        ieee80211_if_read_link_handler,
+                                        &data);
+}
+
+struct ieee80211_if_write_link_data {
+       ssize_t (*write)(struct ieee80211_link_data *, const char *, int);
+       struct ieee80211_link_data *link;
+};
 
-       if (ret >= 0)
-               ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret);
+static ssize_t ieee80211_if_write_link_handler(struct wiphy *wiphy,
+                                              struct file *file,
+                                              char *buf,
+                                              size_t count,
+                                              void *data)
+{
+       struct ieee80211_if_write_sdata_data *d = data;
 
-       return ret;
+       return d->write(d->sdata, buf, count);
 }
 
 static ssize_t ieee80211_if_write_link(
-       struct ieee80211_link_data *link,
+       struct file *file,
        const char __user *userbuf,
        size_t count, loff_t *ppos,
        ssize_t (*write)(struct ieee80211_link_data *link, const char *, int))
 {
+       struct ieee80211_link_data *link = file->private_data;
+       struct ieee80211_if_write_link_data data = {
+               .write = write,
+               .link = link,
+       };
        char buf[64];
-       ssize_t ret;
-
-       if (count >= sizeof(buf))
-               return -E2BIG;
-
-       if (copy_from_user(buf, userbuf, count))
-               return -EFAULT;
-       buf[count] = '\0';
-
-       wiphy_lock(link->sdata->local->hw.wiphy);
-       ret = (*write)(link, buf, count);
-       wiphy_unlock(link->sdata->local->hw.wiphy);
 
-       return ret;
+       return wiphy_locked_debugfs_write(link->sdata->local->hw.wiphy,
+                                         file, buf, sizeof(buf),
+                                         userbuf, count,
+                                         ieee80211_if_write_link_handler,
+                                         &data);
 }
 
 #define IEEE80211_IF_FMT(name, type, field, format_string)             \
@@ -173,7 +233,7 @@ static ssize_t ieee80211_if_read_##name(struct file *file,          \
                                        char __user *userbuf,           \
                                        size_t count, loff_t *ppos)     \
 {                                                                      \
-       return ieee80211_if_read_sdata(file->private_data,              \
+       return ieee80211_if_read_sdata(file,                            \
                                       userbuf, count, ppos,            \
                                       ieee80211_if_fmt_##name);        \
 }
@@ -183,7 +243,7 @@ static ssize_t ieee80211_if_write_##name(struct file *file,         \
                                         const char __user *userbuf,    \
                                         size_t count, loff_t *ppos)    \
 {                                                                      \
-       return ieee80211_if_write_sdata(file->private_data, userbuf,    \
+       return ieee80211_if_write_sdata(file, userbuf,                  \
                                        count, ppos,                    \
                                        ieee80211_if_parse_##name);     \
 }
@@ -211,7 +271,7 @@ static ssize_t ieee80211_if_read_##name(struct file *file,          \
                                        char __user *userbuf,           \
                                        size_t count, loff_t *ppos)     \
 {                                                                      \
-       return ieee80211_if_read_link(file->private_data,               \
+       return ieee80211_if_read_link(file,                             \
                                      userbuf, count, ppos,             \
                                      ieee80211_if_fmt_##name); \
 }
@@ -221,7 +281,7 @@ static ssize_t ieee80211_if_write_##name(struct file *file,         \
                                         const char __user *userbuf,    \
                                         size_t count, loff_t *ppos)    \
 {                                                                      \
-       return ieee80211_if_write_link(file->private_data, userbuf,     \
+       return ieee80211_if_write_link(file, userbuf,                   \
                                       count, ppos,                     \
                                       ieee80211_if_parse_##name);      \
 }
index 06e3613bf46bd918a69ff3d50cfa1411d466e4a8..5bf507ebb096be315ceb13162a761e20134c3434 100644 (file)
@@ -312,23 +312,14 @@ static ssize_t sta_aql_write(struct file *file, const char __user *userbuf,
 STA_OPS_RW(aql);
 
 
-static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
-                                       size_t count, loff_t *ppos)
+static ssize_t sta_agg_status_do_read(struct wiphy *wiphy, struct file *file,
+                                     char *buf, size_t bufsz, void *data)
 {
-       char *buf, *p;
-       ssize_t bufsz = 71 + IEEE80211_NUM_TIDS * 40;
+       struct sta_info *sta = data;
+       char *p = buf;
        int i;
-       struct sta_info *sta = file->private_data;
        struct tid_ampdu_rx *tid_rx;
        struct tid_ampdu_tx *tid_tx;
-       ssize_t ret;
-
-       buf = kzalloc(bufsz, GFP_KERNEL);
-       if (!buf)
-               return -ENOMEM;
-       p = buf;
-
-       rcu_read_lock();
 
        p += scnprintf(p, bufsz + buf - p, "next dialog_token: %#02x\n",
                        sta->ampdu_mlme.dialog_token_allocator + 1);
@@ -338,8 +329,8 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
        for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
                bool tid_rx_valid;
 
-               tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]);
-               tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]);
+               tid_rx = wiphy_dereference(wiphy, sta->ampdu_mlme.tid_rx[i]);
+               tid_tx = wiphy_dereference(wiphy, sta->ampdu_mlme.tid_tx[i]);
                tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid);
 
                p += scnprintf(p, bufsz + buf - p, "%02d", i);
@@ -358,31 +349,39 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
                                tid_tx ? skb_queue_len(&tid_tx->pending) : 0);
                p += scnprintf(p, bufsz + buf - p, "\n");
        }
-       rcu_read_unlock();
 
-       ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+       return p - buf;
+}
+
+static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
+                                  size_t count, loff_t *ppos)
+{
+       struct sta_info *sta = file->private_data;
+       struct wiphy *wiphy = sta->local->hw.wiphy;
+       size_t bufsz = 71 + IEEE80211_NUM_TIDS * 40;
+       char *buf = kmalloc(bufsz, GFP_KERNEL);
+       ssize_t ret;
+
+       if (!buf)
+               return -ENOMEM;
+
+       ret = wiphy_locked_debugfs_read(wiphy, file, buf, bufsz,
+                                       userbuf, count, ppos,
+                                       sta_agg_status_do_read, sta);
        kfree(buf);
+
        return ret;
 }
 
-static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf,
-                                   size_t count, loff_t *ppos)
+static ssize_t sta_agg_status_do_write(struct wiphy *wiphy, struct file *file,
+                                      char *buf, size_t count, void *data)
 {
-       char _buf[25] = {}, *buf = _buf;
-       struct sta_info *sta = file->private_data;
+       struct sta_info *sta = data;
        bool start, tx;
        unsigned long tid;
-       char *pos;
+       char *pos = buf;
        int ret, timeout = 5000;
 
-       if (count > sizeof(_buf))
-               return -EINVAL;
-
-       if (copy_from_user(buf, userbuf, count))
-               return -EFAULT;
-
-       buf[sizeof(_buf) - 1] = '\0';
-       pos = buf;
        buf = strsep(&pos, " ");
        if (!buf)
                return -EINVAL;
@@ -420,7 +419,6 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu
        if (ret || tid >= IEEE80211_NUM_TIDS)
                return -EINVAL;
 
-       wiphy_lock(sta->local->hw.wiphy);
        if (tx) {
                if (start)
                        ret = ieee80211_start_tx_ba_session(&sta->sta, tid,
@@ -432,10 +430,22 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu
                                               3, true);
                ret = 0;
        }
-       wiphy_unlock(sta->local->hw.wiphy);
 
        return ret ?: count;
 }
+
+static ssize_t sta_agg_status_write(struct file *file,
+                                   const char __user *userbuf,
+                                   size_t count, loff_t *ppos)
+{
+       struct sta_info *sta = file->private_data;
+       struct wiphy *wiphy = sta->local->hw.wiphy;
+       char _buf[26];
+
+       return wiphy_locked_debugfs_write(wiphy, file, _buf, sizeof(_buf),
+                                         userbuf, count,
+                                         sta_agg_status_do_write, sta);
+}
 STA_OPS_RW(agg_status);
 
 /* link sta attributes */
index 568633b38c47360d0de6e6f856da63b3776ad19f..f690c385a345a60d81d994336fea44aa7e1792ef 100644 (file)
@@ -23,7 +23,7 @@
 static inline struct ieee80211_sub_if_data *
 get_bss_sdata(struct ieee80211_sub_if_data *sdata)
 {
-       if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+       if (sdata && sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
                sdata = container_of(sdata->bss, struct ieee80211_sub_if_data,
                                     u.ap);
 
@@ -695,11 +695,14 @@ static inline void drv_flush(struct ieee80211_local *local,
                             struct ieee80211_sub_if_data *sdata,
                             u32 queues, bool drop)
 {
-       struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL;
+       struct ieee80211_vif *vif;
 
        might_sleep();
        lockdep_assert_wiphy(local->hw.wiphy);
 
+       sdata = get_bss_sdata(sdata);
+       vif = sdata ? &sdata->vif : NULL;
+
        if (sdata && !check_sdata_in_driver(sdata))
                return;
 
@@ -716,6 +719,8 @@ static inline void drv_flush_sta(struct ieee80211_local *local,
        might_sleep();
        lockdep_assert_wiphy(local->hw.wiphy);
 
+       sdata = get_bss_sdata(sdata);
+
        if (sdata && !check_sdata_in_driver(sdata))
                return;
 
index 68cea2685224b92fbb03896b0ab82d7c143cfd80..749f4ecab99030310e6c5b82633f8cad85fe589b 100644 (file)
@@ -271,6 +271,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
        case NL80211_CHAN_WIDTH_80:
        case NL80211_CHAN_WIDTH_80P80:
        case NL80211_CHAN_WIDTH_160:
+       case NL80211_CHAN_WIDTH_320:
                bw = ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
                                IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
                break;
index 8df1bdb647e299344110610688618ca74ec32026..5409c2ea3f5728a05999db17b7af1b1fb56f757e 100644 (file)
@@ -245,4 +245,5 @@ static void __exit mptcp_diag_exit(void)
 module_init(mptcp_diag_init);
 module_exit(mptcp_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MPTCP socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-262 /* AF_INET - IPPROTO_MPTCP */);
index cd15ec73073e05a133a3130f90e45e33394e759f..c53914012d01d38c2dc0a3578bf3651595956e72 100644 (file)
@@ -108,6 +108,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                        mp_opt->suboptions |= OPTION_MPTCP_DSS;
                        mp_opt->use_map = 1;
                        mp_opt->mpc_map = 1;
+                       mp_opt->use_ack = 0;
                        mp_opt->data_len = get_unaligned_be16(ptr);
                        ptr += 2;
                }
index 1529ec35881552a53b18afb31ded27eece49a8d6..bf4d96f6f99a6e87d1b30c84a316d12c925bbcb2 100644 (file)
@@ -1515,8 +1515,9 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
        struct mptcp_pm_addr_entry *entry;
 
        list_for_each_entry(entry, rm_list, list) {
-               remove_anno_list_by_saddr(msk, &entry->addr);
-               if (alist.nr < MPTCP_RM_IDS_MAX)
+               if ((remove_anno_list_by_saddr(msk, &entry->addr) ||
+                    lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) &&
+                   alist.nr < MPTCP_RM_IDS_MAX)
                        alist.ids[alist.nr++] = entry->addr.id;
        }
 
index a0b8356cd8c58f2d2ea46e385804b4525cf7950a..bc81ea53a04992ef4793da9fd688a7c72b3227d5 100644 (file)
@@ -1230,6 +1230,8 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk,
        mptcp_do_fallback(ssk);
 }
 
+#define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1))
+
 static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
                              struct mptcp_data_frag *dfrag,
                              struct mptcp_sendmsg_info *info)
@@ -1256,6 +1258,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
                return -EAGAIN;
 
        /* compute send limit */
+       if (unlikely(ssk->sk_gso_max_size > MPTCP_MAX_GSO_SIZE))
+               ssk->sk_gso_max_size = MPTCP_MAX_GSO_SIZE;
        info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
        copy = info->size_goal;
 
@@ -3398,10 +3402,11 @@ static void mptcp_release_cb(struct sock *sk)
        if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
                __mptcp_clean_una_wakeup(sk);
        if (unlikely(msk->cb_flags)) {
-               /* be sure to set the current sk state before tacking actions
-                * depending on sk_state, that is processing MPTCP_ERROR_REPORT
+               /* be sure to set the current sk state before taking actions
+                * depending on sk_state (MPTCP_ERROR_REPORT)
+                * On sk release avoid actions depending on the first subflow
                 */
-               if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
+               if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags) && msk->first)
                        __mptcp_set_connected(sk);
                if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
                        __mptcp_error_report(sk);
index 77f5e8932abf64370e00ffe473a0cd9431494898..35368073370048f400ea82416cb0e0a0c6595a58 100644 (file)
@@ -738,8 +738,11 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
        val = READ_ONCE(inet_sk(sk)->tos);
        mptcp_for_each_subflow(msk, subflow) {
                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+               bool slow;
 
+               slow = lock_sock_fast(ssk);
                __ip_sock_set_tos(ssk, val);
+               unlock_sock_fast(ssk, slow);
        }
        release_sock(sk);
 
index f8854bff286cbd7229e0c543689f892f40e80cab..62fb1031763d14f82dcfee50191a21cb38ad8cf9 100644 (file)
@@ -89,11 +89,6 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
        if ((had_link == has_link) || chained)
                return 0;
 
-       if (had_link)
-               netif_carrier_off(ndp->ndev.dev);
-       else
-               netif_carrier_on(ndp->ndev.dev);
-
        if (!ndp->multi_package && !nc->package->multi_channel) {
                if (had_link) {
                        ndp->flags |= NCSI_DEV_RESHUFFLE;
index 35d2f9c9ada0252e01af3bee1faa1f63a0c94498..4c133e06be1de2f8972b50ac87e6b0b7bfc9ac6d 100644 (file)
@@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
        ip_set_dereference((inst)->ip_set_list)[id]
 #define ip_set_ref_netlink(inst,id)    \
        rcu_dereference_raw((inst)->ip_set_list)[id]
+#define ip_set_dereference_nfnl(p)     \
+       rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
 
 /* The set types are implemented in modules and registered set types
  * can be found in ip_set_type_list. Adding/deleting types is
@@ -708,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set)
 static struct ip_set *
 ip_set_rcu_get(struct net *net, ip_set_id_t index)
 {
-       struct ip_set *set;
        struct ip_set_net *inst = ip_set_pernet(net);
 
-       rcu_read_lock();
-       /* ip_set_list itself needs to be protected */
-       set = rcu_dereference(inst->ip_set_list)[index];
-       rcu_read_unlock();
-
-       return set;
+       /* ip_set_list and the set pointer need to be protected */
+       return ip_set_dereference_nfnl(inst->ip_set_list)[index];
 }
 
 static inline void
@@ -1397,6 +1394,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
        ip_set(inst, to_id) = from;
        write_unlock_bh(&ip_set_ref_lock);
 
+       /* Make sure all readers of the old set pointers are completed. */
+       synchronize_rcu();
+
        return 0;
 }
 
index a761ee6796f6fa448ba6ce8dbc50b34aaebd8e6b..c0a42989b982266aa7378f06c0a46b5668335d3a 100644 (file)
@@ -7263,10 +7263,11 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
 
                if (err < 0) {
                        NL_SET_BAD_ATTR(extack, attr);
-                       break;
+                       return err;
                }
        }
-       return err;
+
+       return 0;
 }
 
 /*
@@ -9679,16 +9680,14 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
        call_rcu(&trans->rcu, nft_trans_gc_trans_free);
 }
 
-static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
-                                                 unsigned int gc_seq,
-                                                 bool sync)
+struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
+                                                unsigned int gc_seq)
 {
-       struct nft_set_elem_catchall *catchall, *next;
+       struct nft_set_elem_catchall *catchall;
        const struct nft_set *set = gc->set;
-       struct nft_elem_priv *elem_priv;
        struct nft_set_ext *ext;
 
-       list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+       list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
                ext = nft_set_elem_ext(set, catchall->elem);
 
                if (!nft_set_elem_expired(ext))
@@ -9698,35 +9697,42 @@ static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
 
                nft_set_elem_dead(ext);
 dead_elem:
-               if (sync)
-                       gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
-               else
-                       gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
-
+               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
                if (!gc)
                        return NULL;
 
-               elem_priv = catchall->elem;
-               if (sync) {
-                       nft_setelem_data_deactivate(gc->net, gc->set, elem_priv);
-                       nft_setelem_catchall_destroy(catchall);
-               }
-
-               nft_trans_gc_elem_add(gc, elem_priv);
+               nft_trans_gc_elem_add(gc, catchall->elem);
        }
 
        return gc;
 }
 
-struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
-                                                unsigned int gc_seq)
-{
-       return nft_trans_gc_catchall(gc, gc_seq, false);
-}
-
 struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
 {
-       return nft_trans_gc_catchall(gc, 0, true);
+       struct nft_set_elem_catchall *catchall, *next;
+       const struct nft_set *set = gc->set;
+       struct nft_elem_priv *elem_priv;
+       struct nft_set_ext *ext;
+
+       WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net));
+
+       list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+               ext = nft_set_elem_ext(set, catchall->elem);
+
+               if (!nft_set_elem_expired(ext))
+                       continue;
+
+               gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
+               if (!gc)
+                       return NULL;
+
+               elem_priv = catchall->elem;
+               nft_setelem_data_deactivate(gc->net, gc->set, elem_priv);
+               nft_setelem_catchall_destroy(catchall);
+               nft_trans_gc_elem_add(gc, elem_priv);
+       }
+
+       return gc;
 }
 
 static void nf_tables_module_autoload_cleanup(struct net *net)
index e596d1a842f7024a5b0237985d9d69a999528b95..f6e791a6810151823fa021849f4809af715e5a67 100644 (file)
@@ -38,13 +38,14 @@ void nft_byteorder_eval(const struct nft_expr *expr,
 
        switch (priv->size) {
        case 8: {
+               u64 *dst64 = (void *)dst;
                u64 src64;
 
                switch (priv->op) {
                case NFT_BYTEORDER_NTOH:
                        for (i = 0; i < priv->len / 8; i++) {
                                src64 = nft_reg_load64(&src[i]);
-                               nft_reg_store64(&dst[i],
+                               nft_reg_store64(&dst64[i],
                                                be64_to_cpu((__force __be64)src64));
                        }
                        break;
@@ -52,7 +53,7 @@ void nft_byteorder_eval(const struct nft_expr *expr,
                        for (i = 0; i < priv->len / 8; i++) {
                                src64 = (__force __u64)
                                        cpu_to_be64(nft_reg_load64(&src[i]));
-                               nft_reg_store64(&dst[i], src64);
+                               nft_reg_store64(&dst64[i], src64);
                        }
                        break;
                }
index f7da7c43333b5ae4d320813179ea6b54742f91da..ba0d3683a45d32aae8e7240b035c7facc604aea0 100644 (file)
@@ -63,7 +63,7 @@ nft_meta_get_eval_time(enum nft_meta_keys key,
 {
        switch (key) {
        case NFT_META_TIME_NS:
-               nft_reg_store64(dest, ktime_get_real_ns());
+               nft_reg_store64((u64 *)dest, ktime_get_real_ns());
                break;
        case NFT_META_TIME_DAY:
                nft_reg_store8(dest, nft_meta_weekday());
index 6f1186abd47b4366e10f09f9dbed80101bbdf69c..baa3fea4fe65c8f938e665a7fb6b0e4fc0f8f9ad 100644 (file)
@@ -624,14 +624,12 @@ static void nft_rbtree_gc(struct nft_set *set)
 {
        struct nft_rbtree *priv = nft_set_priv(set);
        struct nft_rbtree_elem *rbe, *rbe_end = NULL;
-       struct nftables_pernet *nft_net;
        struct rb_node *node, *next;
        struct nft_trans_gc *gc;
        struct net *net;
 
        set  = nft_set_container_of(priv);
        net  = read_pnet(&set->net);
-       nft_net = nft_pernet(net);
 
        gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
        if (!gc)
index f6b200cb3c0668db7bf380d0868583440edca89b..9a7980e3309d6a2950688f8b69b08c15c288f601 100644 (file)
@@ -262,4 +262,5 @@ static void __exit packet_diag_exit(void)
 module_init(packet_diag_init);
 module_exit(packet_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("PACKET socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 17 /* AF_PACKET */);
index 981ca5b98bcb9096fa9bd6ee3561d2792a17782f..1d95f8bc769fa28eb203d6c4afc00815f47a230d 100644 (file)
@@ -73,6 +73,7 @@ static void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local)
 static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call,
                                               gfp_t gfp)
 {
+       static atomic_t rxrpc_bundle_id;
        struct rxrpc_bundle *bundle;
 
        bundle = kzalloc(sizeof(*bundle), gfp);
@@ -85,6 +86,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call,
                bundle->upgrade         = test_bit(RXRPC_CALL_UPGRADE, &call->flags);
                bundle->service_id      = call->dest_srx.srx_service;
                bundle->security_level  = call->security_level;
+               bundle->debug_id        = atomic_inc_return(&rxrpc_bundle_id);
                refcount_set(&bundle->ref, 1);
                atomic_set(&bundle->active, 1);
                INIT_LIST_HEAD(&bundle->waiting_calls);
@@ -105,7 +107,8 @@ struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle,
 
 static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
 {
-       trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free);
+       trace_rxrpc_bundle(bundle->debug_id, refcount_read(&bundle->ref),
+                          rxrpc_bundle_free);
        rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle);
        key_put(bundle->key);
        kfree(bundle);
@@ -239,7 +242,6 @@ dont_reuse:
  */
 int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp)
 {
-       static atomic_t rxrpc_bundle_id;
        struct rxrpc_bundle *bundle, *candidate;
        struct rxrpc_local *local = call->local;
        struct rb_node *p, **pp, *parent;
@@ -306,7 +308,6 @@ int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp)
        }
 
        _debug("new bundle");
-       candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id);
        rb_link_node(&candidate->local_node, parent, pp);
        rb_insert_color(&candidate->local_node, &local->client_bundles);
        call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
index 030d64f282f3704fbdb8d312554c440c9721d809..92495e73b8699185cf76c60aa88f62d77a29dd56 100644 (file)
@@ -643,12 +643,8 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call,
                        clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
                        smp_mb(); /* Read data before setting avail bit */
                        set_bit(i, &call->rtt_avail);
-                       if (type != rxrpc_rtt_rx_cancel)
-                               rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial,
-                                                  sent_at, resp_time);
-                       else
-                               trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_cancel, i,
-                                                  orig_serial, acked_serial, 0, 0);
+                       rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial,
+                                          sent_at, resp_time);
                        matched = true;
                }
 
@@ -801,28 +797,21 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
                           summary.ack_reason, nr_acks);
        rxrpc_inc_stat(call->rxnet, stat_rx_acks[ack.reason]);
 
-       switch (ack.reason) {
-       case RXRPC_ACK_PING_RESPONSE:
-               rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
-                                        rxrpc_rtt_rx_ping_response);
-               break;
-       case RXRPC_ACK_REQUESTED:
-               rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
-                                        rxrpc_rtt_rx_requested_ack);
-               break;
-       default:
-               if (acked_serial != 0)
+       if (acked_serial != 0) {
+               switch (ack.reason) {
+               case RXRPC_ACK_PING_RESPONSE:
                        rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
-                                                rxrpc_rtt_rx_cancel);
-               break;
-       }
-
-       if (ack.reason == RXRPC_ACK_PING) {
-               rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial,
-                              rxrpc_propose_ack_respond_to_ping);
-       } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
-               rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial,
-                              rxrpc_propose_ack_respond_to_ack);
+                                                rxrpc_rtt_rx_ping_response);
+                       break;
+               case RXRPC_ACK_REQUESTED:
+                       rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
+                                                rxrpc_rtt_rx_requested_ack);
+                       break;
+               default:
+                       rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
+                                                rxrpc_rtt_rx_other_ack);
+                       break;
+               }
        }
 
        /* If we get an EXCEEDS_WINDOW ACK from the server, it probably
@@ -835,7 +824,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
            rxrpc_is_client_call(call)) {
                rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
                                          0, -ENETRESET);
-               return;
+               goto send_response;
        }
 
        /* If we get an OUT_OF_SEQUENCE ACK from the server, that can also
@@ -849,7 +838,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
            rxrpc_is_client_call(call)) {
                rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
                                          0, -ENETRESET);
-               return;
+               goto send_response;
        }
 
        /* Discard any out-of-order or duplicate ACKs (outside lock). */
@@ -857,7 +846,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
                trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
                                           first_soft_ack, call->acks_first_seq,
                                           prev_pkt, call->acks_prev_seq);
-               return;
+               goto send_response;
        }
 
        info.rxMTU = 0;
@@ -897,7 +886,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
        case RXRPC_CALL_SERVER_AWAIT_ACK:
                break;
        default:
-               return;
+               goto send_response;
        }
 
        if (before(hard_ack, call->acks_hard_ack) ||
@@ -909,7 +898,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
        if (after(hard_ack, call->acks_hard_ack)) {
                if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
                        rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack);
-                       return;
+                       goto send_response;
                }
        }
 
@@ -927,6 +916,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
                                   rxrpc_propose_ack_ping_for_lost_reply);
 
        rxrpc_congestion_management(call, skb, &summary, acked_serial);
+
+send_response:
+       if (ack.reason == RXRPC_ACK_PING)
+               rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial,
+                              rxrpc_propose_ack_respond_to_ping);
+       else if (sp->hdr.flags & RXRPC_REQUEST_ACK)
+               rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial,
+                              rxrpc_propose_ack_respond_to_ack);
 }
 
 /*
index 0db0ecf1d11038a49e487e36b2eb33a028ae8727..b3f4a503ee2ba4fd9620567208e2e77ed80b41b1 100644 (file)
@@ -1549,6 +1549,9 @@ static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data,
        if (bind) {
                struct flow_action_entry *entry = entry_data;
 
+               if (tcf_ct_helper(act))
+                       return -EOPNOTSUPP;
+
                entry->id = FLOW_ACTION_CT;
                entry->ct.action = tcf_ct_action(act);
                entry->ct.zone = tcf_ct_zone(act);
index c3d6b92dd3862fc79947456a8d2eb342509406d1..eb05131ff1dd671e734457e28b2d7b64eab07f85 100644 (file)
@@ -527,4 +527,5 @@ static void __exit sctp_diag_exit(void)
 module_init(sctp_diag_init);
 module_exit(sctp_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SCTP socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-132);
index da97f946b79b00c82c8dbd496a2c4304dcd164b7..2a1388841951e4b6d447ab1f7b2dff4586ad4218 100644 (file)
@@ -598,8 +598,12 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
        struct smc_llc_qentry *qentry;
        int rc;
 
-       /* receive CONFIRM LINK request from server over RoCE fabric */
-       qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
+       /* Receive CONFIRM LINK request from server over RoCE fabric.
+        * Increasing the client's timeout by twice as much as the server's
+        * timeout by default can temporarily avoid decline messages of
+        * both sides crossing or colliding
+        */
+       qentry = smc_llc_wait(link->lgr, NULL, 2 * SMC_LLC_WAIT_TIME,
                              SMC_LLC_CONFIRM_LINK);
        if (!qentry) {
                struct smc_clc_msg_decline dclc;
index 7ff2152971a5b8a57ab746bf8ed2cc006b9546e9..a584613aca125620338d07344bb0911b15c0285a 100644 (file)
@@ -268,5 +268,6 @@ static void __exit smc_diag_exit(void)
 module_init(smc_diag_init);
 module_exit(smc_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SMC socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 43 /* AF_SMC */);
 MODULE_ALIAS_GENL_FAMILY(SMCR_GENL_FAMILY_NAME);
index 73137f4aeb68f95677e34a3b2d5e823565024483..18733451c9e0c23a63d9400d408979aab46ecf19 100644 (file)
@@ -113,4 +113,5 @@ module_init(tipc_diag_init);
 module_exit(tipc_diag_exit);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("TIPC socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC);
index 5bc076f2fa74a295d5236ffd359f8d1b11f6ea53..c763008a8adbaa0538615431984c4f95926bcc3f 100644 (file)
@@ -102,6 +102,7 @@ static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len)
                return -EMSGSIZE;
 
        skb_put(skb, TLV_SPACE(len));
+       memset(tlv, 0, TLV_SPACE(len));
        tlv->tlv_type = htons(type);
        tlv->tlv_len = htons(TLV_LENGTH(len));
        if (len && data)
index a78e8e7224091331e0ca86049a14d02139c056e6..316f761879624d688af85b60527868d5f0d00a49 100644 (file)
@@ -1232,11 +1232,14 @@ void tls_sw_splice_eof(struct socket *sock)
        lock_sock(sk);
 
 retry:
+       /* same checks as in tls_sw_push_pending_record() */
        rec = ctx->open_rec;
        if (!rec)
                goto unlock;
 
        msg_pl = &rec->msg_plaintext;
+       if (msg_pl->sg.size == 0)
+               goto unlock;
 
        /* Check the BPF advisor and perform transmission. */
        ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA,
index 45506a95b25f8acdb99699c3c9256f50d0e7e5d0..ac1f2bc18fc9685652c26ac3b68f19bfd82f8332 100644 (file)
@@ -213,8 +213,6 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 }
 #endif /* CONFIG_SECURITY_NETWORK */
 
-#define unix_peer(sk) (unix_sk(sk)->peer)
-
 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 {
        return unix_peer(osk) == sk;
@@ -2581,15 +2579,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
 
        if (!(state->flags & MSG_PEEK))
                WRITE_ONCE(u->oob_skb, NULL);
-
+       else
+               skb_get(oob_skb);
        unix_state_unlock(sk);
 
        chunk = state->recv_actor(oob_skb, 0, chunk, state);
 
-       if (!(state->flags & MSG_PEEK)) {
+       if (!(state->flags & MSG_PEEK))
                UNIXCB(oob_skb).consumed += 1;
-               kfree_skb(oob_skb);
-       }
+
+       consume_skb(oob_skb);
 
        mutex_unlock(&u->iolock);
 
index 616b55c5b89080c86f27b0d5b02fa9a6945e740e..bec09a3a1d44ce56d43e16583fdf3b417cce4033 100644 (file)
@@ -339,4 +339,5 @@ static void __exit unix_diag_exit(void)
 module_init(unix_diag_init);
 module_exit(unix_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("UNIX socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 1 /* AF_LOCAL */);
index 2f9d8271c6ec7df2007267d3905703c6c9686d10..7ea7c3a0d0d06224f49ad5f073bf772b9528a30a 100644 (file)
@@ -159,12 +159,17 @@ int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool re
 
 int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
 {
+       struct sock *sk_pair;
+
        if (restore) {
                sk->sk_write_space = psock->saved_write_space;
                sock_replace_proto(sk, psock->sk_proto);
                return 0;
        }
 
+       sk_pair = unix_peer(sk);
+       sock_hold(sk_pair);
+       psock->sk_pair = sk_pair;
        unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
        sock_replace_proto(sk, &unix_stream_bpf_prot);
        return 0;
index a2823b1c5e28b15d745e7a5b4a7c9de83698e70a..2e29994f92ffa2facee45cd53ec791034182508c 100644 (file)
@@ -174,5 +174,6 @@ static void __exit vsock_diag_exit(void)
 module_init(vsock_diag_init);
 module_exit(vsock_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("VMware Virtual Sockets monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG,
                               40 /* AF_VSOCK */);
index 758c9a2a12c0d3ce5f1baefdcc3a9dd7076b7eb1..409d74c57ca0d8c8d36c2260897fce39557620ee 100644 (file)
@@ -191,13 +191,13 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
                return err;
        }
 
+       wiphy_lock(&rdev->wiphy);
        list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
                if (!wdev->netdev)
                        continue;
                nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
        }
 
-       wiphy_lock(&rdev->wiphy);
        nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY);
 
        wiphy_net_set(&rdev->wiphy, net);
@@ -206,13 +206,13 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
        WARN_ON(err);
 
        nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
-       wiphy_unlock(&rdev->wiphy);
 
        list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
                if (!wdev->netdev)
                        continue;
                nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
        }
+       wiphy_unlock(&rdev->wiphy);
 
        return 0;
 }
@@ -221,7 +221,9 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data)
 {
        struct cfg80211_registered_device *rdev = data;
 
+       wiphy_lock(&rdev->wiphy);
        rdev_rfkill_poll(rdev);
+       wiphy_unlock(&rdev->wiphy);
 }
 
 void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
index 4c692c7faf30d408ab2cb3101b5344786bb690ab..cb61d33d4f1ebc4c42f21e568d51e15d5bca0797 100644 (file)
@@ -293,6 +293,7 @@ struct cfg80211_cqm_config {
        u32 rssi_hyst;
        s32 last_rssi_event_value;
        enum nl80211_cqm_rssi_threshold_event last_rssi_event_type;
+       bool use_range_api;
        int n_rssi_thresholds;
        s32 rssi_thresholds[] __counted_by(n_rssi_thresholds);
 };
index 0878b162890af7c57073991468415a87dc9edc16..40e49074e2eeb956a8f5e1c23563e61abd8900d1 100644 (file)
@@ -4,6 +4,7 @@
  *
  * Copyright 2009      Luis R. Rodriguez <lrodriguez@atheros.com>
  * Copyright 2007      Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2023 Intel Corporation
  */
 
 #include <linux/slab.h>
@@ -109,3 +110,162 @@ void cfg80211_debugfs_rdev_add(struct cfg80211_registered_device *rdev)
        DEBUGFS_ADD(long_retry_limit);
        DEBUGFS_ADD(ht40allow_map);
 }
+
+struct debugfs_read_work {
+       struct wiphy_work work;
+       ssize_t (*handler)(struct wiphy *wiphy,
+                          struct file *file,
+                          char *buf,
+                          size_t count,
+                          void *data);
+       struct wiphy *wiphy;
+       struct file *file;
+       char *buf;
+       size_t bufsize;
+       void *data;
+       ssize_t ret;
+       struct completion completion;
+};
+
+static void wiphy_locked_debugfs_read_work(struct wiphy *wiphy,
+                                          struct wiphy_work *work)
+{
+       struct debugfs_read_work *w = container_of(work, typeof(*w), work);
+
+       w->ret = w->handler(w->wiphy, w->file, w->buf, w->bufsize, w->data);
+       complete(&w->completion);
+}
+
+static void wiphy_locked_debugfs_read_cancel(struct dentry *dentry,
+                                            void *data)
+{
+       struct debugfs_read_work *w = data;
+
+       wiphy_work_cancel(w->wiphy, &w->work);
+       complete(&w->completion);
+}
+
+ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file,
+                                 char *buf, size_t bufsize,
+                                 char __user *userbuf, size_t count,
+                                 loff_t *ppos,
+                                 ssize_t (*handler)(struct wiphy *wiphy,
+                                                    struct file *file,
+                                                    char *buf,
+                                                    size_t bufsize,
+                                                    void *data),
+                                 void *data)
+{
+       struct debugfs_read_work work = {
+               .handler = handler,
+               .wiphy = wiphy,
+               .file = file,
+               .buf = buf,
+               .bufsize = bufsize,
+               .data = data,
+               .ret = -ENODEV,
+               .completion = COMPLETION_INITIALIZER_ONSTACK(work.completion),
+       };
+       struct debugfs_cancellation cancellation = {
+               .cancel = wiphy_locked_debugfs_read_cancel,
+               .cancel_data = &work,
+       };
+
+       /* don't leak stack data or whatever */
+       memset(buf, 0, bufsize);
+
+       wiphy_work_init(&work.work, wiphy_locked_debugfs_read_work);
+       wiphy_work_queue(wiphy, &work.work);
+
+       debugfs_enter_cancellation(file, &cancellation);
+       wait_for_completion(&work.completion);
+       debugfs_leave_cancellation(file, &cancellation);
+
+       if (work.ret < 0)
+               return work.ret;
+
+       if (WARN_ON(work.ret > bufsize))
+               return -EINVAL;
+
+       return simple_read_from_buffer(userbuf, count, ppos, buf, work.ret);
+}
+EXPORT_SYMBOL_GPL(wiphy_locked_debugfs_read);
+
+struct debugfs_write_work {
+       struct wiphy_work work;
+       ssize_t (*handler)(struct wiphy *wiphy,
+                          struct file *file,
+                          char *buf,
+                          size_t count,
+                          void *data);
+       struct wiphy *wiphy;
+       struct file *file;
+       char *buf;
+       size_t count;
+       void *data;
+       ssize_t ret;
+       struct completion completion;
+};
+
+static void wiphy_locked_debugfs_write_work(struct wiphy *wiphy,
+                                           struct wiphy_work *work)
+{
+       struct debugfs_write_work *w = container_of(work, typeof(*w), work);
+
+       w->ret = w->handler(w->wiphy, w->file, w->buf, w->count, w->data);
+       complete(&w->completion);
+}
+
+static void wiphy_locked_debugfs_write_cancel(struct dentry *dentry,
+                                             void *data)
+{
+       struct debugfs_write_work *w = data;
+
+       wiphy_work_cancel(w->wiphy, &w->work);
+       complete(&w->completion);
+}
+
+ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy,
+                                  struct file *file, char *buf, size_t bufsize,
+                                  const char __user *userbuf, size_t count,
+                                  ssize_t (*handler)(struct wiphy *wiphy,
+                                                     struct file *file,
+                                                     char *buf,
+                                                     size_t count,
+                                                     void *data),
+                                  void *data)
+{
+       struct debugfs_write_work work = {
+               .handler = handler,
+               .wiphy = wiphy,
+               .file = file,
+               .buf = buf,
+               .count = count,
+               .data = data,
+               .ret = -ENODEV,
+               .completion = COMPLETION_INITIALIZER_ONSTACK(work.completion),
+       };
+       struct debugfs_cancellation cancellation = {
+               .cancel = wiphy_locked_debugfs_write_cancel,
+               .cancel_data = &work,
+       };
+
+       /* mostly used for strings so enforce NUL-termination for safety */
+       if (count >= bufsize)
+               return -EINVAL;
+
+       memset(buf, 0, bufsize);
+
+       if (copy_from_user(buf, userbuf, count))
+               return -EFAULT;
+
+       wiphy_work_init(&work.work, wiphy_locked_debugfs_write_work);
+       wiphy_work_queue(wiphy, &work.work);
+
+       debugfs_enter_cancellation(file, &cancellation);
+       wait_for_completion(&work.completion);
+       debugfs_leave_cancellation(file, &cancellation);
+
+       return work.ret;
+}
+EXPORT_SYMBOL_GPL(wiphy_locked_debugfs_write);
index 569234bc2be6ae5e94fd248a9162941fd0bbdf17..1cbbb11ea5033fdeb0bfe1b8c1e796bfb748ec86 100644 (file)
@@ -3822,6 +3822,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
        struct net_device *dev = wdev->netdev;
        void *hdr;
 
+       lockdep_assert_wiphy(&rdev->wiphy);
+
        WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE &&
                cmd != NL80211_CMD_DEL_INTERFACE &&
                cmd != NL80211_CMD_SET_INTERFACE);
@@ -3989,6 +3991,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
 
                if_idx = 0;
 
+               wiphy_lock(&rdev->wiphy);
                list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
                        if (if_idx < if_start) {
                                if_idx++;
@@ -3998,10 +4001,12 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
                                               cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                               rdev, wdev,
                                               NL80211_CMD_NEW_INTERFACE) < 0) {
+                               wiphy_unlock(&rdev->wiphy);
                                goto out;
                        }
                        if_idx++;
                }
+               wiphy_unlock(&rdev->wiphy);
 
                wp_idx++;
        }
@@ -12787,10 +12792,6 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
        int i, n, low_index;
        int err;
 
-       /* RSSI reporting disabled? */
-       if (!cqm_config)
-               return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0);
-
        /*
         * Obtain current RSSI value if possible, if not and no RSSI threshold
         * event has been received yet, we should receive an event after a
@@ -12865,23 +12866,25 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
            wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
                return -EOPNOTSUPP;
 
-       if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) {
-               if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */
-                       return rdev_set_cqm_rssi_config(rdev, dev, 0, 0);
-
-               return rdev_set_cqm_rssi_config(rdev, dev,
-                                               thresholds[0], hysteresis);
-       }
-
-       if (!wiphy_ext_feature_isset(&rdev->wiphy,
-                                    NL80211_EXT_FEATURE_CQM_RSSI_LIST))
-               return -EOPNOTSUPP;
-
        if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */
                n_thresholds = 0;
 
        old = wiphy_dereference(wdev->wiphy, wdev->cqm_config);
 
+       /* if already disabled just succeed */
+       if (!n_thresholds && !old)
+               return 0;
+
+       if (n_thresholds > 1) {
+               if (!wiphy_ext_feature_isset(&rdev->wiphy,
+                                            NL80211_EXT_FEATURE_CQM_RSSI_LIST) ||
+                   !rdev->ops->set_cqm_rssi_range_config)
+                       return -EOPNOTSUPP;
+       } else {
+               if (!rdev->ops->set_cqm_rssi_config)
+                       return -EOPNOTSUPP;
+       }
+
        if (n_thresholds) {
                cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds,
                                                 n_thresholds),
@@ -12894,13 +12897,26 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
                memcpy(cqm_config->rssi_thresholds, thresholds,
                       flex_array_size(cqm_config, rssi_thresholds,
                                       n_thresholds));
+               cqm_config->use_range_api = n_thresholds > 1 ||
+                                           !rdev->ops->set_cqm_rssi_config;
 
                rcu_assign_pointer(wdev->cqm_config, cqm_config);
+
+               if (cqm_config->use_range_api)
+                       err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config);
+               else
+                       err = rdev_set_cqm_rssi_config(rdev, dev,
+                                                      thresholds[0],
+                                                      hysteresis);
        } else {
                RCU_INIT_POINTER(wdev->cqm_config, NULL);
+               /* if enabled as range also disable via range */
+               if (old->use_range_api)
+                       err = rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0);
+               else
+                       err = rdev_set_cqm_rssi_config(rdev, dev, 0, 0);
        }
 
-       err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config);
        if (err) {
                rcu_assign_pointer(wdev->cqm_config, old);
                kfree_rcu(cqm_config, rcu_head);
@@ -19009,10 +19025,11 @@ void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work)
        s32 rssi_level;
 
        cqm_config = wiphy_dereference(wdev->wiphy, wdev->cqm_config);
-       if (!wdev->cqm_config)
+       if (!cqm_config)
                return;
 
-       cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config);
+       if (cqm_config->use_range_api)
+               cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config);
 
        rssi_level = cqm_config->last_rssi_event_value;
        rssi_event = cqm_config->last_rssi_event_type;
index 22b36c8143cfd5a96ccb7e1934880a98f16f179c..9f8955367275e2439d910f978fc3b2b7a1669978 100644 (file)
@@ -211,4 +211,5 @@ static void __exit xsk_diag_exit(void)
 module_init(xsk_diag_init);
 module_exit(xsk_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("XDP socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);
index 68d0134bdbf9d1488a1e4881b6de77016d6e8e75..1a965fe68e011196d476a5d422bab347077112cd 100644 (file)
@@ -487,14 +487,14 @@ UIMAGE_OPTS-y ?=
 UIMAGE_TYPE ?= kernel
 UIMAGE_LOADADDR ?= arch_must_set_this
 UIMAGE_ENTRYADDR ?= $(UIMAGE_LOADADDR)
-UIMAGE_NAME ?= 'Linux-$(KERNELRELEASE)'
+UIMAGE_NAME ?= Linux-$(KERNELRELEASE)
 
 quiet_cmd_uimage = UIMAGE  $@
       cmd_uimage = $(BASH) $(MKIMAGE) -A $(UIMAGE_ARCH) -O linux \
                        -C $(UIMAGE_COMPRESSION) $(UIMAGE_OPTS-y) \
                        -T $(UIMAGE_TYPE) \
                        -a $(UIMAGE_LOADADDR) -e $(UIMAGE_ENTRYADDR) \
-                       -n $(UIMAGE_NAME) -d $< $@
+                       -n '$(UIMAGE_NAME)' -d $< $@
 
 # XZ
 # ---------------------------------------------------------------------------
index 84f5fb7f1cecc2e5b99902b5734e4d30a1afeebe..d83ba5d8f3f49f6c5e7349fefff382c0264fafb8 100755 (executable)
@@ -97,8 +97,7 @@ my (@stack, $re, $dre, $sub, $x, $xs, $funcre, $min_stack);
                #   11160:       a7 fb ff 60             aghi   %r15,-160
                # or
                #  100092:       e3 f0 ff c8 ff 71       lay     %r15,-56(%r15)
-               $re = qr/.*(?:lay|ag?hi).*\%r15,-(([0-9]{2}|[3-9])[0-9]{2})
-                     (?:\(\%r15\))?$/ox;
+               $re = qr/.*(?:lay|ag?hi).*\%r15,-([0-9]+)(?:\(\%r15\))?$/o;
        } elsif ($arch eq 'sparc' || $arch eq 'sparc64') {
                # f0019d10:       9d e3 bf 90     save  %sp, -112, %sp
                $re = qr/.*save.*%sp, -(([0-9]{2}|[3-9])[0-9]{2}), %sp/o;
index 39e86be60dd2d7a1fc8d8689f940e437a03dfb1b..ff0b192be91ff6efd3ac17d1c0850bfac92f31bc 100644 (file)
@@ -17,7 +17,7 @@
  *     if (argc <= 1)
  *             printf("%s: no command arguments :(\n", *argv);
  *     else
- *             printf("%s: %d command arguments!\n", *argv, args - 1);
+ *             printf("%s: %d command arguments!\n", *argv, argc - 1);
  * }
  *
  * after:
@@ -47,7 +47,7 @@
  *             // perturb_local_entropy()
  *     } else {
  *             local_entropy ^= 3896280633962944730;
- *             printf("%s: %d command arguments!\n", *argv, args - 1);
+ *             printf("%s: %d command arguments!\n", *argv, argc - 1);
  *     }
  *
  *     // latent_entropy_execute() 4.
index 366395cab490dec0cd80da7623727122438e78ab..746ff2d272f25667d31dbe2299ce00e2f9ea000a 100644 (file)
@@ -278,8 +278,6 @@ static bool is_flexible_array(const_tree field)
 {
        const_tree fieldtype;
        const_tree typesize;
-       const_tree elemtype;
-       const_tree elemsize;
 
        fieldtype = TREE_TYPE(field);
        typesize = TYPE_SIZE(fieldtype);
@@ -287,20 +285,12 @@ static bool is_flexible_array(const_tree field)
        if (TREE_CODE(fieldtype) != ARRAY_TYPE)
                return false;
 
-       elemtype = TREE_TYPE(fieldtype);
-       elemsize = TYPE_SIZE(elemtype);
-
        /* size of type is represented in bits */
 
        if (typesize == NULL_TREE && TYPE_DOMAIN(fieldtype) != NULL_TREE &&
            TYPE_MAX_VALUE(TYPE_DOMAIN(fieldtype)) == NULL_TREE)
                return true;
 
-       if (typesize != NULL_TREE &&
-           (TREE_CONSTANT(typesize) && (!tree_to_uhwi(typesize) ||
-            tree_to_uhwi(typesize) == tree_to_uhwi(elemsize))))
-               return true;
-
        return false;
 }
 
@@ -349,8 +339,7 @@ static int relayout_struct(tree type)
 
        /*
         * enforce that we don't randomize the layout of the last
-        * element of a struct if it's a 0 or 1-length array
-        * or a proper flexible array
+        * element of a struct if it's a proper flexible array
         */
        if (is_flexible_array(newtree[num_fields - 1])) {
                has_flexarray = true;
index 0572330bf8a78aed51877dfa7429aab23d55069d..a76925b46ce6309439ec0a554775dbbf2dd445cd 100644 (file)
@@ -122,9 +122,9 @@ static long long sym_get_range_val(struct symbol *sym, int base)
 static void sym_validate_range(struct symbol *sym)
 {
        struct property *prop;
+       struct symbol *range_sym;
        int base;
        long long val, val2;
-       char str[64];
 
        switch (sym->type) {
        case S_INT:
@@ -140,17 +140,15 @@ static void sym_validate_range(struct symbol *sym)
        if (!prop)
                return;
        val = strtoll(sym->curr.val, NULL, base);
-       val2 = sym_get_range_val(prop->expr->left.sym, base);
+       range_sym = prop->expr->left.sym;
+       val2 = sym_get_range_val(range_sym, base);
        if (val >= val2) {
-               val2 = sym_get_range_val(prop->expr->right.sym, base);
+               range_sym = prop->expr->right.sym;
+               val2 = sym_get_range_val(range_sym, base);
                if (val <= val2)
                        return;
        }
-       if (sym->type == S_INT)
-               sprintf(str, "%lld", val2);
-       else
-               sprintf(str, "0x%llx", val2);
-       sym->curr.val = xstrdup(str);
+       sym->curr.val = range_sym->curr.val;
 }
 
 static void sym_set_changed(struct symbol *sym)
index 973b5e5ae2dddc955b01bab3fff585d58421dbea..cb6406f485a960041db048a5f5e8ae5bfad40bb5 100644 (file)
@@ -1383,13 +1383,15 @@ static void section_rela(struct module *mod, struct elf_info *elf,
        const Elf_Rela *rela;
 
        for (rela = start; rela < stop; rela++) {
+               Elf_Sym *tsym;
                Elf_Addr taddr, r_offset;
                unsigned int r_type, r_sym;
 
                r_offset = TO_NATIVE(rela->r_offset);
                get_rel_type_and_sym(elf, rela->r_info, &r_type, &r_sym);
 
-               taddr = TO_NATIVE(rela->r_addend);
+               tsym = elf->symtab_start + r_sym;
+               taddr = tsym->st_value + TO_NATIVE(rela->r_addend);
 
                switch (elf->hdr->e_machine) {
                case EM_RISCV:
@@ -1404,7 +1406,7 @@ static void section_rela(struct module *mod, struct elf_info *elf,
                        break;
                }
 
-               check_section_mismatch(mod, elf, elf->symtab_start + r_sym,
+               check_section_mismatch(mod, elf, tsym,
                                       fsecndx, fromsec, r_offset, taddr);
        }
 }
index 757a4d193e0fb5919778847693a3353f2825265b..a9ef6d86de8397ece6aaebce969199294f39ce32 100644 (file)
@@ -21,6 +21,10 @@ static int cs35l56_hda_i2c_probe(struct i2c_client *clt)
                return -ENOMEM;
 
        cs35l56->base.dev = &clt->dev;
+
+#ifdef CS35L56_WAKE_HOLD_TIME_US
+       cs35l56->base.can_hibernate = true;
+#endif
        cs35l56->base.regmap = devm_regmap_init_i2c(clt, &cs35l56_regmap_i2c);
        if (IS_ERR(cs35l56->base.regmap)) {
                ret = PTR_ERR(cs35l56->base.regmap);
index 03264915c618332a214fe1ee35678502625f8765..db90feb49c16e2ca0104be952efd32279837bd61 100644 (file)
@@ -2135,6 +2135,9 @@ static int azx_probe(struct pci_dev *pci,
        if (chip->driver_caps & AZX_DCAPS_I915_COMPONENT) {
                err = snd_hdac_i915_init(azx_bus(chip));
                if (err < 0) {
+                       if (err == -EPROBE_DEFER)
+                               goto out_free;
+
                        /* if the controller is bound only with HDMI/DP
                         * (for HSW and BDW), we need to abort the probe;
                         * for other chips, still continue probing as other
index 669ae3d6e447e6058dfc2b35d188c8eb590f0d32..5618b1d9bfd130d1d99fa2d9feea6981267316a5 100644 (file)
@@ -9832,6 +9832,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8902, "HP OMEN 16", ALC285_FIXUP_HP_MUTE_LED),
+       SND_PCI_QUIRK(0x103c, 0x890e, "HP 255 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
        SND_PCI_QUIRK(0x103c, 0x8919, "HP Pavilion Aero Laptop 13-be0xxx", ALC287_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x896d, "HP ZBook Firefly 16 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
@@ -9867,6 +9868,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
        SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
@@ -9900,12 +9902,16 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
        SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
        SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
        SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x1043, 0x10a1, "ASUS UX391UA", ALC294_FIXUP_ASUS_SPK),
        SND_PCI_QUIRK(0x1043, 0x10c0, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
        SND_PCI_QUIRK(0x1043, 0x10d0, "ASUS X540LA/X540LJ", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1043, 0x10d3, "ASUS K6500ZC", ALC294_FIXUP_ASUS_SPK),
        SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
@@ -9944,13 +9950,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
        SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC),
+       SND_PCI_QUIRK(0x1043, 0x1a63, "ASUS UX3405MA", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B),
        SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
        SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+       SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
        SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
        SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
@@ -10821,22 +10831,6 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x12, 0x90a60130},
                {0x17, 0x90170110},
                {0x21, 0x03211020}),
-       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
-               {0x14, 0x90170110},
-               {0x21, 0x04211020}),
-       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
-               {0x14, 0x90170110},
-               {0x21, 0x04211030}),
-       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
-               ALC295_STANDARD_PINS,
-               {0x17, 0x21014020},
-               {0x18, 0x21a19030}),
-       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
-               ALC295_STANDARD_PINS,
-               {0x17, 0x21014040},
-               {0x18, 0x21a19050}),
-       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
-               ALC295_STANDARD_PINS),
        SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
                ALC298_STANDARD_PINS,
                {0x17, 0x90170110}),
@@ -10880,6 +10874,9 @@ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = {
        SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
                {0x19, 0x40000000},
                {0x1b, 0x40000000}),
+       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+               {0x19, 0x40000000},
+               {0x1b, 0x40000000}),
        SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
                {0x19, 0x40000000},
                {0x1a, 0x40000000}),
index 5f6f84837a490375c8175626e3fec40057bb56d9..7c7493cb571f97bf98b0b4841aeb756d43990718 100644 (file)
 #define ARM_CPU_PART_CORTEX_A78AE      0xD42
 #define ARM_CPU_PART_CORTEX_X1         0xD44
 #define ARM_CPU_PART_CORTEX_A510       0xD46
+#define ARM_CPU_PART_CORTEX_A520       0xD80
 #define ARM_CPU_PART_CORTEX_A710       0xD47
 #define ARM_CPU_PART_CORTEX_A715       0xD4D
 #define ARM_CPU_PART_CORTEX_X2         0xD48
 #define ARM_CPU_PART_NEOVERSE_N2       0xD49
 #define ARM_CPU_PART_CORTEX_A78C       0xD4B
 
-#define APM_CPU_PART_POTENZA           0x000
+#define APM_CPU_PART_XGENE             0x000
+#define APM_CPU_VAR_POTENZA            0x00
 
 #define CAVIUM_CPU_PART_THUNDERX       0x0A1
 #define CAVIUM_CPU_PART_THUNDERX_81XX  0x0A2
 #define MIDR_CORTEX_A78AE      MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
 #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
 #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
+#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520)
 #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
 #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715)
 #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
index f7ddd73a8c0fa2dabffd2782f674f3b484079875..89d2fc872d9f5e63dce2e2a74dfb422c9e255030 100644 (file)
@@ -505,6 +505,38 @@ struct kvm_smccc_filter {
 #define KVM_HYPERCALL_EXIT_SMC         (1U << 0)
 #define KVM_HYPERCALL_EXIT_16BIT       (1U << 1)
 
+/*
+ * Get feature ID registers userspace writable mask.
+ *
+ * From DDI0487J.a, D19.2.66 ("ID_AA64MMFR2_EL1, AArch64 Memory Model
+ * Feature Register 2"):
+ *
+ * "The Feature ID space is defined as the System register space in
+ * AArch64 with op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7},
+ * op2=={0-7}."
+ *
+ * This covers all currently known R/O registers that indicate
+ * anything useful feature wise, including the ID registers.
+ *
+ * If we ever need to introduce a new range, it will be described as
+ * such in the range field.
+ */
+#define KVM_ARM_FEATURE_ID_RANGE_IDX(op0, op1, crn, crm, op2)          \
+       ({                                                              \
+               __u64 __op1 = (op1) & 3;                                \
+               __op1 -= (__op1 == 3);                                  \
+               (__op1 << 6 | ((crm) & 7) << 3 | (op2));                \
+       })
+
+#define KVM_ARM_FEATURE_ID_RANGE       0
+#define KVM_ARM_FEATURE_ID_RANGE_SIZE  (3 * 8 * 8)
+
+struct reg_mask_range {
+       __u64 addr;             /* Pointer to mask array */
+       __u32 range;            /* Requested range */
+       __u32 reserved[13];
+};
+
 #endif
 
 #endif /* __ARM_KVM_H__ */
index fd157f46727e9af774d9dfd5a6cccc6d3d88e32c..86e556429e0eb61bac0c873c68c0ebf28e862c61 100644 (file)
@@ -36,11 +36,13 @@ enum perf_event_arm_regs {
        PERF_REG_ARM64_LR,
        PERF_REG_ARM64_SP,
        PERF_REG_ARM64_PC,
+       PERF_REG_ARM64_MAX,
 
        /* Extended/pseudo registers */
-       PERF_REG_ARM64_VG = 46, // SVE Vector Granule
-
-       PERF_REG_ARM64_MAX = PERF_REG_ARM64_PC + 1,
-       PERF_REG_ARM64_EXTENDED_MAX = PERF_REG_ARM64_VG + 1
+       PERF_REG_ARM64_VG = 46,                         /* SVE Vector Granule */
+       PERF_REG_ARM64_EXTENDED_MAX
 };
+
+#define PERF_REG_EXTENDED_MASK (1ULL << PERF_REG_ARM64_VG)
+
 #endif /* _ASM_ARM64_PERF_REGS_H */
index 7f64b8bb510702f668c4b60aa7cd19d5b7eb5a1c..7b42feedf647190ad498de0937e8fb557e40f39c 100644 (file)
@@ -22,7 +22,7 @@ endif
 arm64_tools_dir = $(top_srcdir)/arch/arm64/tools
 arm64_sysreg_tbl = $(arm64_tools_dir)/sysreg
 arm64_gen_sysreg = $(arm64_tools_dir)/gen-sysreg.awk
-arm64_generated_dir = $(top_srcdir)/tools/arch/arm64/include/generated
+arm64_generated_dir = $(OUTPUT)arch/arm64/include/generated
 arm64_sysreg_defs = $(arm64_generated_dir)/asm/sysreg-defs.h
 
 all: $(arm64_sysreg_defs)
index 87245c584784ec1f0f877fbe0be54ee136df5456..8d94739d75c67c80fbea76fe3b5c08f1f8b1ea83 100644 (file)
@@ -75,7 +75,6 @@
 
 /* We now return you to your regularly scheduled HPUX. */
 
-#define ENOSYM         215     /* symbol does not exist in executable */
 #define        ENOTSOCK        216     /* Socket operation on non-socket */
 #define        EDESTADDRREQ    217     /* Destination address required */
 #define        EMSGSIZE        218     /* Message too long */
 #define        ETIMEDOUT       238     /* Connection timed out */
 #define        ECONNREFUSED    239     /* Connection refused */
 #define        EREFUSED        ECONNREFUSED    /* for HP's NFS apparently */
-#define        EREMOTERELEASE  240     /* Remote peer released connection */
 #define        EHOSTDOWN       241     /* Host is down */
 #define        EHOSTUNREACH    242     /* No route to host */
 
index a73cf01a1606671bf77a995c665f90ca7428c9ab..abe926d43cbe0a06342f8c53f202cdd707ea1693 100644 (file)
@@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc {
        __u8 reserved[1728];
 };
 
+#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST        6
+#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST  7
+
+#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS        64
+struct kvm_s390_vm_cpu_uv_feat {
+       union {
+               struct {
+                       __u64 : 4;
+                       __u64 ap : 1;           /* bit 4 */
+                       __u64 ap_intr : 1;      /* bit 5 */
+                       __u64 : 58;
+               };
+               __u64 feat;
+       };
+};
+
 /* kvm attributes for crypto */
 #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW       0
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW       1
index 798e60b5454b7e108a07550cb354a8ee27c0de8f..4af140cf5719e6d75f5206d614002210bed782ca 100644 (file)
 #define X86_FEATURE_CAT_L3             ( 7*32+ 4) /* Cache Allocation Technology L3 */
 #define X86_FEATURE_CAT_L2             ( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3             ( 7*32+ 6) /* Code and Data Prioritization L3 */
-#define X86_FEATURE_INVPCID_SINGLE     ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 #define X86_FEATURE_HW_PSTATE          ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK      ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_XCOMPACTED         ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
 #define X86_FEATURE_MSR_TSX_CTRL       (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
 #define X86_FEATURE_SMBA               (11*32+21) /* "" Slow Memory Bandwidth Allocation */
 #define X86_FEATURE_BMEC               (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
+#define X86_FEATURE_USER_SHSTK         (11*32+23) /* Shadow stack support for user mode applications */
+
+#define X86_FEATURE_SRSO               (11*32+24) /* "" AMD BTB untrain RETs */
+#define X86_FEATURE_SRSO_ALIAS         (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
+#define X86_FEATURE_IBPB_ON_VMEXIT     (11*32+26) /* "" Issue an IBPB only on VMEXIT */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI           (12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_OSPKE              (16*32+ 4) /* OS Protection Keys Enable */
 #define X86_FEATURE_WAITPKG            (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */
 #define X86_FEATURE_AVX512_VBMI2       (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_SHSTK              (16*32+ 7) /* "" Shadow stack */
 #define X86_FEATURE_GFNI               (16*32+ 8) /* Galois Field New Instructions */
 #define X86_FEATURE_VAES               (16*32+ 9) /* Vector AES */
 #define X86_FEATURE_VPCLMULQDQ         (16*32+10) /* Carry-Less Multiplication Double Quadword */
 
 /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
 #define X86_FEATURE_NO_NESTED_DATA_BP  (20*32+ 0) /* "" No Nested Data Breakpoints */
+#define X86_FEATURE_WRMSR_XX_BASE_NS   (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
 #define X86_FEATURE_LFENCE_RDTSC       (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */
 #define X86_FEATURE_NULL_SEL_CLR_BASE  (20*32+ 6) /* "" Null Selector Clears Base */
 #define X86_FEATURE_AUTOIBRS           (20*32+ 8) /* "" Automatic IBRS */
 #define X86_FEATURE_NO_SMM_CTL_MSR     (20*32+ 9) /* "" SMM_CTL MSR is not present */
 
+#define X86_FEATURE_SBPB               (20*32+27) /* "" Selective Branch Prediction Barrier */
+#define X86_FEATURE_IBPB_BRTYPE                (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
+#define X86_FEATURE_SRSO_NO            (20*32+29) /* "" CPU is not affected by SRSO */
+
 /*
  * BUG word(s)
  */
 #define X86_BUG_RETBLEED               X86_BUG(27) /* CPU is affected by RETBleed */
 #define X86_BUG_EIBRS_PBRSB            X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
 #define X86_BUG_SMT_RSB                        X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
+#define X86_BUG_GDS                    X86_BUG(30) /* CPU is affected by Gather Data Sampling */
 
+/* BUG word 2 */
+#define X86_BUG_SRSO                   X86_BUG(1*32 + 0) /* AMD SRSO bug */
+#define X86_BUG_DIV0                   X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
 #endif /* _ASM_X86_CPUFEATURES_H */
index fafe9be7a6f4ff6b7adc0ae3ea34d30b3d9ba79d..702d93fdd10e8d44015cc687cb90106ae5bd422c 100644 (file)
 # define DISABLE_TDX_GUEST     (1 << (X86_FEATURE_TDX_GUEST & 31))
 #endif
 
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+#define DISABLE_USER_SHSTK     0
+#else
+#define DISABLE_USER_SHSTK     (1 << (X86_FEATURE_USER_SHSTK & 31))
+#endif
+
+#ifdef CONFIG_X86_KERNEL_IBT
+#define DISABLE_IBT    0
+#else
+#define DISABLE_IBT    (1 << (X86_FEATURE_IBT & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
 #define DISABLED_MASK9 (DISABLE_SGX)
 #define DISABLED_MASK10        0
 #define DISABLED_MASK11        (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
-                        DISABLE_CALL_DEPTH_TRACKING)
+                        DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
 #define DISABLED_MASK12        (DISABLE_LAM)
 #define DISABLED_MASK13        0
 #define DISABLED_MASK14        0
 #define DISABLED_MASK16        (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
                         DISABLE_ENQCMD)
 #define DISABLED_MASK17        0
-#define DISABLED_MASK18        0
+#define DISABLED_MASK18        (DISABLE_IBT)
 #define DISABLED_MASK19        0
 #define DISABLED_MASK20        0
 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
index 1d111350197f3169a8eec402d77980dd617c6b95..1d51e1850ed03d46e84c71de0c451067d0baac5b 100644 (file)
 #define MSR_INTEGRITY_CAPS_ARRAY_BIST          BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT)
 #define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT   4
 #define MSR_INTEGRITY_CAPS_PERIODIC_BIST       BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT)
+#define MSR_INTEGRITY_CAPS_SAF_GEN_MASK        GENMASK_ULL(10, 9)
 
 #define MSR_LBR_NHM_FROM               0x00000680
 #define MSR_LBR_NHM_TO                 0x000006c0
 #define MSR_AMD64_CPUID_FN_1           0xc0011004
 #define MSR_AMD64_LS_CFG               0xc0011020
 #define MSR_AMD64_DC_CFG               0xc0011022
+#define MSR_AMD64_TW_CFG               0xc0011023
 
 #define MSR_AMD64_DE_CFG               0xc0011029
 #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT   1
 /* AMD Last Branch Record MSRs */
 #define MSR_AMD64_LBR_SELECT                   0xc000010e
 
+/* Zen4 */
+#define MSR_ZEN4_BP_CFG                 0xc001102e
+#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
+
+/* Fam 19h MSRs */
+#define MSR_F19H_UMC_PERF_CTL           0xc0010800
+#define MSR_F19H_UMC_PERF_CTR           0xc0010801
+
+/* Zen 2 */
+#define MSR_ZEN2_SPECTRAL_CHICKEN       0xc00110e3
+#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT   BIT_ULL(1)
+
 /* Fam 17h MSRs */
 #define MSR_F17H_IRPERF                        0xc00000e9
 
-#define MSR_ZEN2_SPECTRAL_CHICKEN      0xc00110e3
-#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT  BIT_ULL(1)
-
 /* Fam 16h MSRs */
 #define MSR_F16H_L2I_PERF_CTL          0xc0010230
 #define MSR_F16H_L2I_PERF_CTR          0xc0010231
 #define MSR_IA32_VMX_MISC_INTEL_PT                 (1ULL << 14)
 #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
 #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE   0x1F
-/* AMD-V MSRs */
 
+/* AMD-V MSRs */
 #define MSR_VM_CR                       0xc0010114
 #define MSR_VM_IGNNE                    0xc0010115
 #define MSR_VM_HSAVE_PA                 0xc0010117
 
+#define SVM_VM_CR_VALID_MASK           0x001fULL
+#define SVM_VM_CR_SVM_LOCK_MASK                0x0008ULL
+#define SVM_VM_CR_SVM_DIS_MASK         0x0010ULL
+
 /* Hardware Feedback Interface */
 #define MSR_IA32_HW_FEEDBACK_PTR        0x17d0
 #define MSR_IA32_HW_FEEDBACK_CONFIG     0x17d1
index e8d7ebbca1a4dc4120f5ae8c924a9d1c1f35db22..384e2cc6ac190d1d8683f1e657ef0b2be57c6a9f 100644 (file)
 #define ARCH_MAP_VDSO_32               0x2002
 #define ARCH_MAP_VDSO_64               0x2003
 
+/* Don't use 0x3001-0x3004 because of old glibcs */
+
 #define ARCH_GET_UNTAG_MASK            0x4001
 #define ARCH_ENABLE_TAGGED_ADDR                0x4002
 #define ARCH_GET_MAX_TAG_BITS          0x4003
 #define ARCH_FORCE_TAGGED_SVA          0x4004
 
+#define ARCH_SHSTK_ENABLE              0x5001
+#define ARCH_SHSTK_DISABLE             0x5002
+#define ARCH_SHSTK_LOCK                        0x5003
+#define ARCH_SHSTK_UNLOCK              0x5004
+#define ARCH_SHSTK_STATUS              0x5005
+
+/* ARCH_SHSTK_ features bits */
+#define ARCH_SHSTK_SHSTK               (1ULL <<  0)
+#define ARCH_SHSTK_WRSS                        (1ULL <<  1)
+
 #endif /* _ASM_X86_PRCTL_H */
index 264eeb9c46a9f5ea9264829b41eda164e72d0d2e..318e2dad27e048c08fea615cef8654aa1fcb7d81 100644 (file)
@@ -1421,7 +1421,7 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
        if (error)
                goto setval_error;
 
-       if (new_val->addr_family == ADDR_FAMILY_IPV6) {
+       if (new_val->addr_family & ADDR_FAMILY_IPV6) {
                error = fprintf(nmfile, "\n[ipv6]\n");
                if (error < 0)
                        goto setval_error;
@@ -1455,14 +1455,18 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
        if (error < 0)
                goto setval_error;
 
-       error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
-       if (error < 0)
-               goto setval_error;
-
-       error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
-       if (error < 0)
-               goto setval_error;
+       /* we do not want ipv4 addresses in ipv6 section and vice versa */
+       if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) {
+               error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
+               if (error < 0)
+                       goto setval_error;
+       }
 
+       if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) {
+               error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
+               if (error < 0)
+                       goto setval_error;
+       }
        fclose(nmfile);
        fclose(ifcfg_file);
 
index ae5a7a8249a208cf5782e8627ba4354959d38e2b..440a91b35823bfaa3cc6d8b38baa86362dbddf61 100755 (executable)
@@ -53,7 +53,7 @@
 #                       or "manual" if no boot-time protocol should be used)
 #
 # address1=ipaddr1/plen
-# address=ipaddr2/plen
+# address2=ipaddr2/plen
 #
 # gateway=gateway1;gateway2
 #
@@ -61,7 +61,7 @@
 #
 # [ipv6]
 # address1=ipaddr1/plen
-# address2=ipaddr1/plen
+# address2=ipaddr2/plen
 #
 # gateway=gateway1;gateway2
 #
index 156743d399aed2237175cc87ba82ce9b00acf3cc..2fd551915c2025ee7d7adc53f30e44e7b6bf01c1 100644 (file)
@@ -8,6 +8,7 @@
  */
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wpacked"
+#pragma GCC diagnostic ignored "-Wattributes"
 
 #define __get_unaligned_t(type, ptr) ({                                                \
        const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr);      \
index 76d94644539127e5ba6228b2250ebd96aff92cdf..756b013fb8324bd7a320e60cebec2ca692faa149 100644 (file)
@@ -816,15 +816,21 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
 __SYSCALL(__NR_futex_waitv, sys_futex_waitv)
 #define __NR_set_mempolicy_home_node 450
 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
-
 #define __NR_cachestat 451
 __SYSCALL(__NR_cachestat, sys_cachestat)
-
 #define __NR_fchmodat2 452
 __SYSCALL(__NR_fchmodat2, sys_fchmodat2)
+#define __NR_map_shadow_stack 453
+__SYSCALL(__NR_map_shadow_stack, sys_map_shadow_stack)
+#define __NR_futex_wake 454
+__SYSCALL(__NR_futex_wake, sys_futex_wake)
+#define __NR_futex_wait 455
+__SYSCALL(__NR_futex_wait, sys_futex_wait)
+#define __NR_futex_requeue 456
+__SYSCALL(__NR_futex_requeue, sys_futex_requeue)
 
 #undef __NR_syscalls
-#define __NR_syscalls 453
+#define __NR_syscalls 457
 
 /*
  * 32 bit systems traditionally used different
index 794c1d857677d9b9b2e20222f671f15fbb86c4aa..de723566c5ae82382192923e17478209f7c94f41 100644 (file)
@@ -1134,6 +1134,26 @@ extern "C" {
 #define DRM_IOCTL_MODE_PAGE_FLIP       DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip)
 #define DRM_IOCTL_MODE_DIRTYFB         DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd)
 
+/**
+ * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object.
+ *
+ * KMS dumb buffers provide a very primitive way to allocate a buffer object
+ * suitable for scanout and map it for software rendering. KMS dumb buffers are
+ * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb
+ * buffers are not suitable to be displayed on any other device than the KMS
+ * device where they were allocated from. Also see
+ * :ref:`kms_dumb_buffer_objects`.
+ *
+ * The IOCTL argument is a struct drm_mode_create_dumb.
+ *
+ * User-space is expected to create a KMS dumb buffer via this IOCTL, then add
+ * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via
+ * &DRM_IOCTL_MODE_MAP_DUMB.
+ *
+ * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported.
+ * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate
+ * driver preferences for dumb buffers.
+ */
 #define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb)
 #define DRM_IOCTL_MODE_MAP_DUMB    DRM_IOWR(0xB3, struct drm_mode_map_dumb)
 #define DRM_IOCTL_MODE_DESTROY_DUMB    DRM_IOWR(0xB4, struct drm_mode_destroy_dumb)
index 7000e5910a1d7453d99b92152689fa62a2422776..218edb0a96f8c043df13a5bf25f85ec754ee449a 100644 (file)
@@ -38,13 +38,13 @@ extern "C" {
  */
 
 /**
- * DOC: uevents generated by i915 on it's device node
+ * DOC: uevents generated by i915 on its device node
  *
  * I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch
- *     event from the gpu l3 cache. Additional information supplied is ROW,
+ *     event from the GPU L3 cache. Additional information supplied is ROW,
  *     BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep
- *     track of these events and if a specific cache-line seems to have a
- *     persistent error remap it with the l3 remapping tool supplied in
+ *     track of these events, and if a specific cache-line seems to have a
+ *     persistent error, remap it with the L3 remapping tool supplied in
  *     intel-gpu-tools.  The value supplied with the event is always 1.
  *
  * I915_ERROR_UEVENT - Generated upon error detection, currently only via
index fd1fb0d5389d3abd075f3c84aa44e790dfe6d637..7a8f4c2901873f252895649d19bb5b26b8cf932a 100644 (file)
@@ -71,7 +71,8 @@ struct fscrypt_policy_v2 {
        __u8 contents_encryption_mode;
        __u8 filenames_encryption_mode;
        __u8 flags;
-       __u8 __reserved[4];
+       __u8 log2_data_unit_size;
+       __u8 __reserved[3];
        __u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
 };
 
index f089ab290978450eced8a4fb705fc2d299a6d5e7..211b86de35ac53f6457bbd2fae8c973ce6b3a968 100644 (file)
@@ -264,6 +264,7 @@ struct kvm_xen_exit {
 #define KVM_EXIT_RISCV_SBI        35
 #define KVM_EXIT_RISCV_CSR        36
 #define KVM_EXIT_NOTIFY           37
+#define KVM_EXIT_LOONGARCH_IOCSR  38
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -336,6 +337,13 @@ struct kvm_run {
                        __u32 len;
                        __u8  is_write;
                } mmio;
+               /* KVM_EXIT_LOONGARCH_IOCSR */
+               struct {
+                       __u64 phys_addr;
+                       __u8  data[8];
+                       __u32 len;
+                       __u8  is_write;
+               } iocsr_io;
                /* KVM_EXIT_HYPERCALL */
                struct {
                        __u64 nr;
@@ -1192,6 +1200,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_COUNTER_OFFSET 227
 #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
+#define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1362,6 +1371,7 @@ struct kvm_dirty_tlb {
 #define KVM_REG_ARM64          0x6000000000000000ULL
 #define KVM_REG_MIPS           0x7000000000000000ULL
 #define KVM_REG_RISCV          0x8000000000000000ULL
+#define KVM_REG_LOONGARCH      0x9000000000000000ULL
 
 #define KVM_REG_SIZE_SHIFT     52
 #define KVM_REG_SIZE_MASK      0x00f0000000000000ULL
@@ -1418,9 +1428,16 @@ struct kvm_device_attr {
        __u64   addr;           /* userspace address of attr data */
 };
 
-#define  KVM_DEV_VFIO_GROUP                    1
-#define   KVM_DEV_VFIO_GROUP_ADD                       1
-#define   KVM_DEV_VFIO_GROUP_DEL                       2
+#define  KVM_DEV_VFIO_FILE                     1
+
+#define   KVM_DEV_VFIO_FILE_ADD                        1
+#define   KVM_DEV_VFIO_FILE_DEL                        2
+
+/* KVM_DEV_VFIO_GROUP aliases are for compile time uapi compatibility */
+#define  KVM_DEV_VFIO_GROUP    KVM_DEV_VFIO_FILE
+
+#define   KVM_DEV_VFIO_GROUP_ADD       KVM_DEV_VFIO_FILE_ADD
+#define   KVM_DEV_VFIO_GROUP_DEL       KVM_DEV_VFIO_FILE_DEL
 #define   KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE             3
 
 enum kvm_device_type {
@@ -1555,6 +1572,7 @@ struct kvm_s390_ucas_mapping {
 #define KVM_ARM_MTE_COPY_TAGS    _IOR(KVMIO,  0xb4, struct kvm_arm_copy_mte_tags)
 /* Available with KVM_CAP_COUNTER_OFFSET */
 #define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO,  0xb5, struct kvm_arm_counter_offset)
+#define KVM_ARM_GET_REG_WRITABLE_MASKS _IOR(KVMIO,  0xb6, struct reg_mask_range)
 
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE        _IOWR(KVMIO,  0xe0, struct kvm_create_device)
index 8eb0d7b758d2835addc1c4a7eeb0d9da74c0d448..bb242fdcfe6b29bf96e287023701dd8629042969 100644 (file)
@@ -100,8 +100,9 @@ enum fsconfig_command {
        FSCONFIG_SET_PATH       = 3,    /* Set parameter, supplying an object by path */
        FSCONFIG_SET_PATH_EMPTY = 4,    /* Set parameter, supplying an object by (empty) path */
        FSCONFIG_SET_FD         = 5,    /* Set parameter, supplying an object by fd */
-       FSCONFIG_CMD_CREATE     = 6,    /* Invoke superblock creation */
+       FSCONFIG_CMD_CREATE     = 6,    /* Create new or reuse existing superblock */
        FSCONFIG_CMD_RECONFIGURE = 7,   /* Invoke superblock reconfiguration */
+       FSCONFIG_CMD_CREATE_EXCL = 8,   /* Create new superblock, fail if reusing existing superblock */
 };
 
 /*
index f5c48b61ab62244104bbf1b2100d3db7286f8c82..649560c685f13b73feaafb96b64c351b6eec2c25 100644 (file)
  */
 #define VHOST_VDPA_RESUME              _IO(VHOST_VIRTIO, 0x7E)
 
+/* Get the group for the descriptor table including driver & device areas
+ * of a virtqueue: read index, write group in num.
+ * The virtqueue index is stored in the index field of vhost_vring_state.
+ * The group ID of the descriptor table for this specific virtqueue
+ * is returned via num field of vhost_vring_state.
+ */
+#define VHOST_VDPA_GET_VRING_DESC_GROUP        _IOWR(VHOST_VIRTIO, 0x7F,       \
+                                             struct vhost_vring_state)
 #endif
index 64d139400db1034c83144cc93c0633990a90e82c..3110f84dd02944641276f76c3b87f79f3165022e 100644 (file)
@@ -18,4 +18,4 @@ CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h)
 CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h)
 CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
 CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h)
-CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_H,nfsd.h)
+CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h)
index bc5065bd99b2f05f4835da18d8a2a1a6fd28ce26..8e757e249dab84b3e62bb9ddf8773d79fa48359e 100644 (file)
@@ -15,7 +15,7 @@
 /* Enums */
 static const char * const devlink_op_strmap[] = {
        [3] = "get",
-       [7] = "port-get",
+       // skip "port-get", duplicate reply value
        [DEVLINK_CMD_PORT_NEW] = "port-new",
        [13] = "sb-get",
        [17] = "sb-pool-get",
@@ -2399,6 +2399,7 @@ void devlink_port_set_req_free(struct devlink_port_set_req *req)
 
 int devlink_port_set(struct ynl_sock *ys, struct devlink_port_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -2416,7 +2417,7 @@ int devlink_port_set(struct ynl_sock *ys, struct devlink_port_set_req *req)
        if (req->_present.port_function)
                devlink_dl_port_function_put(nlh, DEVLINK_ATTR_PORT_FUNCTION, &req->port_function);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -2537,6 +2538,7 @@ void devlink_port_del_req_free(struct devlink_port_del_req *req)
 
 int devlink_port_del(struct ynl_sock *ys, struct devlink_port_del_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -2550,7 +2552,7 @@ int devlink_port_del(struct ynl_sock *ys, struct devlink_port_del_req *req)
        if (req->_present.port_index)
                mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -2568,6 +2570,7 @@ void devlink_port_split_req_free(struct devlink_port_split_req *req)
 
 int devlink_port_split(struct ynl_sock *ys, struct devlink_port_split_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -2583,7 +2586,7 @@ int devlink_port_split(struct ynl_sock *ys, struct devlink_port_split_req *req)
        if (req->_present.port_split_count)
                mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_SPLIT_COUNT, req->port_split_count);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -2602,6 +2605,7 @@ void devlink_port_unsplit_req_free(struct devlink_port_unsplit_req *req)
 int devlink_port_unsplit(struct ynl_sock *ys,
                         struct devlink_port_unsplit_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -2615,7 +2619,7 @@ int devlink_port_unsplit(struct ynl_sock *ys,
        if (req->_present.port_index)
                mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -2926,6 +2930,7 @@ void devlink_sb_pool_set_req_free(struct devlink_sb_pool_set_req *req)
 int devlink_sb_pool_set(struct ynl_sock *ys,
                        struct devlink_sb_pool_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -2945,7 +2950,7 @@ int devlink_sb_pool_set(struct ynl_sock *ys,
        if (req->_present.sb_pool_size)
                mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_POOL_SIZE, req->sb_pool_size);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -3126,6 +3131,7 @@ devlink_sb_port_pool_set_req_free(struct devlink_sb_port_pool_set_req *req)
 int devlink_sb_port_pool_set(struct ynl_sock *ys,
                             struct devlink_sb_port_pool_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -3145,7 +3151,7 @@ int devlink_sb_port_pool_set(struct ynl_sock *ys,
        if (req->_present.sb_threshold)
                mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_THRESHOLD, req->sb_threshold);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -3334,6 +3340,7 @@ devlink_sb_tc_pool_bind_set_req_free(struct devlink_sb_tc_pool_bind_set_req *req
 int devlink_sb_tc_pool_bind_set(struct ynl_sock *ys,
                                struct devlink_sb_tc_pool_bind_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -3357,7 +3364,7 @@ int devlink_sb_tc_pool_bind_set(struct ynl_sock *ys,
        if (req->_present.sb_threshold)
                mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_THRESHOLD, req->sb_threshold);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -3376,6 +3383,7 @@ void devlink_sb_occ_snapshot_req_free(struct devlink_sb_occ_snapshot_req *req)
 int devlink_sb_occ_snapshot(struct ynl_sock *ys,
                            struct devlink_sb_occ_snapshot_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -3389,7 +3397,7 @@ int devlink_sb_occ_snapshot(struct ynl_sock *ys,
        if (req->_present.sb_index)
                mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -3409,6 +3417,7 @@ devlink_sb_occ_max_clear_req_free(struct devlink_sb_occ_max_clear_req *req)
 int devlink_sb_occ_max_clear(struct ynl_sock *ys,
                             struct devlink_sb_occ_max_clear_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -3422,7 +3431,7 @@ int devlink_sb_occ_max_clear(struct ynl_sock *ys,
        if (req->_present.sb_index)
                mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -3544,6 +3553,7 @@ void devlink_eswitch_set_req_free(struct devlink_eswitch_set_req *req)
 int devlink_eswitch_set(struct ynl_sock *ys,
                        struct devlink_eswitch_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -3561,7 +3571,7 @@ int devlink_eswitch_set(struct ynl_sock *ys,
        if (req->_present.eswitch_encap_mode)
                mnl_attr_put_u8(nlh, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, req->eswitch_encap_mode);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -3895,6 +3905,7 @@ devlink_dpipe_table_counters_set_req_free(struct devlink_dpipe_table_counters_se
 int devlink_dpipe_table_counters_set(struct ynl_sock *ys,
                                     struct devlink_dpipe_table_counters_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -3910,7 +3921,7 @@ int devlink_dpipe_table_counters_set(struct ynl_sock *ys,
        if (req->_present.dpipe_table_counters_enabled)
                mnl_attr_put_u8(nlh, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, req->dpipe_table_counters_enabled);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -3929,6 +3940,7 @@ void devlink_resource_set_req_free(struct devlink_resource_set_req *req)
 int devlink_resource_set(struct ynl_sock *ys,
                         struct devlink_resource_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -3944,7 +3956,7 @@ int devlink_resource_set(struct ynl_sock *ys,
        if (req->_present.resource_size)
                mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_SIZE, req->resource_size);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -4319,6 +4331,7 @@ void devlink_param_set_req_free(struct devlink_param_set_req *req)
 
 int devlink_param_set(struct ynl_sock *ys, struct devlink_param_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -4336,7 +4349,7 @@ int devlink_param_set(struct ynl_sock *ys, struct devlink_param_set_req *req)
        if (req->_present.param_value_cmode)
                mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, req->param_value_cmode);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -4631,6 +4644,7 @@ void devlink_region_del_req_free(struct devlink_region_del_req *req)
 
 int devlink_region_del(struct ynl_sock *ys, struct devlink_region_del_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -4648,7 +4662,7 @@ int devlink_region_del(struct ynl_sock *ys, struct devlink_region_del_req *req)
        if (req->_present.region_snapshot_id)
                mnl_attr_put_u32(nlh, DEVLINK_ATTR_REGION_SNAPSHOT_ID, req->region_snapshot_id);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -4922,6 +4936,7 @@ void devlink_port_param_set_req_free(struct devlink_port_param_set_req *req)
 int devlink_port_param_set(struct ynl_sock *ys,
                           struct devlink_port_param_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -4935,7 +4950,7 @@ int devlink_port_param_set(struct ynl_sock *ys,
        if (req->_present.port_index)
                mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -5360,6 +5375,7 @@ devlink_health_reporter_set_req_free(struct devlink_health_reporter_set_req *req
 int devlink_health_reporter_set(struct ynl_sock *ys,
                                struct devlink_health_reporter_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -5381,7 +5397,7 @@ int devlink_health_reporter_set(struct ynl_sock *ys,
        if (req->_present.health_reporter_auto_dump)
                mnl_attr_put_u8(nlh, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, req->health_reporter_auto_dump);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -5402,6 +5418,7 @@ devlink_health_reporter_recover_req_free(struct devlink_health_reporter_recover_
 int devlink_health_reporter_recover(struct ynl_sock *ys,
                                    struct devlink_health_reporter_recover_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -5417,7 +5434,7 @@ int devlink_health_reporter_recover(struct ynl_sock *ys,
        if (req->_present.health_reporter_name_len)
                mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -5438,6 +5455,7 @@ devlink_health_reporter_diagnose_req_free(struct devlink_health_reporter_diagnos
 int devlink_health_reporter_diagnose(struct ynl_sock *ys,
                                     struct devlink_health_reporter_diagnose_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -5453,7 +5471,7 @@ int devlink_health_reporter_diagnose(struct ynl_sock *ys,
        if (req->_present.health_reporter_name_len)
                mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -5556,6 +5574,7 @@ devlink_health_reporter_dump_clear_req_free(struct devlink_health_reporter_dump_
 int devlink_health_reporter_dump_clear(struct ynl_sock *ys,
                                       struct devlink_health_reporter_dump_clear_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -5571,7 +5590,7 @@ int devlink_health_reporter_dump_clear(struct ynl_sock *ys,
        if (req->_present.health_reporter_name_len)
                mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -5592,6 +5611,7 @@ void devlink_flash_update_req_free(struct devlink_flash_update_req *req)
 int devlink_flash_update(struct ynl_sock *ys,
                         struct devlink_flash_update_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -5609,7 +5629,7 @@ int devlink_flash_update(struct ynl_sock *ys,
        if (req->_present.flash_update_overwrite_mask)
                mnl_attr_put(nlh, DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, sizeof(struct nla_bitfield32), &req->flash_update_overwrite_mask);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -5780,6 +5800,7 @@ void devlink_trap_set_req_free(struct devlink_trap_set_req *req)
 
 int devlink_trap_set(struct ynl_sock *ys, struct devlink_trap_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -5795,7 +5816,7 @@ int devlink_trap_set(struct ynl_sock *ys, struct devlink_trap_set_req *req)
        if (req->_present.trap_action)
                mnl_attr_put_u8(nlh, DEVLINK_ATTR_TRAP_ACTION, req->trap_action);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -5968,6 +5989,7 @@ void devlink_trap_group_set_req_free(struct devlink_trap_group_set_req *req)
 int devlink_trap_group_set(struct ynl_sock *ys,
                           struct devlink_trap_group_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -5985,7 +6007,7 @@ int devlink_trap_group_set(struct ynl_sock *ys,
        if (req->_present.trap_policer_id)
                mnl_attr_put_u32(nlh, DEVLINK_ATTR_TRAP_POLICER_ID, req->trap_policer_id);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -6152,6 +6174,7 @@ devlink_trap_policer_set_req_free(struct devlink_trap_policer_set_req *req)
 int devlink_trap_policer_set(struct ynl_sock *ys,
                             struct devlink_trap_policer_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -6169,7 +6192,7 @@ int devlink_trap_policer_set(struct ynl_sock *ys,
        if (req->_present.trap_policer_burst)
                mnl_attr_put_u64(nlh, DEVLINK_ATTR_TRAP_POLICER_BURST, req->trap_policer_burst);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -6190,6 +6213,7 @@ devlink_health_reporter_test_req_free(struct devlink_health_reporter_test_req *r
 int devlink_health_reporter_test(struct ynl_sock *ys,
                                 struct devlink_health_reporter_test_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -6205,7 +6229,7 @@ int devlink_health_reporter_test(struct ynl_sock *ys,
        if (req->_present.health_reporter_name_len)
                mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -6384,6 +6408,7 @@ void devlink_rate_set_req_free(struct devlink_rate_set_req *req)
 
 int devlink_rate_set(struct ynl_sock *ys, struct devlink_rate_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -6407,7 +6432,7 @@ int devlink_rate_set(struct ynl_sock *ys, struct devlink_rate_set_req *req)
        if (req->_present.rate_parent_node_name_len)
                mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, req->rate_parent_node_name);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -6427,6 +6452,7 @@ void devlink_rate_new_req_free(struct devlink_rate_new_req *req)
 
 int devlink_rate_new(struct ynl_sock *ys, struct devlink_rate_new_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -6450,7 +6476,7 @@ int devlink_rate_new(struct ynl_sock *ys, struct devlink_rate_new_req *req)
        if (req->_present.rate_parent_node_name_len)
                mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, req->rate_parent_node_name);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -6469,6 +6495,7 @@ void devlink_rate_del_req_free(struct devlink_rate_del_req *req)
 
 int devlink_rate_del(struct ynl_sock *ys, struct devlink_rate_del_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -6482,7 +6509,7 @@ int devlink_rate_del(struct ynl_sock *ys, struct devlink_rate_del_req *req)
        if (req->_present.rate_node_name_len)
                mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -6645,6 +6672,7 @@ void devlink_linecard_set_req_free(struct devlink_linecard_set_req *req)
 int devlink_linecard_set(struct ynl_sock *ys,
                         struct devlink_linecard_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -6660,7 +6688,7 @@ int devlink_linecard_set(struct ynl_sock *ys,
        if (req->_present.linecard_type_len)
                mnl_attr_put_strz(nlh, DEVLINK_ATTR_LINECARD_TYPE, req->linecard_type);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -6810,6 +6838,7 @@ void devlink_selftests_run_req_free(struct devlink_selftests_run_req *req)
 int devlink_selftests_run(struct ynl_sock *ys,
                          struct devlink_selftests_run_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -6823,7 +6852,7 @@ int devlink_selftests_run(struct ynl_sock *ys,
        if (req->_present.selftests)
                devlink_dl_selftest_id_put(nlh, DEVLINK_ATTR_SELFTESTS, &req->selftests);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
index 74b883a14958f4212616cf76f0237b543d5ce7cd..660435639e2bf8f522bb3f6dece55e93a144e002 100644 (file)
@@ -1843,6 +1843,7 @@ void ethtool_linkinfo_set_req_free(struct ethtool_linkinfo_set_req *req)
 int ethtool_linkinfo_set(struct ynl_sock *ys,
                         struct ethtool_linkinfo_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -1862,7 +1863,7 @@ int ethtool_linkinfo_set(struct ynl_sock *ys,
        if (req->_present.transceiver)
                mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_TRANSCEIVER, req->transceiver);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -2067,6 +2068,7 @@ void ethtool_linkmodes_set_req_free(struct ethtool_linkmodes_set_req *req)
 int ethtool_linkmodes_set(struct ynl_sock *ys,
                          struct ethtool_linkmodes_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -2094,7 +2096,7 @@ int ethtool_linkmodes_set(struct ynl_sock *ys,
        if (req->_present.rate_matching)
                mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_RATE_MATCHING, req->rate_matching);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -2398,6 +2400,7 @@ void ethtool_debug_set_req_free(struct ethtool_debug_set_req *req)
 
 int ethtool_debug_set(struct ynl_sock *ys, struct ethtool_debug_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -2409,7 +2412,7 @@ int ethtool_debug_set(struct ynl_sock *ys, struct ethtool_debug_set_req *req)
        if (req->_present.msgmask)
                ethtool_bitset_put(nlh, ETHTOOL_A_DEBUG_MSGMASK, &req->msgmask);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -2577,6 +2580,7 @@ void ethtool_wol_set_req_free(struct ethtool_wol_set_req *req)
 
 int ethtool_wol_set(struct ynl_sock *ys, struct ethtool_wol_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -2590,7 +2594,7 @@ int ethtool_wol_set(struct ynl_sock *ys, struct ethtool_wol_set_req *req)
        if (req->_present.sopass_len)
                mnl_attr_put(nlh, ETHTOOL_A_WOL_SOPASS, req->_present.sopass_len, req->sopass);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -3045,6 +3049,7 @@ void ethtool_privflags_set_req_free(struct ethtool_privflags_set_req *req)
 int ethtool_privflags_set(struct ynl_sock *ys,
                          struct ethtool_privflags_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -3056,7 +3061,7 @@ int ethtool_privflags_set(struct ynl_sock *ys,
        if (req->_present.flags)
                ethtool_bitset_put(nlh, ETHTOOL_A_PRIVFLAGS_FLAGS, &req->flags);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -3273,6 +3278,7 @@ void ethtool_rings_set_req_free(struct ethtool_rings_set_req *req)
 
 int ethtool_rings_set(struct ynl_sock *ys, struct ethtool_rings_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -3312,7 +3318,7 @@ int ethtool_rings_set(struct ynl_sock *ys, struct ethtool_rings_set_req *req)
        if (req->_present.tx_push_buf_len_max)
                mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, req->tx_push_buf_len_max);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -3495,6 +3501,7 @@ void ethtool_channels_set_req_free(struct ethtool_channels_set_req *req)
 int ethtool_channels_set(struct ynl_sock *ys,
                         struct ethtool_channels_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -3520,7 +3527,7 @@ int ethtool_channels_set(struct ynl_sock *ys,
        if (req->_present.combined_count)
                mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_COMBINED_COUNT, req->combined_count);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -3798,6 +3805,7 @@ void ethtool_coalesce_set_req_free(struct ethtool_coalesce_set_req *req)
 int ethtool_coalesce_set(struct ynl_sock *ys,
                         struct ethtool_coalesce_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -3861,7 +3869,7 @@ int ethtool_coalesce_set(struct ynl_sock *ys,
        if (req->_present.tx_aggr_time_usecs)
                mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, req->tx_aggr_time_usecs);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -4036,6 +4044,7 @@ void ethtool_pause_set_req_free(struct ethtool_pause_set_req *req)
 
 int ethtool_pause_set(struct ynl_sock *ys, struct ethtool_pause_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -4055,7 +4064,7 @@ int ethtool_pause_set(struct ynl_sock *ys, struct ethtool_pause_set_req *req)
        if (req->_present.stats_src)
                mnl_attr_put_u32(nlh, ETHTOOL_A_PAUSE_STATS_SRC, req->stats_src);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -4242,6 +4251,7 @@ void ethtool_eee_set_req_free(struct ethtool_eee_set_req *req)
 
 int ethtool_eee_set(struct ynl_sock *ys, struct ethtool_eee_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -4263,7 +4273,7 @@ int ethtool_eee_set(struct ynl_sock *ys, struct ethtool_eee_set_req *req)
        if (req->_present.tx_lpi_timer)
                mnl_attr_put_u32(nlh, ETHTOOL_A_EEE_TX_LPI_TIMER, req->tx_lpi_timer);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -4437,6 +4447,7 @@ void ethtool_cable_test_act_req_free(struct ethtool_cable_test_act_req *req)
 int ethtool_cable_test_act(struct ynl_sock *ys,
                           struct ethtool_cable_test_act_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -4446,7 +4457,7 @@ int ethtool_cable_test_act(struct ynl_sock *ys,
        if (req->_present.header)
                ethtool_header_put(nlh, ETHTOOL_A_CABLE_TEST_HEADER, &req->header);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -4465,6 +4476,7 @@ ethtool_cable_test_tdr_act_req_free(struct ethtool_cable_test_tdr_act_req *req)
 int ethtool_cable_test_tdr_act(struct ynl_sock *ys,
                               struct ethtool_cable_test_tdr_act_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -4474,7 +4486,7 @@ int ethtool_cable_test_tdr_act(struct ynl_sock *ys,
        if (req->_present.header)
                ethtool_header_put(nlh, ETHTOOL_A_CABLE_TEST_TDR_HEADER, &req->header);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -4782,6 +4794,7 @@ void ethtool_fec_set_req_free(struct ethtool_fec_set_req *req)
 
 int ethtool_fec_set(struct ynl_sock *ys, struct ethtool_fec_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -4799,7 +4812,7 @@ int ethtool_fec_set(struct ynl_sock *ys, struct ethtool_fec_set_req *req)
        if (req->_present.stats)
                ethtool_fec_stat_put(nlh, ETHTOOL_A_FEC_STATS, &req->stats);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -5235,6 +5248,7 @@ void ethtool_module_set_req_free(struct ethtool_module_set_req *req)
 
 int ethtool_module_set(struct ynl_sock *ys, struct ethtool_module_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -5248,7 +5262,7 @@ int ethtool_module_set(struct ynl_sock *ys, struct ethtool_module_set_req *req)
        if (req->_present.power_mode)
                mnl_attr_put_u8(nlh, ETHTOOL_A_MODULE_POWER_MODE, req->power_mode);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -5397,6 +5411,7 @@ void ethtool_pse_set_req_free(struct ethtool_pse_set_req *req)
 
 int ethtool_pse_set(struct ynl_sock *ys, struct ethtool_pse_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -5412,7 +5427,7 @@ int ethtool_pse_set(struct ynl_sock *ys, struct ethtool_pse_set_req *req)
        if (req->_present.pw_d_status)
                mnl_attr_put_u32(nlh, ETHTOOL_A_PODL_PSE_PW_D_STATUS, req->pw_d_status);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -5746,6 +5761,7 @@ void ethtool_plca_set_cfg_req_free(struct ethtool_plca_set_cfg_req *req)
 int ethtool_plca_set_cfg(struct ynl_sock *ys,
                         struct ethtool_plca_set_cfg_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -5771,7 +5787,7 @@ int ethtool_plca_set_cfg(struct ynl_sock *ys,
        if (req->_present.burst_tmr)
                mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_BURST_TMR, req->burst_tmr);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -6124,6 +6140,7 @@ void ethtool_mm_set_req_free(struct ethtool_mm_set_req *req)
 
 int ethtool_mm_set(struct ynl_sock *ys, struct ethtool_mm_set_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -6143,7 +6160,7 @@ int ethtool_mm_set(struct ynl_sock *ys, struct ethtool_mm_set_req *req)
        if (req->_present.tx_min_frag_size)
                mnl_attr_put_u32(nlh, ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, req->tx_min_frag_size);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
index 4271b5d43c58678b2480cd4a7dc0018a14684751..f30bef23bc310198e37cf731b4904e5904e12706 100644 (file)
@@ -72,6 +72,7 @@ void fou_add_req_free(struct fou_add_req *req)
 
 int fou_add(struct ynl_sock *ys, struct fou_add_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -99,7 +100,7 @@ int fou_add(struct ynl_sock *ys, struct fou_add_req *req)
        if (req->_present.ifindex)
                mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
@@ -117,6 +118,7 @@ void fou_del_req_free(struct fou_del_req *req)
 
 int fou_del(struct ynl_sock *ys, struct fou_del_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -140,7 +142,7 @@ int fou_del(struct ynl_sock *ys, struct fou_del_req *req)
        if (req->_present.peer_v6_len)
                mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
index 7c67765daf9021931d109f01df7725ecb0ee2a16..6901f8462cca8ba2320356c5db83edf1d28ce4a5 100644 (file)
@@ -295,6 +295,7 @@ void handshake_done_req_free(struct handshake_done_req *req)
 
 int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req)
 {
+       struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
        struct nlmsghdr *nlh;
        int err;
 
@@ -308,7 +309,7 @@ int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req)
        for (unsigned int i = 0; i < req->n_remote_auth; i++)
                mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_REMOTE_AUTH, req->remote_auth[i]);
 
-       err = ynl_exec(ys, nlh, NULL);
+       err = ynl_exec(ys, nlh, &yrs);
        if (err < 0)
                return -1;
 
index c4003a83cd5d87dc1764f83f230b0aca5ce7a1a3..8337aa6de25e9bad9a5c1ad2176f307f3dd9bc87 100755 (executable)
@@ -1505,6 +1505,12 @@ def put_op_name(family, cw):
     cw.block_start(line=f"static const char * const {map_name}[] =")
     for op_name, op in family.msgs.items():
         if op.rsp_value:
+            # Make sure we don't add duplicated entries, if multiple commands
+            # produce the same response in legacy families.
+            if family.rsp_by_value[op.rsp_value] != op:
+                cw.p(f'// skip "{op_name}", duplicate reply value')
+                continue
+
             if op.req_value == op.rsp_value:
                 cw.p(f'[{op.enum_name}] = "{op_name}",')
             else:
@@ -1703,14 +1709,14 @@ def print_req(ri):
     ret_ok = '0'
     ret_err = '-1'
     direction = "request"
-    local_vars = ['struct nlmsghdr *nlh;',
+    local_vars = ['struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };',
+                  'struct nlmsghdr *nlh;',
                   'int err;']
 
     if 'reply' in ri.op[ri.op_mode]:
         ret_ok = 'rsp'
         ret_err = 'NULL'
-        local_vars += [f'{type_name(ri, rdir(direction))} *rsp;',
-                       'struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };']
+        local_vars += [f'{type_name(ri, rdir(direction))} *rsp;']
 
     print_prototype(ri, direction, terminate=False)
     ri.cw.block_start()
@@ -1726,7 +1732,6 @@ def print_req(ri):
         attr.attr_put(ri, "req")
     ri.cw.nl()
 
-    parse_arg = "NULL"
     if 'reply' in ri.op[ri.op_mode]:
         ri.cw.p('rsp = calloc(1, sizeof(*rsp));')
         ri.cw.p('yrs.yarg.data = rsp;')
@@ -1736,8 +1741,7 @@ def print_req(ri):
         else:
             ri.cw.p(f'yrs.rsp_cmd = {ri.op.rsp_value};')
         ri.cw.nl()
-        parse_arg = '&yrs'
-    ri.cw.p(f"err = ynl_exec(ys, nlh, {parse_arg});")
+    ri.cw.p("err = ynl_exec(ys, nlh, &yrs);")
     ri.cw.p('if (err < 0)')
     if 'reply' in ri.op[ri.op_mode]:
         ri.cw.p('goto err_free;')
index 1da7f4b91b4f8af584fdb97c4ca3fa258604273a..dc42de1785cee715a2ebca5a9d682e565ec7324a 100644 (file)
@@ -1,3 +1,5 @@
+arch/arm64/tools/gen-sysreg.awk
+arch/arm64/tools/sysreg
 tools/perf
 tools/arch
 tools/scripts
index d88da787e815b5a3b8e43c7f0f61b9de9ca19239..058c9aecf6087d065a31115492b4e80bed69c7a2 100644 (file)
@@ -434,6 +434,21 @@ export INSTALL SHELL_PATH
 
 SHELL = $(SHELL_PATH)
 
+arm64_gen_sysreg_dir := $(srctree)/tools/arch/arm64/tools
+ifneq ($(OUTPUT),)
+  arm64_gen_sysreg_outdir := $(OUTPUT)
+else
+  arm64_gen_sysreg_outdir := $(CURDIR)
+endif
+
+arm64-sysreg-defs: FORCE
+       $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir)
+
+arm64-sysreg-defs-clean:
+       $(call QUIET_CLEAN,arm64-sysreg-defs)
+       $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir) \
+               clean > /dev/null
+
 beauty_linux_dir := $(srctree)/tools/perf/trace/beauty/include/linux/
 linux_uapi_dir := $(srctree)/tools/include/uapi/linux
 asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
@@ -450,15 +465,6 @@ drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh
 # Create output directory if not already present
 _dummy := $(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_outdir)')
 
-arm64_gen_sysreg_dir := $(srctree)/tools/arch/arm64/tools
-
-arm64-sysreg-defs: FORCE
-       $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir)
-
-arm64-sysreg-defs-clean:
-       $(call QUIET_CLEAN,arm64-sysreg-defs)
-       $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) clean > /dev/null
-
 $(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl)
        $(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@
 
index 80be0e98ea0c5488ef27a08637e8a12b991c6eae..116ff501bf9263ac82a184e4fa408205e52292dd 100644 (file)
 450    common  set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    n64     cachestat                       sys_cachestat
 452    n64     fchmodat2                       sys_fchmodat2
+453    n64     map_shadow_stack                sys_map_shadow_stack
+454    n64     futex_wake                      sys_futex_wake
+455    n64     futex_wait                      sys_futex_wait
+456    n64     futex_requeue                   sys_futex_requeue
index e1412519b4ad9ae2a38750a1c4e3dce63ce83ecb..7fab411378f2dd7dda7b136e8fec9b28186e0fea 100644 (file)
 450    nospu   set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    common  cachestat                       sys_cachestat
 452    common  fchmodat2                       sys_fchmodat2
+453    common  map_shadow_stack                sys_ni_syscall
+454    common  futex_wake                      sys_futex_wake
+455    common  futex_wait                      sys_futex_wait
+456    common  futex_requeue                   sys_futex_requeue
index cc0bc144b6616226b7df2aa717a7e57b798756d5..86fec9b080f61bd0a5ef09395649cf15a9c90502 100644 (file)
 450  common    set_mempolicy_home_node sys_set_mempolicy_home_node     sys_set_mempolicy_home_node
 451  common    cachestat               sys_cachestat                   sys_cachestat
 452  common    fchmodat2               sys_fchmodat2                   sys_fchmodat2
+453  common    map_shadow_stack        sys_map_shadow_stack            sys_map_shadow_stack
+454  common    futex_wake              sys_futex_wake                  sys_futex_wake
+455  common    futex_wait              sys_futex_wait                  sys_futex_wait
+456  common    futex_requeue           sys_futex_requeue               sys_futex_requeue
index 2a62eaf30d69a3f016be8587e85d4a409d19f777..8cb8bf68721cf6fab663bdeaf0299899784ee0bf 100644 (file)
 451    common  cachestat               sys_cachestat
 452    common  fchmodat2               sys_fchmodat2
 453    64      map_shadow_stack        sys_map_shadow_stack
+454    common  futex_wake              sys_futex_wake
+455    common  futex_wait              sys_futex_wait
+456    common  futex_requeue           sys_futex_requeue
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
index f007a9b27065f8708934849633a77fe7a19f1c4d..0092b9b39611d7e9d0bcc0f5c89668e96a615d68 100644 (file)
@@ -1643,7 +1643,7 @@ static int top_print_work(struct perf_kwork *kwork __maybe_unused, struct kwork_
        /*
         * pid
         */
-       ret += printf(" %*ld ", PRINT_PID_WIDTH, work->id);
+       ret += printf(" %*" PRIu64 " ", PRINT_PID_WIDTH, work->id);
 
        /*
         * tgid
index 39b74d83c7c4a7e8f1434a8b2005f0d7d8d4904f..cfcb7e2c3813f2dbd3fd5ca9d123a6883713d672 100644 (file)
@@ -383,6 +383,7 @@ struct ucred {
 #define SOL_MPTCP      284
 #define SOL_MCTP       285
 #define SOL_SMC                286
+#define SOL_VSOCK      287
 
 /* IPX options */
 #define IPX_TYPE       1
index fb661c48992fe585804d74f9778711046f353dc0..988473bf907aee74f9863fe52bb59a5f3b4dd387 100644 (file)
@@ -347,7 +347,7 @@ CFLAGS_rbtree.o        += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET
 CFLAGS_libstring.o     += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 CFLAGS_hweight.o       += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 CFLAGS_header.o        += -include $(OUTPUT)PERF-VERSION-FILE
-CFLAGS_arm-spe.o       += -I$(srctree)/tools/arch/arm64/include/ -I$(srctree)/tools/arch/arm64/include/generated/
+CFLAGS_arm-spe.o       += -I$(srctree)/tools/arch/arm64/include/ -I$(OUTPUT)arch/arm64/include/generated/
 
 $(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE
        $(call rule_mkdir)
index e105245eb905d2436ad754589f301e1e1283fa2e..f1716c089c9912f4f9bfca827bde4e509db8d22e 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/zalloc.h>
 #include <linux/string.h>
 #include <bpf/bpf.h>
+#include <inttypes.h>
 
 #include "bpf_skel/lock_contention.skel.h"
 #include "bpf_skel/lock_data.h"
@@ -250,7 +251,7 @@ static const char *lock_contention_get_name(struct lock_contention *con,
                if (cgrp)
                        return cgrp->name;
 
-               snprintf(name_buf, sizeof(name_buf), "cgroup:%lu", cgrp_id);
+               snprintf(name_buf, sizeof(name_buf), "cgroup:%" PRIu64 "", cgrp_id);
                return name_buf;
        }
 
index 4a356a706785549bc248ed431f78a16a4c409f6c..40ad221e88811b0fc18993983ca91808f61fd739 100755 (executable)
@@ -4151,7 +4151,7 @@ def parseKernelLog(data):
                        elif(re.match('Enabling non-boot CPUs .*', msg)):
                                # start of first cpu resume
                                cpu_start = ktime
-                       elif(re.match('smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg)) \
+                       elif(re.match('smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg) \
                                or re.match('psci: CPU(?P<cpu>[0-9]*) killed.*', msg)):
                                # end of a cpu suspend, start of the next
                                m = re.match('smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg)
index 9a10512e340787decdeee31ca29b740d02e16ee2..7a334377f92b978fa642a0071b19f33d7e6fe74e 100644 (file)
@@ -211,9 +211,6 @@ int *fd_instr_count_percpu;
 struct timeval interval_tv = { 5, 0 };
 struct timespec interval_ts = { 5, 0 };
 
-/* Save original CPU model */
-unsigned int model_orig;
-
 unsigned int num_iterations;
 unsigned int header_iterations;
 unsigned int debug;
@@ -224,24 +221,16 @@ unsigned int rapl_joules;
 unsigned int summary_only;
 unsigned int list_header_only;
 unsigned int dump_only;
-unsigned int do_snb_cstates;
-unsigned int do_knl_cstates;
-unsigned int do_slm_cstates;
-unsigned int use_c1_residency_msr;
 unsigned int has_aperf;
 unsigned int has_epb;
 unsigned int has_turbo;
 unsigned int is_hybrid;
-unsigned int do_irtl_snb;
-unsigned int do_irtl_hsw;
 unsigned int units = 1000000;  /* MHz etc */
 unsigned int genuine_intel;
 unsigned int authentic_amd;
 unsigned int hygon_genuine;
 unsigned int max_level, max_extended_level;
 unsigned int has_invariant_tsc;
-unsigned int do_nhm_platform_info;
-unsigned int no_MSR_MISC_PWR_MGMT;
 unsigned int aperf_mperf_multiplier = 1;
 double bclk;
 double base_hz;
@@ -250,7 +239,6 @@ double tsc_tweak = 1.0;
 unsigned int show_pkg_only;
 unsigned int show_core_only;
 char *output_buffer, *outp;
-unsigned int do_rapl;
 unsigned int do_dts;
 unsigned int do_ptm;
 unsigned int do_ipc;
@@ -261,65 +249,686 @@ unsigned int gfx_cur_mhz;
 unsigned int gfx_act_mhz;
 unsigned int tj_max;
 unsigned int tj_max_override;
-int tcc_offset_bits;
 double rapl_power_units, rapl_time_units;
 double rapl_dram_energy_units, rapl_energy_units;
 double rapl_joule_counter_range;
-unsigned int do_core_perf_limit_reasons;
-unsigned int has_automatic_cstate_conversion;
-unsigned int dis_cstate_prewake;
-unsigned int do_gfx_perf_limit_reasons;
-unsigned int do_ring_perf_limit_reasons;
 unsigned int crystal_hz;
 unsigned long long tsc_hz;
 int base_cpu;
-double discover_bclk(unsigned int family, unsigned int model);
 unsigned int has_hwp;          /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
                        /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
 unsigned int has_hwp_notify;   /* IA32_HWP_INTERRUPT */
 unsigned int has_hwp_activity_window;  /* IA32_HWP_REQUEST[bits 41:32] */
 unsigned int has_hwp_epp;      /* IA32_HWP_REQUEST[bits 31:24] */
 unsigned int has_hwp_pkg;      /* IA32_HWP_REQUEST_PKG */
-unsigned int has_misc_feature_control;
 unsigned int first_counter_read = 1;
 int ignore_stdin;
 
-#define RAPL_PKG               (1 << 0)
-                                       /* 0x610 MSR_PKG_POWER_LIMIT */
-                                       /* 0x611 MSR_PKG_ENERGY_STATUS */
-#define RAPL_PKG_PERF_STATUS   (1 << 1)
-                                       /* 0x613 MSR_PKG_PERF_STATUS */
-#define RAPL_PKG_POWER_INFO    (1 << 2)
-                                       /* 0x614 MSR_PKG_POWER_INFO */
-
-#define RAPL_DRAM              (1 << 3)
-                                       /* 0x618 MSR_DRAM_POWER_LIMIT */
-                                       /* 0x619 MSR_DRAM_ENERGY_STATUS */
-#define RAPL_DRAM_PERF_STATUS  (1 << 4)
-                                       /* 0x61b MSR_DRAM_PERF_STATUS */
-#define RAPL_DRAM_POWER_INFO   (1 << 5)
-                                       /* 0x61c MSR_DRAM_POWER_INFO */
-
-#define RAPL_CORES_POWER_LIMIT (1 << 6)
-                                       /* 0x638 MSR_PP0_POWER_LIMIT */
-#define RAPL_CORE_POLICY       (1 << 7)
-                                       /* 0x63a MSR_PP0_POLICY */
-
-#define RAPL_GFX               (1 << 8)
-                                       /* 0x640 MSR_PP1_POWER_LIMIT */
-                                       /* 0x641 MSR_PP1_ENERGY_STATUS */
-                                       /* 0x642 MSR_PP1_POLICY */
-
-#define RAPL_CORES_ENERGY_STATUS       (1 << 9)
-                                       /* 0x639 MSR_PP0_ENERGY_STATUS */
-#define RAPL_PER_CORE_ENERGY   (1 << 10)
-                                       /* Indicates cores energy collection is per-core,
-                                        * not per-package. */
-#define RAPL_AMD_F17H          (1 << 11)
-                                       /* 0xc0010299 MSR_RAPL_PWR_UNIT */
-                                       /* 0xc001029a MSR_CORE_ENERGY_STAT */
-                                       /* 0xc001029b MSR_PKG_ENERGY_STAT */
-#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
+int get_msr(int cpu, off_t offset, unsigned long long *msr);
+
+/* Model specific support Start */
+
+/* List of features that may diverge among different platforms */
+struct platform_features {
+       bool has_msr_misc_feature_control;      /* MSR_MISC_FEATURE_CONTROL */
+       bool has_msr_misc_pwr_mgmt;     /* MSR_MISC_PWR_MGMT */
+       bool has_nhm_msrs;      /* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */
+       bool has_config_tdp;    /* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */
+       int bclk_freq;          /* CPU base clock */
+       int crystal_freq;       /* Crystal clock to use when not available from CPUID.15 */
+       int supported_cstates;  /* Core cstates and Package cstates supported */
+       int cst_limit;          /* MSR_PKG_CST_CONFIG_CONTROL */
+       bool has_cst_auto_convension;   /* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */
+       bool has_irtl_msrs;     /* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */
+       bool has_msr_core_c1_res;       /* MSR_CORE_C1_RES */
+       bool has_msr_module_c6_res_ms;  /* MSR_MODULE_C6_RES_MS */
+       bool has_msr_c6_demotion_policy_config; /* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */
+       bool has_msr_atom_pkg_c6_residency;     /* MSR_ATOM_PKG_C6_RESIDENCY */
+       bool has_msr_knl_core_c6_residency;     /* MSR_KNL_CORE_C6_RESIDENCY */
+       bool has_ext_cst_msrs;  /* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */
+       bool has_cst_prewake_bit;       /* Cstate prewake bit in MSR_IA32_POWER_CTL */
+       int trl_msrs;           /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */
+       int plr_msrs;           /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */
+       int rapl_msrs;          /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
+       bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
+       bool has_rapl_divisor;  /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
+       bool has_fixed_rapl_unit;       /* Fixed Energy Unit used for DRAM RAPL Domain */
+       int rapl_quirk_tdp;     /* Hardcoded TDP value when cannot be retrieved from hardware */
+       int tcc_offset_bits;    /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
+       bool enable_tsc_tweak;  /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
+       bool need_perf_multiplier;      /* mperf/aperf multiplier */
+};
+
+struct platform_data {
+       unsigned int model;
+       const struct platform_features *features;
+};
+
+/* For BCLK */
+enum bclk_freq {
+       BCLK_100MHZ = 1,
+       BCLK_133MHZ,
+       BCLK_SLV,
+};
+
+#define SLM_BCLK_FREQS 5
+double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
+
+double slm_bclk(void)
+{
+       unsigned long long msr = 3;
+       unsigned int i;
+       double freq;
+
+       if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
+               fprintf(outf, "SLM BCLK: unknown\n");
+
+       i = msr & 0xf;
+       if (i >= SLM_BCLK_FREQS) {
+               fprintf(outf, "SLM BCLK[%d] invalid\n", i);
+               i = 3;
+       }
+       freq = slm_freq_table[i];
+
+       if (!quiet)
+               fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
+
+       return freq;
+}
+
+/* For Package cstate limit */
+enum package_cstate_limit {
+       CST_LIMIT_NHM = 1,
+       CST_LIMIT_SNB,
+       CST_LIMIT_HSW,
+       CST_LIMIT_SKX,
+       CST_LIMIT_ICX,
+       CST_LIMIT_SLV,
+       CST_LIMIT_AMT,
+       CST_LIMIT_KNL,
+       CST_LIMIT_GMT,
+};
+
+/* For Turbo Ratio Limit MSRs */
+enum turbo_ratio_limit_msrs {
+       TRL_BASE = BIT(0),
+       TRL_LIMIT1 = BIT(1),
+       TRL_LIMIT2 = BIT(2),
+       TRL_ATOM = BIT(3),
+       TRL_KNL = BIT(4),
+       TRL_CORECOUNT = BIT(5),
+};
+
+/* For Perf Limit Reason MSRs */
+enum perf_limit_reason_msrs {
+       PLR_CORE = BIT(0),
+       PLR_GFX = BIT(1),
+       PLR_RING = BIT(2),
+};
+
+/* For RAPL MSRs */
+enum rapl_msrs {
+       RAPL_PKG_POWER_LIMIT = BIT(0),  /* 0x610 MSR_PKG_POWER_LIMIT */
+       RAPL_PKG_ENERGY_STATUS = BIT(1),        /* 0x611 MSR_PKG_ENERGY_STATUS */
+       RAPL_PKG_PERF_STATUS = BIT(2),  /* 0x613 MSR_PKG_PERF_STATUS */
+       RAPL_PKG_POWER_INFO = BIT(3),   /* 0x614 MSR_PKG_POWER_INFO */
+       RAPL_DRAM_POWER_LIMIT = BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */
+       RAPL_DRAM_ENERGY_STATUS = BIT(5),       /* 0x619 MSR_DRAM_ENERGY_STATUS */
+       RAPL_DRAM_PERF_STATUS = BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */
+       RAPL_DRAM_POWER_INFO = BIT(7),  /* 0x61c MSR_DRAM_POWER_INFO */
+       RAPL_CORE_POWER_LIMIT = BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */
+       RAPL_CORE_ENERGY_STATUS = BIT(9),       /* 0x639 MSR_PP0_ENERGY_STATUS */
+       RAPL_CORE_POLICY = BIT(10),     /* 0x63a MSR_PP0_POLICY */
+       RAPL_GFX_POWER_LIMIT = BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */
+       RAPL_GFX_ENERGY_STATUS = BIT(12),       /* 0x641 MSR_PP1_ENERGY_STATUS */
+       RAPL_GFX_POLICY = BIT(13),      /* 0x642 MSR_PP1_POLICY */
+       RAPL_AMD_PWR_UNIT = BIT(14),    /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */
+       RAPL_AMD_CORE_ENERGY_STAT = BIT(15),    /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */
+       RAPL_AMD_PKG_ENERGY_STAT = BIT(16),     /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */
+};
+
+#define RAPL_PKG       (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT)
+#define RAPL_DRAM      (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT)
+#define RAPL_CORE      (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT)
+#define RAPL_GFX       (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS)
+
+#define RAPL_PKG_ALL   (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
+#define RAPL_DRAM_ALL  (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
+#define RAPL_CORE_ALL  (RAPL_CORE | RAPL_CORE_POLICY)
+#define RAPL_GFX_ALL   (RAPL_GFX | RAPL_GFX_POLICY)
+
+#define RAPL_AMD_F17H  (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)
+
+/* For Cstates */
+enum cstates {
+       CC1 = BIT(0),
+       CC3 = BIT(1),
+       CC6 = BIT(2),
+       CC7 = BIT(3),
+       PC2 = BIT(4),
+       PC3 = BIT(5),
+       PC6 = BIT(6),
+       PC7 = BIT(7),
+       PC8 = BIT(8),
+       PC9 = BIT(9),
+       PC10 = BIT(10),
+};
+
+static const struct platform_features nhm_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_133MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+       .cst_limit = CST_LIMIT_NHM,
+       .trl_msrs = TRL_BASE,
+};
+
+static const struct platform_features nhx_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_133MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+       .cst_limit = CST_LIMIT_NHM,
+};
+
+static const struct platform_features snb_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_SNB,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features snx_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_SNB,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features ivb_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_SNB,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features ivx_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_SNB,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE | TRL_LIMIT1,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features hsw_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features hsx_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2,
+       .plr_msrs = PLR_CORE | PLR_RING,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+       .has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features hswl_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features hswg_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdw_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdwg_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdx_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .has_cst_auto_convension = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+       .has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features skl_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .crystal_freq = 24000000,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .has_ext_cst_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .tcc_offset_bits = 6,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+       .enable_tsc_tweak = 1,
+};
+
+static const struct platform_features cnl_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .has_msr_core_c1_res = 1,
+       .has_ext_cst_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .tcc_offset_bits = 6,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+       .enable_tsc_tweak = 1,
+};
+
+static const struct platform_features adl_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .has_msr_core_c1_res = 1,
+       .has_ext_cst_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .tcc_offset_bits = 6,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+       .enable_tsc_tweak = 1,
+};
+
+static const struct platform_features skx_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | PC2 | PC6,
+       .cst_limit = CST_LIMIT_SKX,
+       .has_irtl_msrs = 1,
+       .has_cst_auto_convension = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+       .has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features icx_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | PC2 | PC6,
+       .cst_limit = CST_LIMIT_ICX,
+       .has_irtl_msrs = 1,
+       .has_cst_prewake_bit = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+       .has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features spr_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | PC2 | PC6,
+       .cst_limit = CST_LIMIT_SKX,
+       .has_msr_core_c1_res = 1,
+       .has_irtl_msrs = 1,
+       .has_cst_prewake_bit = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features srf_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | PC2 | PC6,
+       .cst_limit = CST_LIMIT_SKX,
+       .has_msr_core_c1_res = 1,
+       .has_msr_module_c6_res_ms = 1,
+       .has_irtl_msrs = 1,
+       .has_cst_prewake_bit = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features grr_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6,
+       .cst_limit = CST_LIMIT_SKX,
+       .has_msr_core_c1_res = 1,
+       .has_msr_module_c6_res_ms = 1,
+       .has_irtl_msrs = 1,
+       .has_cst_prewake_bit = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features slv_features = {
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_SLV,
+       .supported_cstates = CC1 | CC6 | PC6,
+       .cst_limit = CST_LIMIT_SLV,
+       .has_msr_core_c1_res = 1,
+       .has_msr_module_c6_res_ms = 1,
+       .has_msr_c6_demotion_policy_config = 1,
+       .has_msr_atom_pkg_c6_residency = 1,
+       .trl_msrs = TRL_ATOM,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE,
+       .has_rapl_divisor = 1,
+       .rapl_quirk_tdp = 30,
+};
+
+static const struct platform_features slvd_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_SLV,
+       .supported_cstates = CC1 | CC6 | PC3 | PC6,
+       .cst_limit = CST_LIMIT_SLV,
+       .has_msr_atom_pkg_c6_residency = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE,
+       .rapl_quirk_tdp = 30,
+};
+
+static const struct platform_features amt_features = {
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_133MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+       .cst_limit = CST_LIMIT_AMT,
+       .trl_msrs = TRL_BASE,
+};
+
+static const struct platform_features gmt_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .crystal_freq = 19200000,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+       .cst_limit = CST_LIMIT_GMT,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features gmtd_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .crystal_freq = 25000000,
+       .supported_cstates = CC1 | CC6 | PC2 | PC6,
+       .cst_limit = CST_LIMIT_GMT,
+       .has_irtl_msrs = 1,
+       .has_msr_core_c1_res = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
+};
+
+static const struct platform_features gmtp_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .crystal_freq = 19200000,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+       .cst_limit = CST_LIMIT_GMT,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features tmt_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+       .cst_limit = CST_LIMIT_GMT,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+       .enable_tsc_tweak = 1,
+};
+
+static const struct platform_features tmtd_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6,
+       .cst_limit = CST_LIMIT_GMT,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG_ALL,
+};
+
+static const struct platform_features knl_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | PC3 | PC6,
+       .cst_limit = CST_LIMIT_KNL,
+       .has_msr_knl_core_c6_residency = 1,
+       .trl_msrs = TRL_KNL,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+       .has_fixed_rapl_unit = 1,
+       .need_perf_multiplier = 1,
+};
+
+static const struct platform_features default_features = {
+};
+
+static const struct platform_features amd_features_with_rapl = {
+       .rapl_msrs = RAPL_AMD_F17H,
+       .has_per_core_rapl = 1,
+       .rapl_quirk_tdp = 280,  /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
+};
+
+static const struct platform_data turbostat_pdata[] = {
+       { INTEL_FAM6_NEHALEM, &nhm_features },
+       { INTEL_FAM6_NEHALEM_G, &nhm_features },
+       { INTEL_FAM6_NEHALEM_EP, &nhm_features },
+       { INTEL_FAM6_NEHALEM_EX, &nhx_features },
+       { INTEL_FAM6_WESTMERE, &nhm_features },
+       { INTEL_FAM6_WESTMERE_EP, &nhm_features },
+       { INTEL_FAM6_WESTMERE_EX, &nhx_features },
+       { INTEL_FAM6_SANDYBRIDGE, &snb_features },
+       { INTEL_FAM6_SANDYBRIDGE_X, &snx_features },
+       { INTEL_FAM6_IVYBRIDGE, &ivb_features },
+       { INTEL_FAM6_IVYBRIDGE_X, &ivx_features },
+       { INTEL_FAM6_HASWELL, &hsw_features },
+       { INTEL_FAM6_HASWELL_X, &hsx_features },
+       { INTEL_FAM6_HASWELL_L, &hswl_features },
+       { INTEL_FAM6_HASWELL_G, &hswg_features },
+       { INTEL_FAM6_BROADWELL, &bdw_features },
+       { INTEL_FAM6_BROADWELL_G, &bdwg_features },
+       { INTEL_FAM6_BROADWELL_X, &bdx_features },
+       { INTEL_FAM6_BROADWELL_D, &bdx_features },
+       { INTEL_FAM6_SKYLAKE_L, &skl_features },
+       { INTEL_FAM6_SKYLAKE, &skl_features },
+       { INTEL_FAM6_SKYLAKE_X, &skx_features },
+       { INTEL_FAM6_KABYLAKE_L, &skl_features },
+       { INTEL_FAM6_KABYLAKE, &skl_features },
+       { INTEL_FAM6_COMETLAKE, &skl_features },
+       { INTEL_FAM6_COMETLAKE_L, &skl_features },
+       { INTEL_FAM6_CANNONLAKE_L, &cnl_features },
+       { INTEL_FAM6_ICELAKE_X, &icx_features },
+       { INTEL_FAM6_ICELAKE_D, &icx_features },
+       { INTEL_FAM6_ICELAKE_L, &cnl_features },
+       { INTEL_FAM6_ICELAKE_NNPI, &cnl_features },
+       { INTEL_FAM6_ROCKETLAKE, &cnl_features },
+       { INTEL_FAM6_TIGERLAKE_L, &cnl_features },
+       { INTEL_FAM6_TIGERLAKE, &cnl_features },
+       { INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features },
+       { INTEL_FAM6_EMERALDRAPIDS_X, &spr_features },
+       { INTEL_FAM6_GRANITERAPIDS_X, &spr_features },
+       { INTEL_FAM6_LAKEFIELD, &cnl_features },
+       { INTEL_FAM6_ALDERLAKE, &adl_features },
+       { INTEL_FAM6_ALDERLAKE_L, &adl_features },
+       { INTEL_FAM6_RAPTORLAKE, &adl_features },
+       { INTEL_FAM6_RAPTORLAKE_P, &adl_features },
+       { INTEL_FAM6_RAPTORLAKE_S, &adl_features },
+       { INTEL_FAM6_METEORLAKE, &cnl_features },
+       { INTEL_FAM6_METEORLAKE_L, &cnl_features },
+       { INTEL_FAM6_ARROWLAKE, &cnl_features },
+       { INTEL_FAM6_LUNARLAKE_M, &cnl_features },
+       { INTEL_FAM6_ATOM_SILVERMONT, &slv_features },
+       { INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features },
+       { INTEL_FAM6_ATOM_AIRMONT, &amt_features },
+       { INTEL_FAM6_ATOM_GOLDMONT, &gmt_features },
+       { INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features },
+       { INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features },
+       { INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features },
+       { INTEL_FAM6_ATOM_TREMONT, &tmt_features },
+       { INTEL_FAM6_ATOM_TREMONT_L, &tmt_features },
+       { INTEL_FAM6_ATOM_GRACEMONT, &adl_features },
+       { INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features },
+       { INTEL_FAM6_ATOM_CRESTMONT, &grr_features },
+       { INTEL_FAM6_XEON_PHI_KNL, &knl_features },
+       { INTEL_FAM6_XEON_PHI_KNM, &knl_features },
+       /*
+        * Missing support for
+        * INTEL_FAM6_ICELAKE
+        * INTEL_FAM6_ATOM_SILVERMONT_MID
+        * INTEL_FAM6_ATOM_AIRMONT_MID
+        * INTEL_FAM6_ATOM_AIRMONT_NP
+        */
+       { 0, NULL },
+};
+
+static const struct platform_features *platform;
+
+void probe_platform_features(unsigned int family, unsigned int model)
+{
+       int i;
+
+       platform = &default_features;
+
+       if (authentic_amd || hygon_genuine) {
+               if (max_extended_level >= 0x80000007) {
+                       unsigned int eax, ebx, ecx, edx;
+
+                       __cpuid(0x80000007, eax, ebx, ecx, edx);
+                       /* RAPL (Fam 17h+) */
+                       if ((edx & (1 << 14)) && family >= 0x17)
+                               platform = &amd_features_with_rapl;
+               }
+               return;
+       }
+
+       if (!genuine_intel || family != 6)
+               return;
+
+       for (i = 0; turbostat_pdata[i].features; i++) {
+               if (turbostat_pdata[i].model == model) {
+                       platform = turbostat_pdata[i].features;
+                       return;
+               }
+       }
+}
+
+/* Model specific support End */
+
 #define        TJMAX_DEFAULT   100
 
 /* MSRs that are not yet in the kernel-provided header. */
@@ -333,8 +942,8 @@ int backwards_count;
 char *progname;
 
 #define CPU_SUBSET_MAXCPUS     1024    /* need to use before probe... */
-cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
-size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
+cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
+size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
 #define MAX_ADDED_COUNTERS 8
 #define MAX_ADDED_THREAD_COUNTERS 24
 #define BITMASK_SIZE 32
@@ -355,12 +964,11 @@ struct thread_data {
        unsigned int x2apic_id;
        unsigned int flags;
        bool is_atom;
-#define CPU_IS_FIRST_THREAD_IN_CORE    0x2
-#define CPU_IS_FIRST_CORE_IN_PACKAGE   0x4
        unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
 } *thread_even, *thread_odd;
 
 struct core_data {
+       int base_cpu;
        unsigned long long c3;
        unsigned long long c6;
        unsigned long long c7;
@@ -373,6 +981,7 @@ struct core_data {
 } *core_even, *core_odd;
 
 struct pkg_data {
+       int base_cpu;
        unsigned long long pc2;
        unsigned long long pc3;
        unsigned long long pc6;
@@ -456,7 +1065,7 @@ off_t idx_to_offset(int idx)
 
        switch (idx) {
        case IDX_PKG_ENERGY:
-               if (do_rapl & RAPL_AMD_F17H)
+               if (platform->rapl_msrs & RAPL_AMD_F17H)
                        offset = MSR_PKG_ENERGY_STAT;
                else
                        offset = MSR_PKG_ENERGY_STATUS;
@@ -516,17 +1125,17 @@ int idx_valid(int idx)
 {
        switch (idx) {
        case IDX_PKG_ENERGY:
-               return do_rapl & (RAPL_PKG | RAPL_AMD_F17H);
+               return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
        case IDX_DRAM_ENERGY:
-               return do_rapl & RAPL_DRAM;
+               return platform->rapl_msrs & RAPL_DRAM;
        case IDX_PP0_ENERGY:
-               return do_rapl & RAPL_CORES_ENERGY_STATUS;
+               return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS;
        case IDX_PP1_ENERGY:
-               return do_rapl & RAPL_GFX;
+               return platform->rapl_msrs & RAPL_GFX;
        case IDX_PKG_PERF:
-               return do_rapl & RAPL_PKG_PERF_STATUS;
+               return platform->rapl_msrs & RAPL_PKG_PERF_STATUS;
        case IDX_DRAM_PERF:
-               return do_rapl & RAPL_DRAM_PERF_STATUS;
+               return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS;
        default:
                return 0;
        }
@@ -563,6 +1172,9 @@ struct topo_params {
        int num_die;
        int num_cpus;
        int num_cores;
+       int allowed_packages;
+       int allowed_cpus;
+       int allowed_cores;
        int max_cpu_num;
        int max_node_num;
        int nodes_per_pkg;
@@ -575,7 +1187,7 @@ struct timeval tv_even, tv_odd, tv_delta;
 int *irq_column_2_cpu;         /* /proc/interrupts column numbers */
 int *irqs_per_cpu;             /* indexed by cpu_num */
 
-void setup_all_buffers(void);
+void setup_all_buffers(bool startup);
 
 char *sys_lpi_file;
 char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
@@ -586,6 +1198,11 @@ int cpu_is_not_present(int cpu)
        return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
 }
 
+int cpu_is_not_allowed(int cpu)
+{
+       return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set);
+}
+
 /*
  * run func(thread, core, package) in topology order
  * skip non-present cpus
@@ -603,10 +1220,9 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
                                        struct thread_data *t;
                                        struct core_data *c;
                                        struct pkg_data *p;
-
                                        t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
 
-                                       if (cpu_is_not_present(t->cpu_id))
+                                       if (cpu_is_not_allowed(t->cpu_id))
                                                continue;
 
                                        c = GET_CORE(core_base, core_no, node_no, pkg_no);
@@ -622,6 +1238,25 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
        return 0;
 }
 
+int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       UNUSED(p);
+
+       return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0);
+}
+
+int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       UNUSED(c);
+
+       return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0);
+}
+
+int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p);
+}
+
 int cpu_migrate(int cpu)
 {
        CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
@@ -904,11 +1539,11 @@ void print_header(char *delim)
        if (DO_BIC(BIC_CORE_THROT_CNT))
                outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
 
-       if (do_rapl && !rapl_joules) {
-               if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
+       if (platform->rapl_msrs && !rapl_joules) {
+               if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
                        outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
-       } else if (do_rapl && rapl_joules) {
-               if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
+       } else if (platform->rapl_msrs && rapl_joules) {
+               if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
                        outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
        }
 
@@ -966,10 +1601,10 @@ void print_header(char *delim)
        if (DO_BIC(BIC_SYS_LPI))
                outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
 
-       if (do_rapl && !rapl_joules) {
+       if (platform->rapl_msrs && !rapl_joules) {
                if (DO_BIC(BIC_PkgWatt))
                        outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
-               if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+               if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
                        outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
                if (DO_BIC(BIC_GFXWatt))
                        outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
@@ -979,10 +1614,10 @@ void print_header(char *delim)
                        outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
                if (DO_BIC(BIC_RAM__))
                        outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
-       } else if (do_rapl && rapl_joules) {
+       } else if (platform->rapl_msrs && rapl_joules) {
                if (DO_BIC(BIC_Pkg_J))
                        outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
-               if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+               if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
                        outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
                if (DO_BIC(BIC_GFX_J))
                        outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
@@ -1106,11 +1741,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        int printed = 0;
 
        /* if showing only 1st thread in core and this isn't one, bail out */
-       if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+       if (show_core_only && !is_cpu_first_thread_in_core(t, c, p))
                return 0;
 
        /* if showing only 1st thread in pkg and this isn't one, bail out */
-       if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p))
                return 0;
 
        /*if not summary line and --cpu is used */
@@ -1244,7 +1879,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);
 
        /* print per-core data only for 1st thread in core */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+       if (!is_cpu_first_thread_in_core(t, c, p))
                goto done;
 
        if (DO_BIC(BIC_CPU_c3))
@@ -1284,14 +1919,14 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 
        fmt8 = "%s%.2f";
 
-       if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
+       if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
                outp +=
                    sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
-       if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
+       if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
 
        /* print per-package data only for 1st core in package */
-       if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_core_in_package(t, c, p))
                goto done;
 
        /* PkgTmp */
@@ -1352,7 +1987,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
                outp +=
                    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
 
-       if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+       if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
                outp +=
                    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
        if (DO_BIC(BIC_GFXWatt))
@@ -1364,7 +1999,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
                            p->energy_dram * rapl_dram_energy_units / interval_float);
        if (DO_BIC(BIC_Pkg_J))
                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
-       if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+       if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
        if (DO_BIC(BIC_GFX_J))
                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
@@ -1527,7 +2162,7 @@ void delta_core(struct core_data *new, struct core_data *old)
 
 int soft_c1_residency_display(int bic)
 {
-       if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
+       if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res)
                return 0;
 
        return DO_BIC_READ(bic);
@@ -1567,7 +2202,8 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
 
        old->c1 = new->c1 - old->c1;
 
-       if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
+       if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
+           || soft_c1_residency_display(BIC_Avg_MHz)) {
                if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
                        old->aperf = new->aperf - old->aperf;
                        old->mperf = new->mperf - old->mperf;
@@ -1576,7 +2212,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
                }
        }
 
-       if (use_c1_residency_msr) {
+       if (platform->has_msr_core_c1_res) {
                /*
                 * Some models have a dedicated C1 residency MSR,
                 * which should be more accurate than the derivation below.
@@ -1626,7 +2262,7 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
        int retval = 0;
 
        /* calculate core delta only for 1st thread in core */
-       if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
+       if (is_cpu_first_thread_in_core(t, c, p))
                delta_core(c, c2);
 
        /* always calculate thread delta */
@@ -1635,7 +2271,7 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
                return retval;
 
        /* calculate package delta only for 1st core in package */
-       if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
+       if (is_cpu_first_core_in_package(t, c, p))
                retval = delta_package(p, p2);
 
        return retval;
@@ -1663,9 +2299,6 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        t->irq_count = 0;
        t->smi_count = 0;
 
-       /* tells format_counters to dump all fields from this set */
-       t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
-
        c->c3 = 0;
        c->c6 = 0;
        c->c7 = 0;
@@ -1749,7 +2382,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        }
 
        /* sum per-core values only for 1st thread in core */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+       if (!is_cpu_first_thread_in_core(t, c, p))
                return 0;
 
        average.cores.c3 += c->c3;
@@ -1769,7 +2402,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        }
 
        /* sum per-pkg values only for 1st core in pkg */
-       if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_core_in_package(t, c, p))
                return 0;
 
        if (DO_BIC(BIC_Totl_c0))
@@ -1834,40 +2467,40 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
        /* Use the global time delta for the average. */
        average.threads.tv_delta = tv_delta;
 
-       average.threads.tsc /= topo.num_cpus;
-       average.threads.aperf /= topo.num_cpus;
-       average.threads.mperf /= topo.num_cpus;
-       average.threads.instr_count /= topo.num_cpus;
-       average.threads.c1 /= topo.num_cpus;
+       average.threads.tsc /= topo.allowed_cpus;
+       average.threads.aperf /= topo.allowed_cpus;
+       average.threads.mperf /= topo.allowed_cpus;
+       average.threads.instr_count /= topo.allowed_cpus;
+       average.threads.c1 /= topo.allowed_cpus;
 
        if (average.threads.irq_count > 9999999)
                sums_need_wide_columns = 1;
 
-       average.cores.c3 /= topo.num_cores;
-       average.cores.c6 /= topo.num_cores;
-       average.cores.c7 /= topo.num_cores;
-       average.cores.mc6_us /= topo.num_cores;
+       average.cores.c3 /= topo.allowed_cores;
+       average.cores.c6 /= topo.allowed_cores;
+       average.cores.c7 /= topo.allowed_cores;
+       average.cores.mc6_us /= topo.allowed_cores;
 
        if (DO_BIC(BIC_Totl_c0))
-               average.packages.pkg_wtd_core_c0 /= topo.num_packages;
+               average.packages.pkg_wtd_core_c0 /= topo.allowed_packages;
        if (DO_BIC(BIC_Any_c0))
-               average.packages.pkg_any_core_c0 /= topo.num_packages;
+               average.packages.pkg_any_core_c0 /= topo.allowed_packages;
        if (DO_BIC(BIC_GFX_c0))
-               average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
+               average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages;
        if (DO_BIC(BIC_CPUGFX))
-               average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
+               average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages;
 
-       average.packages.pc2 /= topo.num_packages;
+       average.packages.pc2 /= topo.allowed_packages;
        if (DO_BIC(BIC_Pkgpc3))
-               average.packages.pc3 /= topo.num_packages;
+               average.packages.pc3 /= topo.allowed_packages;
        if (DO_BIC(BIC_Pkgpc6))
-               average.packages.pc6 /= topo.num_packages;
+               average.packages.pc6 /= topo.allowed_packages;
        if (DO_BIC(BIC_Pkgpc7))
-               average.packages.pc7 /= topo.num_packages;
+               average.packages.pc7 /= topo.allowed_packages;
 
-       average.packages.pc8 /= topo.num_packages;
-       average.packages.pc9 /= topo.num_packages;
-       average.packages.pc10 /= topo.num_packages;
+       average.packages.pc8 /= topo.allowed_packages;
+       average.packages.pc9 /= topo.allowed_packages;
+       average.packages.pc10 /= topo.allowed_packages;
 
        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
                if (mp->format == FORMAT_RAW)
@@ -1877,7 +2510,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
                                sums_need_wide_columns = 1;
                        continue;
                }
-               average.threads.counter[i] /= topo.num_cpus;
+               average.threads.counter[i] /= topo.allowed_cpus;
        }
        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
                if (mp->format == FORMAT_RAW)
@@ -1886,7 +2519,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
                        if (average.cores.counter[i] > 9999999)
                                sums_need_wide_columns = 1;
                }
-               average.cores.counter[i] /= topo.num_cores;
+               average.cores.counter[i] /= topo.allowed_cores;
        }
        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
                if (mp->format == FORMAT_RAW)
@@ -1895,7 +2528,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
                        if (average.packages.counter[i] > 9999999)
                                sums_need_wide_columns = 1;
                }
-               average.packages.counter[i] /= topo.num_packages;
+               average.packages.counter[i] /= topo.allowed_packages;
        }
 }
 
@@ -2092,7 +2725,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 retry:
        t->tsc = rdtsc();       /* we are running on local CPU of interest */
 
-       if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
+       if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
+           || soft_c1_residency_display(BIC_Avg_MHz)) {
                unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
 
                /*
@@ -2158,7 +2792,7 @@ retry:
                        return -5;
                t->smi_count = msr & 0xFFFFFFFF;
        }
-       if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
+       if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) {
                if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
                        return -6;
        }
@@ -2169,7 +2803,7 @@ retry:
        }
 
        /* collect core counters only for 1st thread in core */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+       if (!is_cpu_first_thread_in_core(t, c, p))
                goto done;
 
        if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
@@ -2177,10 +2811,10 @@ retry:
                        return -6;
        }
 
-       if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
+       if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) {
                if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
                        return -7;
-       } else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) {
+       } else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) {
                if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
                        return -7;
        }
@@ -2212,7 +2846,7 @@ retry:
        if (DO_BIC(BIC_CORE_THROT_CNT))
                get_core_throt_cnt(cpu, &c->core_throt_cnt);
 
-       if (do_rapl & RAPL_AMD_F17H) {
+       if (platform->rapl_msrs & RAPL_AMD_F17H) {
                if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
                        return -14;
                c->core_energy = msr & 0xFFFFFFFF;
@@ -2224,7 +2858,7 @@ retry:
        }
 
        /* collect package counters only for 1st core in package */
-       if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_core_in_package(t, c, p))
                goto done;
 
        if (DO_BIC(BIC_Totl_c0)) {
@@ -2247,7 +2881,7 @@ retry:
                if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
                        return -9;
        if (DO_BIC(BIC_Pkgpc6)) {
-               if (do_slm_cstates) {
+               if (platform->has_msr_atom_pkg_c6_residency) {
                        if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
                                return -10;
                } else {
@@ -2277,37 +2911,37 @@ retry:
        if (DO_BIC(BIC_SYS_LPI))
                p->sys_lpi = cpuidle_cur_sys_lpi_us;
 
-       if (do_rapl & RAPL_PKG) {
+       if (platform->rapl_msrs & RAPL_PKG) {
                if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
                        return -13;
                p->energy_pkg = msr;
        }
-       if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
+       if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) {
                if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
                        return -14;
                p->energy_cores = msr;
        }
-       if (do_rapl & RAPL_DRAM) {
+       if (platform->rapl_msrs & RAPL_DRAM) {
                if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
                        return -15;
                p->energy_dram = msr;
        }
-       if (do_rapl & RAPL_GFX) {
+       if (platform->rapl_msrs & RAPL_GFX) {
                if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
                        return -16;
                p->energy_gfx = msr;
        }
-       if (do_rapl & RAPL_PKG_PERF_STATUS) {
+       if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) {
                if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
                        return -16;
                p->rapl_pkg_perf_status = msr;
        }
-       if (do_rapl & RAPL_DRAM_PERF_STATUS) {
+       if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) {
                if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
                        return -16;
                p->rapl_dram_perf_status = msr;
        }
-       if (do_rapl & RAPL_AMD_F17H) {
+       if (platform->rapl_msrs & RAPL_AMD_F17H) {
                if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
                        return -13;
                p->energy_pkg = msr;
@@ -2414,18 +3048,58 @@ int icx_pkg_cstate_limits[16] =
        PCLRSV, PCLRSV
 };
 
-static void calculate_tsc_tweak()
+void probe_cst_limit(void)
 {
-       tsc_tweak = base_hz / tsc_hz;
-}
+       unsigned long long msr;
+       int *pkg_cstate_limits;
+
+       if (!platform->has_nhm_msrs)
+               return;
+
+       switch (platform->cst_limit) {
+       case CST_LIMIT_NHM:
+               pkg_cstate_limits = nhm_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_SNB:
+               pkg_cstate_limits = snb_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_HSW:
+               pkg_cstate_limits = hsw_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_SKX:
+               pkg_cstate_limits = skx_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_ICX:
+               pkg_cstate_limits = icx_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_SLV:
+               pkg_cstate_limits = slv_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_AMT:
+               pkg_cstate_limits = amt_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_KNL:
+               pkg_cstate_limits = phi_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_GMT:
+               pkg_cstate_limits = glm_pkg_cstate_limits;
+               break;
+       default:
+               return;
+       }
 
-void prewake_cstate_probe(unsigned int family, unsigned int model);
+       get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
+       pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
+}
 
-static void dump_nhm_platform_info(void)
+static void dump_platform_info(void)
 {
        unsigned long long msr;
        unsigned int ratio;
 
+       if (!platform->has_nhm_msrs)
+               return;
+
        get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
 
        fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
@@ -2435,19 +3109,27 @@ static void dump_nhm_platform_info(void)
 
        ratio = (msr >> 8) & 0xFF;
        fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
+}
+
+static void dump_power_ctl(void)
+{
+       unsigned long long msr;
+
+       if (!platform->has_nhm_msrs)
+               return;
 
        get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
        fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
                base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
 
        /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
-       if (dis_cstate_prewake)
+       if (platform->has_cst_prewake_bit)
                fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
 
        return;
 }
 
-static void dump_hsw_turbo_ratio_limits(void)
+static void dump_turbo_ratio_limit2(void)
 {
        unsigned long long msr;
        unsigned int ratio;
@@ -2466,7 +3148,7 @@ static void dump_hsw_turbo_ratio_limits(void)
        return;
 }
 
-static void dump_ivt_turbo_ratio_limits(void)
+static void dump_turbo_ratio_limit1(void)
 {
        unsigned long long msr;
        unsigned int ratio;
@@ -2509,29 +3191,7 @@ static void dump_ivt_turbo_ratio_limits(void)
        return;
 }
 
-int has_turbo_ratio_group_limits(int family, int model)
-{
-
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_ATOM_GOLDMONT:
-       case INTEL_FAM6_SKYLAKE_X:
-       case INTEL_FAM6_ICELAKE_X:
-       case INTEL_FAM6_SAPPHIRERAPIDS_X:
-       case INTEL_FAM6_ATOM_GOLDMONT_D:
-       case INTEL_FAM6_ATOM_TREMONT_D:
-               return 1;
-       default:
-               return 0;
-       }
-}
-
-static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model)
+static void dump_turbo_ratio_limits(int trl_msr_offset)
 {
        unsigned long long msr, core_counts;
        int shift;
@@ -2540,7 +3200,7 @@ static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model)
        fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
                base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr);
 
-       if (has_turbo_ratio_group_limits(family, model)) {
+       if (platform->trl_msrs & TRL_CORECOUNT) {
                get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
                fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
        } else {
@@ -2657,10 +3317,13 @@ static void dump_knl_turbo_ratio_limits(void)
                                ratio[i], bclk, ratio[i] * bclk, cores[i]);
 }
 
-static void dump_nhm_cst_cfg(void)
+static void dump_cst_cfg(void)
 {
        unsigned long long msr;
 
+       if (!platform->has_nhm_msrs)
+               return;
+
        get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
 
        fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
@@ -2673,7 +3336,7 @@ static void dump_nhm_cst_cfg(void)
                (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);
 
 #define AUTOMATIC_CSTATE_CONVERSION            (1UL << 16)
-       if (has_automatic_cstate_conversion) {
+       if (platform->has_cst_auto_convension) {
                fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
        }
 
@@ -2730,39 +3393,50 @@ void print_irtl(void)
 {
        unsigned long long msr;
 
-       get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
-       fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
-       fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-               (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
-
-       get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
-       fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
-       fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-               (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       if (!platform->has_irtl_msrs)
+               return;
 
-       get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
-       fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
-       fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-               (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       if (platform->supported_cstates & PC3) {
+               get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
+               fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
+               fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+                       (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       }
 
-       if (!do_irtl_hsw)
-               return;
+       if (platform->supported_cstates & PC6) {
+               get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
+               fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
+               fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+                       (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       }
 
-       get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
-       fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
-       fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-               (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       if (platform->supported_cstates & PC7) {
+               get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
+               fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
+               fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+                       (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       }
 
-       get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
-       fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
-       fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-               (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       if (platform->supported_cstates & PC8) {
+               get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
+               fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
+               fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+                       (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       }
 
-       get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
-       fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
-       fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-               (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       if (platform->supported_cstates & PC9) {
+               get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
+               fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
+               fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+                       (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       }
 
+       if (platform->supported_cstates & PC10) {
+               get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
+               fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
+               fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+                       (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       }
 }
 
 void free_fd_percpu(void)
@@ -2785,6 +3459,14 @@ void free_all_buffers(void)
        cpu_present_set = NULL;
        cpu_present_setsize = 0;
 
+       CPU_FREE(cpu_effective_set);
+       cpu_effective_set = NULL;
+       cpu_effective_setsize = 0;
+
+       CPU_FREE(cpu_allowed_set);
+       cpu_allowed_set = NULL;
+       cpu_allowed_setsize = 0;
+
        CPU_FREE(cpu_affinity_set);
        cpu_affinity_set = NULL;
        cpu_affinity_setsize = 0;
@@ -2927,49 +3609,102 @@ int get_physical_node_id(struct cpu_topology *thiscpu)
        return -1;
 }
 
-int get_thread_siblings(struct cpu_topology *thiscpu)
+static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size)
 {
-       char path[80], character;
-       FILE *filep;
-       unsigned long map;
-       int so, shift, sib_core;
-       int cpu = thiscpu->logical_cpu_id;
-       int offset = topo.max_cpu_num + 1;
-       size_t size;
-       int thread_id = 0;
+       unsigned int start, end;
+       char *next = cpu_str;
 
-       thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
-       if (thiscpu->thread_id < 0)
-               thiscpu->thread_id = thread_id++;
-       if (!thiscpu->put_ids)
-               return -1;
+       while (next && *next) {
 
-       size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
-       CPU_ZERO_S(size, thiscpu->put_ids);
+               if (*next == '-')       /* no negative cpu numbers */
+                       return 1;
 
-       sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
-       filep = fopen(path, "r");
+               start = strtoul(next, &next, 10);
 
-       if (!filep) {
-               warnx("%s: open failed", path);
-               return -1;
-       }
-       do {
-               offset -= BITMASK_SIZE;
-               if (fscanf(filep, "%lx%c", &map, &character) != 2)
-                       err(1, "%s: failed to parse file", path);
-               for (shift = 0; shift < BITMASK_SIZE; shift++) {
-                       if ((map >> shift) & 0x1) {
-                               so = shift + offset;
-                               sib_core = get_core_id(so);
-                               if (sib_core == thiscpu->physical_core_id) {
-                                       CPU_SET_S(so, size, thiscpu->put_ids);
-                                       if ((so != cpu) && (cpus[so].thread_id < 0))
-                                               cpus[so].thread_id = thread_id++;
-                               }
-                       }
-               }
-       } while (character == ',');
+               if (start >= CPU_SUBSET_MAXCPUS)
+                       return 1;
+               CPU_SET_S(start, cpu_set_size, cpu_set);
+
+               if (*next == '\0' || *next == '\n')
+                       break;
+
+               if (*next == ',') {
+                       next += 1;
+                       continue;
+               }
+
+               if (*next == '-') {
+                       next += 1;      /* start range */
+               } else if (*next == '.') {
+                       next += 1;
+                       if (*next == '.')
+                               next += 1;      /* start range */
+                       else
+                               return 1;
+               }
+
+               end = strtoul(next, &next, 10);
+               if (end <= start)
+                       return 1;
+
+               while (++start <= end) {
+                       if (start >= CPU_SUBSET_MAXCPUS)
+                               return 1;
+                       CPU_SET_S(start, cpu_set_size, cpu_set);
+               }
+
+               if (*next == ',')
+                       next += 1;
+               else if (*next != '\0' && *next != '\n')
+                       return 1;
+       }
+
+       return 0;
+}
+
+int get_thread_siblings(struct cpu_topology *thiscpu)
+{
+       char path[80], character;
+       FILE *filep;
+       unsigned long map;
+       int so, shift, sib_core;
+       int cpu = thiscpu->logical_cpu_id;
+       int offset = topo.max_cpu_num + 1;
+       size_t size;
+       int thread_id = 0;
+
+       thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
+       if (thiscpu->thread_id < 0)
+               thiscpu->thread_id = thread_id++;
+       if (!thiscpu->put_ids)
+               return -1;
+
+       size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+       CPU_ZERO_S(size, thiscpu->put_ids);
+
+       sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
+       filep = fopen(path, "r");
+
+       if (!filep) {
+               warnx("%s: open failed", path);
+               return -1;
+       }
+       do {
+               offset -= BITMASK_SIZE;
+               if (fscanf(filep, "%lx%c", &map, &character) != 2)
+                       err(1, "%s: failed to parse file", path);
+               for (shift = 0; shift < BITMASK_SIZE; shift++) {
+                       if ((map >> shift) & 0x1) {
+                               so = shift + offset;
+                               sib_core = get_core_id(so);
+                               if (sib_core == thiscpu->physical_core_id) {
+                                       CPU_SET_S(so, size, thiscpu->put_ids);
+                                       if ((so != cpu) && (cpus[so].thread_id < 0))
+                                               cpus[so].thread_id = thread_id++;
+                               }
+                       }
+               }
+       } while (character == ',');
        fclose(filep);
 
        return CPU_COUNT_S(size, thiscpu->put_ids);
@@ -2998,7 +3733,7 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
 
                                        t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
 
-                                       if (cpu_is_not_present(t->cpu_id))
+                                       if (cpu_is_not_allowed(t->cpu_id))
                                                continue;
 
                                        t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);
@@ -3050,11 +3785,51 @@ int for_all_proc_cpus(int (func) (int))
        return 0;
 }
 
+#define PATH_EFFECTIVE_CPUS    "/sys/fs/cgroup/cpuset.cpus.effective"
+
+static char cpu_effective_str[1024];
+
+static int update_effective_str(bool startup)
+{
+       FILE *fp;
+       char *pos;
+       char buf[1024];
+       int ret;
+
+       if (cpu_effective_str[0] == '\0' && !startup)
+               return 0;
+
+       fp = fopen(PATH_EFFECTIVE_CPUS, "r");
+       if (!fp)
+               return 0;
+
+       pos = fgets(buf, 1024, fp);
+       if (!pos)
+               err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS);
+
+       fclose(fp);
+
+       ret = strncmp(cpu_effective_str, buf, 1024);
+       if (!ret)
+               return 0;
+
+       strncpy(cpu_effective_str, buf, 1024);
+       return 1;
+}
+
+static void update_effective_set(bool startup)
+{
+       update_effective_str(startup);
+
+       if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize))
+               err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
+}
+
 void re_initialize(void)
 {
        free_all_buffers();
-       setup_all_buffers();
-       fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
+       setup_all_buffers(false);
+       fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
 }
 
 void set_max_cpu_num(void)
@@ -3191,8 +3966,8 @@ int snapshot_gfx_rc6_ms(void)
 /*
  * snapshot_gfx_mhz()
  *
- * record snapshot of
- * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
+ * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
+ * when /sys/class/drm/card0/gt_cur_freq_mhz is not available.
  *
  * return 1 if config change requires a restart, else return 0
  */
@@ -3201,9 +3976,11 @@ int snapshot_gfx_mhz(void)
        static FILE *fp;
        int retval;
 
-       if (fp == NULL)
-               fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
-       else {
+       if (fp == NULL) {
+               fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r");
+               if (!fp)
+                       fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
+       } else {
                rewind(fp);
                fflush(fp);
        }
@@ -3218,8 +3995,8 @@ int snapshot_gfx_mhz(void)
 /*
  * snapshot_gfx_cur_mhz()
  *
- * record snapshot of
- * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
+ * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
+ * when /sys/class/drm/card0/gt_act_freq_mhz is not available.
  *
  * return 1 if config change requires a restart, else return 0
  */
@@ -3228,9 +4005,11 @@ int snapshot_gfx_act_mhz(void)
        static FILE *fp;
        int retval;
 
-       if (fp == NULL)
-               fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
-       else {
+       if (fp == NULL) {
+               fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r");
+               if (!fp)
+                       fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
+       } else {
                rewind(fp);
                fflush(fp);
        }
@@ -3562,6 +4341,10 @@ restart:
                        re_initialize();
                        goto restart;
                }
+               if (update_effective_str(false)) {
+                       re_initialize();
+                       goto restart;
+               }
                do_sleep();
                if (snapshot_proc_sysfs_files())
                        goto restart;
@@ -3674,500 +4457,133 @@ void check_permissions(void)
                exit(-6);
 }
 
-/*
- * NHM adds support for additional MSRs:
- *
- * MSR_SMI_COUNT                   0x00000034
- *
- * MSR_PLATFORM_INFO               0x000000ce
- * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
- *
- * MSR_MISC_PWR_MGMT               0x000001aa
- *
- * MSR_PKG_C3_RESIDENCY            0x000003f8
- * MSR_PKG_C6_RESIDENCY            0x000003f9
- * MSR_CORE_C3_RESIDENCY           0x000003fc
- * MSR_CORE_C6_RESIDENCY           0x000003fd
- *
- * Side effect:
- * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
- * sets has_misc_feature_control
- */
-int probe_nhm_msrs(unsigned int family, unsigned int model)
+void probe_bclk(void)
 {
        unsigned long long msr;
        unsigned int base_ratio;
-       int *pkg_cstate_limits;
 
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       bclk = discover_bclk(family, model);
+       if (!platform->has_nhm_msrs)
+               return;
 
-       switch (model) {
-       case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
-       case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
-               pkg_cstate_limits = nhm_pkg_cstate_limits;
-               break;
-       case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
-       case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
-       case INTEL_FAM6_IVYBRIDGE:      /* IVB */
-       case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
-               pkg_cstate_limits = snb_pkg_cstate_limits;
-               has_misc_feature_control = 1;
-               break;
-       case INTEL_FAM6_HASWELL:        /* HSW */
-       case INTEL_FAM6_HASWELL_G:      /* HSW */
-       case INTEL_FAM6_HASWELL_X:      /* HSX */
-       case INTEL_FAM6_HASWELL_L:      /* HSW */
-       case INTEL_FAM6_BROADWELL:      /* BDW */
-       case INTEL_FAM6_BROADWELL_G:    /* BDW */
-       case INTEL_FAM6_BROADWELL_X:    /* BDX */
-       case INTEL_FAM6_SKYLAKE_L:      /* SKL */
-       case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
-               pkg_cstate_limits = hsw_pkg_cstate_limits;
-               has_misc_feature_control = 1;
-               break;
-       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
-       case INTEL_FAM6_SAPPHIRERAPIDS_X:       /* SPR */
-               pkg_cstate_limits = skx_pkg_cstate_limits;
-               has_misc_feature_control = 1;
-               break;
-       case INTEL_FAM6_ICELAKE_X:      /* ICX */
-               pkg_cstate_limits = icx_pkg_cstate_limits;
-               has_misc_feature_control = 1;
-               break;
-       case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
-               no_MSR_MISC_PWR_MGMT = 1;
-               /* FALLTHRU */
-       case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
-               pkg_cstate_limits = slv_pkg_cstate_limits;
-               break;
-       case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
-               pkg_cstate_limits = amt_pkg_cstate_limits;
-               no_MSR_MISC_PWR_MGMT = 1;
-               break;
-       case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
-               pkg_cstate_limits = phi_pkg_cstate_limits;
-               break;
-       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
-       case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-       case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
-       case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
-       case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
-               pkg_cstate_limits = glm_pkg_cstate_limits;
-               break;
-       default:
-               return 0;
-       }
-       get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
-       pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
+       if (platform->bclk_freq == BCLK_100MHZ)
+               bclk = 100.00;
+       else if (platform->bclk_freq == BCLK_133MHZ)
+               bclk = 133.33;
+       else if (platform->bclk_freq == BCLK_SLV)
+               bclk = slm_bclk();
+       else
+               return;
 
        get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
        base_ratio = (msr >> 8) & 0xFF;
 
        base_hz = base_ratio * bclk * 1000000;
        has_base_hz = 1;
-       return 1;
-}
 
-/*
- * SLV client has support for unique MSRs:
- *
- * MSR_CC6_DEMOTION_POLICY_CONFIG
- * MSR_MC6_DEMOTION_POLICY_CONFIG
- */
+       if (platform->enable_tsc_tweak)
+               tsc_tweak = base_hz / tsc_hz;
+}
 
-int has_slv_msrs(unsigned int family, unsigned int model)
+static void remove_underbar(char *s)
 {
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
+       char *to = s;
 
-       switch (model) {
-       case INTEL_FAM6_ATOM_SILVERMONT:
-       case INTEL_FAM6_ATOM_SILVERMONT_MID:
-       case INTEL_FAM6_ATOM_AIRMONT_MID:
-               return 1;
+       while (*s) {
+               if (*s != '_')
+                       *to++ = *s;
+               s++;
        }
-       return 0;
+
+       *to = 0;
 }
 
-int is_dnv(unsigned int family, unsigned int model)
+static void dump_turbo_ratio_info(void)
 {
+       if (!has_turbo)
+               return;
 
-       if (!genuine_intel)
-               return 0;
+       if (!platform->has_nhm_msrs)
+               return;
 
-       if (family != 6)
-               return 0;
+       if (platform->trl_msrs & TRL_LIMIT2)
+               dump_turbo_ratio_limit2();
 
-       switch (model) {
-       case INTEL_FAM6_ATOM_GOLDMONT_D:
-               return 1;
-       }
-       return 0;
-}
+       if (platform->trl_msrs & TRL_LIMIT1)
+               dump_turbo_ratio_limit1();
 
-int is_bdx(unsigned int family, unsigned int model)
-{
+       if (platform->trl_msrs & TRL_BASE) {
+               dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT);
 
-       if (!genuine_intel)
-               return 0;
+               if (is_hybrid)
+                       dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT);
+       }
 
-       if (family != 6)
-               return 0;
+       if (platform->trl_msrs & TRL_ATOM)
+               dump_atom_turbo_ratio_limits();
 
-       switch (model) {
-       case INTEL_FAM6_BROADWELL_X:
-               return 1;
-       }
-       return 0;
+       if (platform->trl_msrs & TRL_KNL)
+               dump_knl_turbo_ratio_limits();
+
+       if (platform->has_config_tdp)
+               dump_config_tdp();
 }
 
-int is_skx(unsigned int family, unsigned int model)
+static int read_sysfs_int(char *path)
 {
+       FILE *input;
+       int retval = -1;
 
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_SKYLAKE_X:
-               return 1;
+       input = fopen(path, "r");
+       if (input == NULL) {
+               if (debug)
+                       fprintf(outf, "NSFOD %s\n", path);
+               return (-1);
        }
-       return 0;
+       if (fscanf(input, "%d", &retval) != 1)
+               err(1, "%s: failed to read int from file", path);
+       fclose(input);
+
+       return (retval);
 }
 
-int is_icx(unsigned int family, unsigned int model)
+static void dump_sysfs_file(char *path)
 {
+       FILE *input;
+       char cpuidle_buf[64];
 
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_ICELAKE_X:
-               return 1;
+       input = fopen(path, "r");
+       if (input == NULL) {
+               if (debug)
+                       fprintf(outf, "NSFOD %s\n", path);
+               return;
        }
-       return 0;
+       if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
+               err(1, "%s: failed to read file", path);
+       fclose(input);
+
+       fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
 }
 
-int is_spr(unsigned int family, unsigned int model)
+static void probe_intel_uncore_frequency(void)
 {
+       int i, j;
+       char path[128];
 
        if (!genuine_intel)
-               return 0;
+               return;
 
-       if (family != 6)
-               return 0;
+       if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
+               return;
 
-       switch (model) {
-       case INTEL_FAM6_SAPPHIRERAPIDS_X:
-               return 1;
-       }
-       return 0;
-}
+       /* Cluster level sysfs not supported yet. */
+       if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK))
+               return;
 
-int is_ehl(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
+       if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
+               BIC_PRESENT(BIC_UNCORE_MHZ);
 
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_ATOM_TREMONT:
-               return 1;
-       }
-       return 0;
-}
-
-int is_jvl(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_ATOM_TREMONT_D:
-               return 1;
-       }
-       return 0;
-}
-
-int has_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-       if (has_slv_msrs(family, model))
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-               /* Nehalem compatible, but do not include turbo-ratio limit support */
-       case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
-       case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
-               return 0;
-       default:
-               return 1;
-       }
-}
-
-int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-       if (has_slv_msrs(family, model))
-               return 1;
-
-       return 0;
-}
-
-int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
-       case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
-               return 1;
-       default:
-               return 0;
-       }
-}
-
-int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
-               return 1;
-       default:
-               return 0;
-       }
-}
-
-int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
-               return 1;
-       default:
-               return 0;
-       }
-}
-
-int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_ATOM_GOLDMONT:
-       case INTEL_FAM6_SKYLAKE_X:
-       case INTEL_FAM6_ICELAKE_X:
-       case INTEL_FAM6_SAPPHIRERAPIDS_X:
-               return 1;
-       default:
-               return 0;
-       }
-}
-
-int has_config_tdp(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_IVYBRIDGE:      /* IVB */
-       case INTEL_FAM6_HASWELL:        /* HSW */
-       case INTEL_FAM6_HASWELL_X:      /* HSX */
-       case INTEL_FAM6_HASWELL_L:      /* HSW */
-       case INTEL_FAM6_HASWELL_G:      /* HSW */
-       case INTEL_FAM6_BROADWELL:      /* BDW */
-       case INTEL_FAM6_BROADWELL_G:    /* BDW */
-       case INTEL_FAM6_BROADWELL_X:    /* BDX */
-       case INTEL_FAM6_SKYLAKE_L:      /* SKL */
-       case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
-       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
-       case INTEL_FAM6_ICELAKE_X:      /* ICX */
-       case INTEL_FAM6_SAPPHIRERAPIDS_X:       /* SPR */
-       case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
-               return 1;
-       default:
-               return 0;
-       }
-}
-
-/*
- * tcc_offset_bits:
- * 0: Tcc Offset not supported (Default)
- * 6: Bit 29:24 of MSR_PLATFORM_INFO
- * 4: Bit 27:24 of MSR_PLATFORM_INFO
- */
-void check_tcc_offset(int model)
-{
-       unsigned long long msr;
-
-       if (!genuine_intel)
-               return;
-
-       switch (model) {
-       case INTEL_FAM6_SKYLAKE_L:
-       case INTEL_FAM6_SKYLAKE:
-       case INTEL_FAM6_KABYLAKE_L:
-       case INTEL_FAM6_KABYLAKE:
-       case INTEL_FAM6_ICELAKE_L:
-       case INTEL_FAM6_ICELAKE:
-       case INTEL_FAM6_TIGERLAKE_L:
-       case INTEL_FAM6_TIGERLAKE:
-       case INTEL_FAM6_COMETLAKE:
-               if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) {
-                       msr = (msr >> 30) & 1;
-                       if (msr)
-                               tcc_offset_bits = 6;
-               }
-               return;
-       default:
-               return;
-       }
-}
-
-static void remove_underbar(char *s)
-{
-       char *to = s;
-
-       while (*s) {
-               if (*s != '_')
-                       *to++ = *s;
-               s++;
-       }
-
-       *to = 0;
-}
-
-static void dump_turbo_ratio_info(unsigned int family, unsigned int model)
-{
-       if (!has_turbo)
-               return;
-
-       if (has_hsw_turbo_ratio_limit(family, model))
-               dump_hsw_turbo_ratio_limits();
-
-       if (has_ivt_turbo_ratio_limit(family, model))
-               dump_ivt_turbo_ratio_limits();
-
-       if (has_turbo_ratio_limit(family, model)) {
-               dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT, family, model);
-
-               if (is_hybrid)
-                       dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT, family, model);
-       }
-
-       if (has_atom_turbo_ratio_limit(family, model))
-               dump_atom_turbo_ratio_limits();
-
-       if (has_knl_turbo_ratio_limit(family, model))
-               dump_knl_turbo_ratio_limits();
-
-       if (has_config_tdp(family, model))
-               dump_config_tdp();
-}
-
-static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
-{
-       if (!do_nhm_platform_info)
-               return;
-
-       dump_nhm_platform_info();
-       dump_turbo_ratio_info(family, model);
-       dump_nhm_cst_cfg();
-}
-
-static int read_sysfs_int(char *path)
-{
-       FILE *input;
-       int retval = -1;
-
-       input = fopen(path, "r");
-       if (input == NULL) {
-               if (debug)
-                       fprintf(outf, "NSFOD %s\n", path);
-               return (-1);
-       }
-       if (fscanf(input, "%d", &retval) != 1)
-               err(1, "%s: failed to read int from file", path);
-       fclose(input);
-
-       return (retval);
-}
-
-static void dump_sysfs_file(char *path)
-{
-       FILE *input;
-       char cpuidle_buf[64];
-
-       input = fopen(path, "r");
-       if (input == NULL) {
-               if (debug)
-                       fprintf(outf, "NSFOD %s\n", path);
-               return;
-       }
-       if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
-               err(1, "%s: failed to read file", path);
-       fclose(input);
-
-       fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
-}
-
-static void intel_uncore_frequency_probe(void)
-{
-       int i, j;
-       char path[128];
-
-       if (!genuine_intel)
-               return;
-
-       if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
-               return;
-
-       if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
-               BIC_PRESENT(BIC_UNCORE_MHZ);
-
-       if (quiet)
-               return;
+       if (quiet)
+               return;
 
        for (i = 0; i < topo.num_packages; ++i) {
                for (j = 0; j < topo.num_die; ++j) {
@@ -4194,6 +4610,20 @@ static void intel_uncore_frequency_probe(void)
        }
 }
 
+static void probe_graphics(void)
+{
+       if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
+               BIC_PRESENT(BIC_GFX_rc6);
+
+       if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) ||
+           !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+               BIC_PRESENT(BIC_GFXMHz);
+
+       if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) ||
+           !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+               BIC_PRESENT(BIC_GFXACTMHz);
+}
+
 static void dump_sysfs_cstate_config(void)
 {
        char path[64];
@@ -4310,7 +4740,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        cpu = t->cpu_id;
 
        /* EPB is per-package */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_thread_in_package(t, c, p))
                return 0;
 
        if (cpu_migrate(cpu)) {
@@ -4359,7 +4789,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        cpu = t->cpu_id;
 
        /* MSR_HWP_CAPABILITIES is per-package */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_thread_in_package(t, c, p))
                return 0;
 
        if (cpu_migrate(cpu)) {
@@ -4442,7 +4872,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
        cpu = t->cpu_id;
 
        /* per-package */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_thread_in_package(t, c, p))
                return 0;
 
        if (cpu_migrate(cpu)) {
@@ -4450,7 +4880,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
                return -1;
        }
 
-       if (do_core_perf_limit_reasons) {
+       if (platform->plr_msrs & PLR_CORE) {
                get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
                fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
                fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
@@ -4483,7 +4913,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
                        (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");
 
        }
-       if (do_gfx_perf_limit_reasons) {
+       if (platform->plr_msrs & PLR_GFX) {
                get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
                fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
                fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
@@ -4503,7 +4933,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
                        (msr & 1 << 25) ? "GFXPwr, " : "",
                        (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
        }
-       if (do_ring_perf_limit_reasons) {
+       if (platform->plr_msrs & PLR_RING) {
                get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
                fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
                fprintf(outf, " (Active: %s%s%s%s%s%s)",
@@ -4525,208 +4955,74 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 #define        RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
 #define        RAPL_TIME_GRANULARITY   0x3F    /* 6 bit time granularity */
 
-double get_tdp_intel(unsigned int model)
+double get_quirk_tdp(void)
 {
-       unsigned long long msr;
-
-       if (do_rapl & RAPL_PKG_POWER_INFO)
-               if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
-                       return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+       if (platform->rapl_quirk_tdp)
+               return platform->rapl_quirk_tdp;
 
-       switch (model) {
-       case INTEL_FAM6_ATOM_SILVERMONT:
-       case INTEL_FAM6_ATOM_SILVERMONT_D:
-               return 30.0;
-       default:
-               return 135.0;
-       }
+       return 135.0;
 }
 
-double get_tdp_amd(unsigned int family)
+double get_tdp_intel(void)
 {
-       UNUSED(family);
+       unsigned long long msr;
 
-       /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
-       return 280.0;
+       if (platform->rapl_msrs & RAPL_PKG_POWER_INFO)
+               if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
+                       return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+       return get_quirk_tdp();
 }
 
-/*
- * rapl_dram_energy_units_probe()
- * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
- */
-static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
+double get_tdp_amd(void)
 {
-       /* only called for genuine_intel, family 6 */
-
-       switch (model) {
-       case INTEL_FAM6_HASWELL_X:      /* HSX */
-       case INTEL_FAM6_BROADWELL_X:    /* BDX */
-       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
-       case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
-       case INTEL_FAM6_ICELAKE_X:      /* ICX */
-               return (rapl_dram_energy_units = 15.3 / 1000000);
-       default:
-               return (rapl_energy_units);
-       }
+       return get_quirk_tdp();
 }
 
-void rapl_probe_intel(unsigned int family, unsigned int model)
+void rapl_probe_intel(void)
 {
        unsigned long long msr;
        unsigned int time_unit;
        double tdp;
 
-       if (family != 6)
-               return;
-
-       switch (model) {
-       case INTEL_FAM6_SANDYBRIDGE:
-       case INTEL_FAM6_IVYBRIDGE:
-       case INTEL_FAM6_HASWELL:        /* HSW */
-       case INTEL_FAM6_HASWELL_L:      /* HSW */
-       case INTEL_FAM6_HASWELL_G:      /* HSW */
-       case INTEL_FAM6_BROADWELL:      /* BDW */
-       case INTEL_FAM6_BROADWELL_G:    /* BDW */
-               do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
-               if (rapl_joules) {
-                       BIC_PRESENT(BIC_Pkg_J);
-                       BIC_PRESENT(BIC_Cor_J);
-                       BIC_PRESENT(BIC_GFX_J);
-               } else {
-                       BIC_PRESENT(BIC_PkgWatt);
-                       BIC_PRESENT(BIC_CorWatt);
-                       BIC_PRESENT(BIC_GFXWatt);
-               }
-               break;
-       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
-       case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-               do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
-               if (rapl_joules)
-                       BIC_PRESENT(BIC_Pkg_J);
-               else
-                       BIC_PRESENT(BIC_PkgWatt);
-               break;
-       case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
-               do_rapl =
-                   RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
-                   | RAPL_GFX | RAPL_PKG_POWER_INFO;
-               if (rapl_joules) {
-                       BIC_PRESENT(BIC_Pkg_J);
-                       BIC_PRESENT(BIC_Cor_J);
-                       BIC_PRESENT(BIC_RAM_J);
-                       BIC_PRESENT(BIC_GFX_J);
-               } else {
-                       BIC_PRESENT(BIC_PkgWatt);
-                       BIC_PRESENT(BIC_CorWatt);
-                       BIC_PRESENT(BIC_RAMWatt);
-                       BIC_PRESENT(BIC_GFXWatt);
-               }
-               break;
-       case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
-               do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
-               BIC_PRESENT(BIC_PKG__);
-               if (rapl_joules)
-                       BIC_PRESENT(BIC_Pkg_J);
-               else
-                       BIC_PRESENT(BIC_PkgWatt);
-               break;
-       case INTEL_FAM6_SKYLAKE_L:      /* SKL */
-       case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
-               do_rapl =
-                   RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
-                   | RAPL_GFX | RAPL_PKG_POWER_INFO;
-               BIC_PRESENT(BIC_PKG__);
-               BIC_PRESENT(BIC_RAM__);
-               if (rapl_joules) {
+       if (rapl_joules) {
+               if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
                        BIC_PRESENT(BIC_Pkg_J);
+               if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
                        BIC_PRESENT(BIC_Cor_J);
+               if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
                        BIC_PRESENT(BIC_RAM_J);
+               if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
                        BIC_PRESENT(BIC_GFX_J);
-               } else {
+       } else {
+               if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
                        BIC_PRESENT(BIC_PkgWatt);
+               if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
                        BIC_PRESENT(BIC_CorWatt);
+               if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
                        BIC_PRESENT(BIC_RAMWatt);
+               if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
                        BIC_PRESENT(BIC_GFXWatt);
-               }
-               break;
-       case INTEL_FAM6_HASWELL_X:      /* HSX */
-       case INTEL_FAM6_BROADWELL_X:    /* BDX */
-       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
-       case INTEL_FAM6_ICELAKE_X:      /* ICX */
-       case INTEL_FAM6_SAPPHIRERAPIDS_X:       /* SPR */
-       case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
-               do_rapl =
-                   RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
-                   RAPL_PKG_POWER_INFO;
+       }
+
+       if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS)
                BIC_PRESENT(BIC_PKG__);
+       if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)
                BIC_PRESENT(BIC_RAM__);
-               if (rapl_joules) {
-                       BIC_PRESENT(BIC_Pkg_J);
-                       BIC_PRESENT(BIC_RAM_J);
-               } else {
-                       BIC_PRESENT(BIC_PkgWatt);
-                       BIC_PRESENT(BIC_RAMWatt);
-               }
-               break;
-       case INTEL_FAM6_SANDYBRIDGE_X:
-       case INTEL_FAM6_IVYBRIDGE_X:
-               do_rapl =
-                   RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS |
-                   RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
-               BIC_PRESENT(BIC_PKG__);
-               BIC_PRESENT(BIC_RAM__);
-               if (rapl_joules) {
-                       BIC_PRESENT(BIC_Pkg_J);
-                       BIC_PRESENT(BIC_Cor_J);
-                       BIC_PRESENT(BIC_RAM_J);
-               } else {
-                       BIC_PRESENT(BIC_PkgWatt);
-                       BIC_PRESENT(BIC_CorWatt);
-                       BIC_PRESENT(BIC_RAMWatt);
-               }
-               break;
-       case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
-       case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
-               do_rapl = RAPL_PKG | RAPL_CORES;
-               if (rapl_joules) {
-                       BIC_PRESENT(BIC_Pkg_J);
-                       BIC_PRESENT(BIC_Cor_J);
-               } else {
-                       BIC_PRESENT(BIC_PkgWatt);
-                       BIC_PRESENT(BIC_CorWatt);
-               }
-               break;
-       case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
-               do_rapl =
-                   RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
-                   RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
-               BIC_PRESENT(BIC_PKG__);
-               BIC_PRESENT(BIC_RAM__);
-               if (rapl_joules) {
-                       BIC_PRESENT(BIC_Pkg_J);
-                       BIC_PRESENT(BIC_Cor_J);
-                       BIC_PRESENT(BIC_RAM_J);
-               } else {
-                       BIC_PRESENT(BIC_PkgWatt);
-                       BIC_PRESENT(BIC_CorWatt);
-                       BIC_PRESENT(BIC_RAMWatt);
-               }
-               break;
-       default:
-               return;
-       }
 
        /* units on package 0, verify later other packages match */
        if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
                return;
 
        rapl_power_units = 1.0 / (1 << (msr & 0xF));
-       if (model == INTEL_FAM6_ATOM_SILVERMONT)
+       if (platform->has_rapl_divisor)
                rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
        else
                rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
 
-       rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
+       if (platform->has_fixed_rapl_unit)
+               rapl_dram_energy_units = (15.3 / 1000000);
+       else
+               rapl_dram_energy_units = rapl_energy_units;
 
        time_unit = msr >> 16 & 0xF;
        if (time_unit == 0)
@@ -4734,32 +5030,18 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
 
        rapl_time_units = 1.0 / (1 << (time_unit));
 
-       tdp = get_tdp_intel(model);
+       tdp = get_tdp_intel();
 
        rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
        if (!quiet)
                fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
 }
 
-void rapl_probe_amd(unsigned int family, unsigned int model)
+void rapl_probe_amd(void)
 {
        unsigned long long msr;
-       unsigned int eax, ebx, ecx, edx;
-       unsigned int has_rapl = 0;
        double tdp;
 
-       UNUSED(model);
-
-       if (max_extended_level >= 0x80000007) {
-               __cpuid(0x80000007, eax, ebx, ecx, edx);
-               /* RAPL (Fam 17h+) */
-               has_rapl = edx & (1 << 14);
-       }
-
-       if (!has_rapl || family < 0x17)
-               return;
-
-       do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
        if (rapl_joules) {
                BIC_PRESENT(BIC_Pkg_J);
                BIC_PRESENT(BIC_Cor_J);
@@ -4775,128 +5057,13 @@ void rapl_probe_amd(unsigned int family, unsigned int model)
        rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
        rapl_power_units = ldexp(1.0, -(msr & 0xf));
 
-       tdp = get_tdp_amd(family);
+       tdp = get_tdp_amd();
 
        rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
        if (!quiet)
                fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
 }
 
-/*
- * rapl_probe()
- *
- * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
- */
-void rapl_probe(unsigned int family, unsigned int model)
-{
-       if (genuine_intel)
-               rapl_probe_intel(family, model);
-       if (authentic_amd || hygon_genuine)
-               rapl_probe_amd(family, model);
-}
-
-void perf_limit_reasons_probe(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return;
-
-       if (family != 6)
-               return;
-
-       switch (model) {
-       case INTEL_FAM6_HASWELL:        /* HSW */
-       case INTEL_FAM6_HASWELL_L:      /* HSW */
-       case INTEL_FAM6_HASWELL_G:      /* HSW */
-               do_gfx_perf_limit_reasons = 1;
-               /* FALLTHRU */
-       case INTEL_FAM6_HASWELL_X:      /* HSX */
-               do_core_perf_limit_reasons = 1;
-               do_ring_perf_limit_reasons = 1;
-       default:
-               return;
-       }
-}
-
-void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
-{
-       if (family != 6)
-               return;
-
-       switch (model) {
-       case INTEL_FAM6_BROADWELL_X:
-       case INTEL_FAM6_SKYLAKE_X:
-               has_automatic_cstate_conversion = 1;
-       }
-}
-
-void prewake_cstate_probe(unsigned int family, unsigned int model)
-{
-       if (is_icx(family, model) || is_spr(family, model))
-               dis_cstate_prewake = 1;
-}
-
-int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
-{
-       unsigned long long msr;
-       unsigned int dts, dts2;
-       int cpu;
-
-       UNUSED(c);
-       UNUSED(p);
-
-       if (!(do_dts || do_ptm))
-               return 0;
-
-       cpu = t->cpu_id;
-
-       /* DTS is per-core, no need to print for each thread */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
-               return 0;
-
-       if (cpu_migrate(cpu)) {
-               fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
-               return -1;
-       }
-
-       if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
-               if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
-                       return 0;
-
-               dts = (msr >> 16) & 0x7F;
-               fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
-
-               if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
-                       return 0;
-
-               dts = (msr >> 16) & 0x7F;
-               dts2 = (msr >> 8) & 0x7F;
-               fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
-                       cpu, msr, tj_max - dts, tj_max - dts2);
-       }
-
-       if (do_dts && debug) {
-               unsigned int resolution;
-
-               if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
-                       return 0;
-
-               dts = (msr >> 16) & 0x7F;
-               resolution = (msr >> 27) & 0xF;
-               fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
-                       cpu, msr, tj_max - dts, resolution);
-
-               if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
-                       return 0;
-
-               dts = (msr >> 16) & 0x7F;
-               dts2 = (msr >> 8) & 0x7F;
-               fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
-                       cpu, msr, tj_max - dts, tj_max - dts2);
-       }
-
-       return 0;
-}
-
 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
 {
        fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
@@ -4918,11 +5085,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        UNUSED(c);
        UNUSED(p);
 
-       if (!do_rapl)
+       if (!platform->rapl_msrs)
                return 0;
 
        /* RAPL counters are per package, so print only for 1st thread/package */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_thread_in_package(t, c, p))
                return 0;
 
        cpu = t->cpu_id;
@@ -4931,7 +5098,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                return -1;
        }
 
-       if (do_rapl & RAPL_AMD_F17H) {
+       if (platform->rapl_msrs & RAPL_AMD_F17H) {
                msr_name = "MSR_RAPL_PWR_UNIT";
                if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
                        return -1;
@@ -4944,7 +5111,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
                rapl_power_units, rapl_energy_units, rapl_time_units);
 
-       if (do_rapl & RAPL_PKG_POWER_INFO) {
+       if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) {
 
                if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
                        return -5;
@@ -4957,7 +5124,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                        ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
 
        }
-       if (do_rapl & RAPL_PKG) {
+       if (platform->rapl_msrs & RAPL_PKG) {
 
                if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
                        return -9;
@@ -4981,7 +5148,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                        cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
        }
 
-       if (do_rapl & RAPL_DRAM_POWER_INFO) {
+       if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) {
                if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
                        return -6;
 
@@ -4992,7 +5159,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                        ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
                        ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
        }
-       if (do_rapl & RAPL_DRAM) {
+       if (platform->rapl_msrs & RAPL_DRAM) {
                if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
                        return -9;
                fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
@@ -5000,20 +5167,20 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
                print_power_limit_msr(cpu, msr, "DRAM Limit");
        }
-       if (do_rapl & RAPL_CORE_POLICY) {
+       if (platform->rapl_msrs & RAPL_CORE_POLICY) {
                if (get_msr(cpu, MSR_PP0_POLICY, &msr))
                        return -7;
 
                fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
        }
-       if (do_rapl & RAPL_CORES_POWER_LIMIT) {
+       if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) {
                if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
                        return -9;
                fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
                        cpu, msr, (msr >> 31) & 1 ? "" : "UN");
                print_power_limit_msr(cpu, msr, "Cores Limit");
        }
-       if (do_rapl & RAPL_GFX) {
+       if (platform->rapl_msrs & RAPL_GFX) {
                if (get_msr(cpu, MSR_PP1_POLICY, &msr))
                        return -8;
 
@@ -5029,217 +5196,24 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 }
 
 /*
- * SNB adds support for additional MSRs:
- *
- * MSR_PKG_C7_RESIDENCY            0x000003fa
- * MSR_CORE_C7_RESIDENCY           0x000003fe
- * MSR_PKG_C2_RESIDENCY            0x0000060d
- */
-
-int has_snb_msrs(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_SANDYBRIDGE:
-       case INTEL_FAM6_SANDYBRIDGE_X:
-       case INTEL_FAM6_IVYBRIDGE:      /* IVB */
-       case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
-       case INTEL_FAM6_HASWELL:        /* HSW */
-       case INTEL_FAM6_HASWELL_X:      /* HSW */
-       case INTEL_FAM6_HASWELL_L:      /* HSW */
-       case INTEL_FAM6_HASWELL_G:      /* HSW */
-       case INTEL_FAM6_BROADWELL:      /* BDW */
-       case INTEL_FAM6_BROADWELL_G:    /* BDW */
-       case INTEL_FAM6_BROADWELL_X:    /* BDX */
-       case INTEL_FAM6_SKYLAKE_L:      /* SKL */
-       case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
-       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
-       case INTEL_FAM6_ICELAKE_X:      /* ICX */
-       case INTEL_FAM6_SAPPHIRERAPIDS_X:       /* SPR */
-       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
-       case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-       case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
-       case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
-       case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
-               return 1;
-       }
-       return 0;
-}
-
-/*
- * HSW ULT added support for C8/C9/C10 MSRs:
- *
- * MSR_PKG_C8_RESIDENCY                0x00000630
- * MSR_PKG_C9_RESIDENCY                0x00000631
- * MSR_PKG_C10_RESIDENCY       0x00000632
- *
- * MSR_PKGC8_IRTL              0x00000633
- * MSR_PKGC9_IRTL              0x00000634
- * MSR_PKGC10_IRTL             0x00000635
- *
- */
-int has_c8910_msrs(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_HASWELL_L:      /* HSW */
-       case INTEL_FAM6_BROADWELL:      /* BDW */
-       case INTEL_FAM6_SKYLAKE_L:      /* SKL */
-       case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
-       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
-       case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-       case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
-               return 1;
-       }
-       return 0;
-}
-
-/*
- * SKL adds support for additional MSRS:
+ * probe_rapl()
  *
- * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
- * MSR_PKG_ANY_CORE_C0_RES         0x00000659
- * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
- * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
+ * sets rapl_power_units, rapl_energy_units, rapl_time_units
  */
-int has_skl_msrs(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_SKYLAKE_L:      /* SKL */
-       case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
-               return 1;
-       }
-       return 0;
-}
-
-int is_slm(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
-       case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
-               return 1;
-       }
-       return 0;
-}
-
-int is_knl(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
-               return 1;
-       }
-       return 0;
-}
-
-int is_cnl(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
-               return 1;
-       }
-
-       return 0;
-}
-
-unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
-{
-       if (is_knl(family, model))
-               return 1024;
-       return 1;
-}
-
-#define SLM_BCLK_FREQS 5
-double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
-
-double slm_bclk(void)
-{
-       unsigned long long msr = 3;
-       unsigned int i;
-       double freq;
-
-       if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
-               fprintf(outf, "SLM BCLK: unknown\n");
-
-       i = msr & 0xf;
-       if (i >= SLM_BCLK_FREQS) {
-               fprintf(outf, "SLM BCLK[%d] invalid\n", i);
-               i = 3;
-       }
-       freq = slm_freq_table[i];
-
-       if (!quiet)
-               fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
-
-       return freq;
-}
-
-double discover_bclk(unsigned int family, unsigned int model)
+void probe_rapl(void)
 {
-       if (has_snb_msrs(family, model) || is_knl(family, model))
-               return 100.00;
-       else if (is_slm(family, model))
-               return slm_bclk();
-       else
-               return 133.33;
-}
-
-int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
-{
-       unsigned int eax, ebx, ecx, edx;
-
-       UNUSED(c);
-       UNUSED(p);
-
-       if (!genuine_intel)
-               return 0;
+       if (!platform->rapl_msrs)
+               return;
 
-       if (cpu_migrate(t->cpu_id)) {
-               fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
-               return -1;
-       }
+       if (genuine_intel)
+               rapl_probe_intel();
+       if (authentic_amd || hygon_genuine)
+               rapl_probe_amd();
 
-       if (max_level < 0x1a)
-               return 0;
+       if (quiet)
+               return;
 
-       __cpuid(0x1a, eax, ebx, ecx, edx);
-       eax = (eax >> 24) & 0xFF;
-       if (eax == 0x20)
-               t->is_atom = true;
-       return 0;
+       for_all_cpus(print_rapl, ODD_COUNTERS);
 }
 
 /*
@@ -5268,7 +5242,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
                return 0;
 
        /* this is a per-package concept */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_thread_in_package(t, c, p))
                return 0;
 
        cpu = t->cpu_id;
@@ -5284,7 +5258,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
        }
 
        /* Temperature Target MSR is Nehalem and newer only */
-       if (!do_nhm_platform_info)
+       if (!platform->has_nhm_msrs)
                goto guess;
 
        if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
@@ -5293,34 +5267,134 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
        tcc_default = (msr >> 16) & 0xFF;
 
        if (!quiet) {
-               switch (tcc_offset_bits) {
-               case 4:
-                       tcc_offset = (msr >> 24) & 0xF;
-                       fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
-                               cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
-                       break;
-               case 6:
-                       tcc_offset = (msr >> 24) & 0x3F;
+               int bits = platform->tcc_offset_bits;
+               unsigned long long enabled = 0;
+
+               if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled))
+                       enabled = (enabled >> 30) & 1;
+
+               if (bits && enabled) {
+                       tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0);
                        fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
                                cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
-                       break;
-               default:
+               } else {
                        fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
-                       break;
                }
        }
 
-       if (!tcc_default)
-               goto guess;
+       if (!tcc_default)
+               goto guess;
+
+       tj_max = tcc_default;
+
+       return 0;
+
+guess:
+       tj_max = TJMAX_DEFAULT;
+       fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);
+
+       return 0;
+}
+
+int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       unsigned long long msr;
+       unsigned int dts, dts2;
+       int cpu;
+
+       UNUSED(c);
+       UNUSED(p);
+
+       if (!(do_dts || do_ptm))
+               return 0;
+
+       cpu = t->cpu_id;
+
+       /* DTS is per-core, no need to print for each thread */
+       if (!is_cpu_first_thread_in_core(t, c, p))
+               return 0;
+
+       if (cpu_migrate(cpu)) {
+               fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
+               return -1;
+       }
+
+       if (do_ptm && is_cpu_first_core_in_package(t, c, p)) {
+               if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
+                       return 0;
+
+               dts = (msr >> 16) & 0x7F;
+               fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
+
+               if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
+                       return 0;
+
+               dts = (msr >> 16) & 0x7F;
+               dts2 = (msr >> 8) & 0x7F;
+               fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+                       cpu, msr, tj_max - dts, tj_max - dts2);
+       }
+
+       if (do_dts && debug) {
+               unsigned int resolution;
+
+               if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
+                       return 0;
+
+               dts = (msr >> 16) & 0x7F;
+               resolution = (msr >> 27) & 0xF;
+               fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
+                       cpu, msr, tj_max - dts, resolution);
+
+               if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
+                       return 0;
+
+               dts = (msr >> 16) & 0x7F;
+               dts2 = (msr >> 8) & 0x7F;
+               fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+                       cpu, msr, tj_max - dts, tj_max - dts2);
+       }
+
+       return 0;
+}
+
+void probe_thermal(void)
+{
+       if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
+               BIC_PRESENT(BIC_CORE_THROT_CNT);
+       else
+               BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
+
+       for_all_cpus(set_temperature_target, ODD_COUNTERS);
+
+       if (quiet)
+               return;
+
+       for_all_cpus(print_thermal, ODD_COUNTERS);
+}
+
+int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       unsigned int eax, ebx, ecx, edx;
+
+       UNUSED(c);
+       UNUSED(p);
 
-       tj_max = tcc_default;
+       if (!genuine_intel)
+               return 0;
 
-       return 0;
+       if (cpu_migrate(t->cpu_id)) {
+               fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
+               return -1;
+       }
 
-guess:
-       tj_max = TJMAX_DEFAULT;
-       fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);
+       if (max_level < 0x1a)
+               return 0;
 
+       __cpuid(0x1a, eax, ebx, ecx, edx);
+       eax = (eax >> 24) & 0xFF;
+       if (eax == 0x20)
+               t->is_atom = true;
        return 0;
 }
 
@@ -5354,7 +5428,7 @@ void decode_misc_feature_control(void)
 {
        unsigned long long msr;
 
-       if (!has_misc_feature_control)
+       if (!platform->has_msr_misc_feature_control)
                return;
 
        if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
@@ -5375,10 +5449,7 @@ void decode_misc_pwr_mgmt_msr(void)
 {
        unsigned long long msr;
 
-       if (!do_nhm_platform_info)
-               return;
-
-       if (no_MSR_MISC_PWR_MGMT)
+       if (!platform->has_msr_misc_pwr_mgmt)
                return;
 
        if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
@@ -5397,6 +5468,9 @@ void decode_c6_demotion_policy_msr(void)
 {
        unsigned long long msr;
 
+       if (!platform->has_msr_c6_demotion_policy_config)
+               return;
+
        if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
                fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
                        base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
@@ -5406,67 +5480,6 @@ void decode_c6_demotion_policy_msr(void)
                        base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
 }
 
-/*
- * When models are the same, for the purpose of turbostat, reuse
- */
-unsigned int intel_model_duplicates(unsigned int model)
-{
-
-       switch (model) {
-       case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
-       case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
-       case 0x1F:              /* Core i7 and i5 Processor - Nehalem */
-       case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
-       case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
-               return INTEL_FAM6_NEHALEM;
-
-       case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
-       case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
-               return INTEL_FAM6_NEHALEM_EX;
-
-       case INTEL_FAM6_XEON_PHI_KNM:
-               return INTEL_FAM6_XEON_PHI_KNL;
-
-       case INTEL_FAM6_BROADWELL_X:
-       case INTEL_FAM6_BROADWELL_D:    /* BDX-DE */
-               return INTEL_FAM6_BROADWELL_X;
-
-       case INTEL_FAM6_SKYLAKE_L:
-       case INTEL_FAM6_SKYLAKE:
-       case INTEL_FAM6_KABYLAKE_L:
-       case INTEL_FAM6_KABYLAKE:
-       case INTEL_FAM6_COMETLAKE_L:
-       case INTEL_FAM6_COMETLAKE:
-               return INTEL_FAM6_SKYLAKE_L;
-
-       case INTEL_FAM6_ICELAKE_L:
-       case INTEL_FAM6_ICELAKE_NNPI:
-       case INTEL_FAM6_TIGERLAKE_L:
-       case INTEL_FAM6_TIGERLAKE:
-       case INTEL_FAM6_ROCKETLAKE:
-       case INTEL_FAM6_LAKEFIELD:
-       case INTEL_FAM6_ALDERLAKE:
-       case INTEL_FAM6_ALDERLAKE_L:
-       case INTEL_FAM6_ATOM_GRACEMONT:
-       case INTEL_FAM6_RAPTORLAKE:
-       case INTEL_FAM6_RAPTORLAKE_P:
-       case INTEL_FAM6_RAPTORLAKE_S:
-       case INTEL_FAM6_METEORLAKE:
-       case INTEL_FAM6_METEORLAKE_L:
-               return INTEL_FAM6_CANNONLAKE_L;
-
-       case INTEL_FAM6_ATOM_TREMONT_L:
-               return INTEL_FAM6_ATOM_TREMONT;
-
-       case INTEL_FAM6_ICELAKE_D:
-               return INTEL_FAM6_ICELAKE_X;
-
-       case INTEL_FAM6_EMERALDRAPIDS_X:
-               return INTEL_FAM6_SAPPHIRERAPIDS_X;
-       }
-       return model;
-}
-
 void print_dev_latency(void)
 {
        char *path = "/dev/cpu_dma_latency";
@@ -5510,6 +5523,101 @@ void linux_perf_init(void)
        BIC_PRESENT(BIC_IPC);
 }
 
+void probe_cstates(void)
+{
+       probe_cst_limit();
+
+       if (platform->supported_cstates & CC1)
+               BIC_PRESENT(BIC_CPU_c1);
+
+       if (platform->supported_cstates & CC3)
+               BIC_PRESENT(BIC_CPU_c3);
+
+       if (platform->supported_cstates & CC6)
+               BIC_PRESENT(BIC_CPU_c6);
+
+       if (platform->supported_cstates & CC7)
+               BIC_PRESENT(BIC_CPU_c7);
+
+       if (platform->supported_cstates & PC2 && (pkg_cstate_limit >= PCL__2))
+               BIC_PRESENT(BIC_Pkgpc2);
+
+       if (platform->supported_cstates & PC3 && (pkg_cstate_limit >= PCL__3))
+               BIC_PRESENT(BIC_Pkgpc3);
+
+       if (platform->supported_cstates & PC6 && (pkg_cstate_limit >= PCL__6))
+               BIC_PRESENT(BIC_Pkgpc6);
+
+       if (platform->supported_cstates & PC7 && (pkg_cstate_limit >= PCL__7))
+               BIC_PRESENT(BIC_Pkgpc7);
+
+       if (platform->supported_cstates & PC8 && (pkg_cstate_limit >= PCL__8))
+               BIC_PRESENT(BIC_Pkgpc8);
+
+       if (platform->supported_cstates & PC9 && (pkg_cstate_limit >= PCL__9))
+               BIC_PRESENT(BIC_Pkgpc9);
+
+       if (platform->supported_cstates & PC10 && (pkg_cstate_limit >= PCL_10))
+               BIC_PRESENT(BIC_Pkgpc10);
+
+       if (platform->has_msr_module_c6_res_ms)
+               BIC_PRESENT(BIC_Mod_c6);
+
+       if (platform->has_ext_cst_msrs) {
+               BIC_PRESENT(BIC_Totl_c0);
+               BIC_PRESENT(BIC_Any_c0);
+               BIC_PRESENT(BIC_GFX_c0);
+               BIC_PRESENT(BIC_CPUGFX);
+       }
+
+       if (quiet)
+               return;
+
+       dump_power_ctl();
+       dump_cst_cfg();
+       decode_c6_demotion_policy_msr();
+       print_dev_latency();
+       dump_sysfs_cstate_config();
+       print_irtl();
+}
+
+void probe_lpi(void)
+{
+       if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
+               BIC_PRESENT(BIC_CPU_LPI);
+       else
+               BIC_NOT_PRESENT(BIC_CPU_LPI);
+
+       if (!access(sys_lpi_file_sysfs, R_OK)) {
+               sys_lpi_file = sys_lpi_file_sysfs;
+               BIC_PRESENT(BIC_SYS_LPI);
+       } else if (!access(sys_lpi_file_debugfs, R_OK)) {
+               sys_lpi_file = sys_lpi_file_debugfs;
+               BIC_PRESENT(BIC_SYS_LPI);
+       } else {
+               sys_lpi_file_sysfs = NULL;
+               BIC_NOT_PRESENT(BIC_SYS_LPI);
+       }
+
+}
+
+void probe_pstates(void)
+{
+       probe_bclk();
+
+       if (quiet)
+               return;
+
+       dump_platform_info();
+       dump_turbo_ratio_info();
+       dump_sysfs_pstate_config();
+       decode_misc_pwr_mgmt_msr();
+
+       for_all_cpus(print_hwp, ODD_COUNTERS);
+       for_all_cpus(print_epb, ODD_COUNTERS);
+       for_all_cpus(print_perf_limit, ODD_COUNTERS);
+}
+
 void process_cpuid()
 {
        unsigned int eax, ebx, ecx, edx;
@@ -5569,10 +5677,8 @@ void process_cpuid()
                        edx_flags & (1 << 22) ? "ACPI-TM" : "-",
                        edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
        }
-       if (genuine_intel) {
-               model_orig = model;
-               model = intel_model_duplicates(model);
-       }
+
+       probe_platform_features(family, model);
 
        if (!(edx_flags & (1 << 5)))
                errx(1, "CPUID: no MSR");
@@ -5656,26 +5762,12 @@ void process_cpuid()
                __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
 
                if (ebx_tsc != 0) {
-
                        if (!quiet && (ebx != 0))
                                fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
                                        eax_crystal, ebx_tsc, crystal_hz);
 
                        if (crystal_hz == 0)
-                               switch (model) {
-                               case INTEL_FAM6_SKYLAKE_L:      /* SKL */
-                                       crystal_hz = 24000000;  /* 24.0 MHz */
-                                       break;
-                               case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
-                                       crystal_hz = 25000000;  /* 25.0 MHz */
-                                       break;
-                               case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
-                               case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-                                       crystal_hz = 19200000;  /* 19.2 MHz */
-                                       break;
-                               default:
-                                       crystal_hz = 0;
-                               }
+                               crystal_hz = platform->crystal_freq;
 
                        if (crystal_hz) {
                                tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal;
@@ -5700,147 +5792,33 @@ void process_cpuid()
        }
 
        if (has_aperf)
-               aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
+               aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;
 
        BIC_PRESENT(BIC_IRQ);
        BIC_PRESENT(BIC_TSC_MHz);
+}
 
-       if (probe_nhm_msrs(family, model)) {
-               do_nhm_platform_info = 1;
-               BIC_PRESENT(BIC_CPU_c1);
-               BIC_PRESENT(BIC_CPU_c3);
-               BIC_PRESENT(BIC_CPU_c6);
-               BIC_PRESENT(BIC_SMI);
-       }
-       do_snb_cstates = has_snb_msrs(family, model);
-
-       if (do_snb_cstates)
-               BIC_PRESENT(BIC_CPU_c7);
-
-       do_irtl_snb = has_snb_msrs(family, model);
-       if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
-               BIC_PRESENT(BIC_Pkgpc2);
-       if (pkg_cstate_limit >= PCL__3)
-               BIC_PRESENT(BIC_Pkgpc3);
-       if (pkg_cstate_limit >= PCL__6)
-               BIC_PRESENT(BIC_Pkgpc6);
-       if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
-               BIC_PRESENT(BIC_Pkgpc7);
-       if (has_slv_msrs(family, model)) {
-               BIC_NOT_PRESENT(BIC_Pkgpc2);
-               BIC_NOT_PRESENT(BIC_Pkgpc3);
-               BIC_PRESENT(BIC_Pkgpc6);
-               BIC_NOT_PRESENT(BIC_Pkgpc7);
-               BIC_PRESENT(BIC_Mod_c6);
-               use_c1_residency_msr = 1;
-       }
-       if (is_jvl(family, model)) {
-               BIC_NOT_PRESENT(BIC_CPU_c3);
-               BIC_NOT_PRESENT(BIC_CPU_c7);
-               BIC_NOT_PRESENT(BIC_Pkgpc2);
-               BIC_NOT_PRESENT(BIC_Pkgpc3);
-               BIC_NOT_PRESENT(BIC_Pkgpc6);
-               BIC_NOT_PRESENT(BIC_Pkgpc7);
-       }
-       if (is_dnv(family, model)) {
-               BIC_PRESENT(BIC_CPU_c1);
-               BIC_NOT_PRESENT(BIC_CPU_c3);
-               BIC_NOT_PRESENT(BIC_Pkgpc3);
-               BIC_NOT_PRESENT(BIC_CPU_c7);
-               BIC_NOT_PRESENT(BIC_Pkgpc7);
-               use_c1_residency_msr = 1;
-       }
-       if (is_skx(family, model) || is_icx(family, model) || is_spr(family, model)) {
-               BIC_NOT_PRESENT(BIC_CPU_c3);
-               BIC_NOT_PRESENT(BIC_Pkgpc3);
-               BIC_NOT_PRESENT(BIC_CPU_c7);
-               BIC_NOT_PRESENT(BIC_Pkgpc7);
-       }
-       if (is_bdx(family, model)) {
-               BIC_NOT_PRESENT(BIC_CPU_c7);
-               BIC_NOT_PRESENT(BIC_Pkgpc7);
-       }
-       if (has_c8910_msrs(family, model)) {
-               if (pkg_cstate_limit >= PCL__8)
-                       BIC_PRESENT(BIC_Pkgpc8);
-               if (pkg_cstate_limit >= PCL__9)
-                       BIC_PRESENT(BIC_Pkgpc9);
-               if (pkg_cstate_limit >= PCL_10)
-                       BIC_PRESENT(BIC_Pkgpc10);
-       }
-       do_irtl_hsw = has_c8910_msrs(family, model);
-       if (has_skl_msrs(family, model)) {
-               BIC_PRESENT(BIC_Totl_c0);
-               BIC_PRESENT(BIC_Any_c0);
-               BIC_PRESENT(BIC_GFX_c0);
-               BIC_PRESENT(BIC_CPUGFX);
-       }
-       do_slm_cstates = is_slm(family, model);
-       do_knl_cstates = is_knl(family, model);
-
-       if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model))
-               BIC_NOT_PRESENT(BIC_CPU_c3);
-
-       if (!quiet)
-               decode_misc_pwr_mgmt_msr();
-
-       if (!quiet && has_slv_msrs(family, model))
-               decode_c6_demotion_policy_msr();
-
-       rapl_probe(family, model);
-       perf_limit_reasons_probe(family, model);
-       automatic_cstate_conversion_probe(family, model);
-
-       check_tcc_offset(model_orig);
-
-       if (!quiet)
-               dump_cstate_pstate_config_info(family, model);
-       intel_uncore_frequency_probe();
-
-       if (!quiet)
-               print_dev_latency();
-       if (!quiet)
-               dump_sysfs_cstate_config();
-       if (!quiet)
-               dump_sysfs_pstate_config();
+void probe_pm_features(void)
+{
+       probe_pstates();
 
-       if (has_skl_msrs(family, model) || is_ehl(family, model))
-               calculate_tsc_tweak();
+       probe_cstates();
 
-       if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
-               BIC_PRESENT(BIC_GFX_rc6);
+       probe_lpi();
 
-       if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
-               BIC_PRESENT(BIC_GFXMHz);
+       probe_intel_uncore_frequency();
 
-       if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
-               BIC_PRESENT(BIC_GFXACTMHz);
+       probe_graphics();
 
-       if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
-               BIC_PRESENT(BIC_CPU_LPI);
-       else
-               BIC_NOT_PRESENT(BIC_CPU_LPI);
+       probe_rapl();
 
-       if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
-               BIC_PRESENT(BIC_CORE_THROT_CNT);
-       else
-               BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
+       probe_thermal();
 
-       if (!access(sys_lpi_file_sysfs, R_OK)) {
-               sys_lpi_file = sys_lpi_file_sysfs;
-               BIC_PRESENT(BIC_SYS_LPI);
-       } else if (!access(sys_lpi_file_debugfs, R_OK)) {
-               sys_lpi_file = sys_lpi_file_debugfs;
-               BIC_PRESENT(BIC_SYS_LPI);
-       } else {
-               sys_lpi_file_sysfs = NULL;
-               BIC_NOT_PRESENT(BIC_SYS_LPI);
-       }
+       if (platform->has_nhm_msrs)
+               BIC_PRESENT(BIC_SMI);
 
        if (!quiet)
                decode_misc_feature_control();
-
-       return;
 }
 
 /*
@@ -5855,7 +5833,7 @@ int dir_filter(const struct dirent *dirp)
                return 0;
 }
 
-void topology_probe()
+void topology_probe(bool startup)
 {
        int i;
        int max_core_id = 0;
@@ -5888,14 +5866,62 @@ void topology_probe()
        for_all_proc_cpus(mark_cpu_present);
 
        /*
-        * Validate that all cpus in cpu_subset are also in cpu_present_set
+        * Allocate and initialize cpu_effective_set
+        */
+       cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1));
+       if (cpu_effective_set == NULL)
+               err(3, "CPU_ALLOC");
+       cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+       CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set);
+       update_effective_set(startup);
+
+       /*
+        * Allocate and initialize cpu_allowed_set
+        */
+       cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1));
+       if (cpu_allowed_set == NULL)
+               err(3, "CPU_ALLOC");
+       cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+       CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set);
+
+       /*
+        * Validate and update cpu_allowed_set.
+        *
+        * Make sure all cpus in cpu_subset are also in cpu_present_set during startup.
+        * Give a warning when cpus in cpu_subset become unavailable at runtime.
+        * Give a warning when cpus are not effective because of cgroup setting.
+        *
+        * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset.
         */
        for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
-               if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
-                       if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
-                               err(1, "cpu%d not present", i);
+               if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
+                       continue;
+
+               if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) {
+                       if (cpu_subset) {
+                               /* cpus in cpu_subset must be in cpu_present_set during startup */
+                               if (startup)
+                                       err(1, "cpu%d not present", i);
+                               else
+                                       fprintf(stderr, "cpu%d not present\n", i);
+                       }
+                       continue;
+               }
+
+               if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) {
+                       if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) {
+                               fprintf(stderr, "cpu%d not effective\n", i);
+                               continue;
+                       }
+               }
+
+               CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set);
        }
 
+       if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set))
+               err(-ENODEV, "No valid cpus found");
+       sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set);
+
        /*
         * Allocate and initialize cpu_affinity_set
         */
@@ -6009,15 +6035,19 @@ void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_
        if (*c == NULL)
                goto error;
 
-       for (i = 0; i < num_cores; i++)
+       for (i = 0; i < num_cores; i++) {
                (*c)[i].core_id = -1;
+               (*c)[i].base_cpu = -1;
+       }
 
        *p = calloc(topo.num_packages, sizeof(struct pkg_data));
        if (*p == NULL)
                goto error;
 
-       for (i = 0; i < topo.num_packages; i++)
+       for (i = 0; i < topo.num_packages; i++) {
                (*p)[i].package_id = i;
+               (*p)[i].base_cpu = -1;
+       }
 
        return;
 error:
@@ -6050,10 +6080,11 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base,
        p = GET_PKG(pkg_base, pkg_id);
 
        t->cpu_id = cpu_id;
-       if (thread_id == 0) {
-               t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
-               if (cpu_is_first_core_in_package(cpu_id))
-                       t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
+       if (!cpu_is_not_allowed(cpu_id)) {
+               if (c->base_cpu < 0)
+                       c->base_cpu = t->cpu_id;
+               if (p->base_cpu < 0)
+                       p->base_cpu = t->cpu_id;
        }
 
        c->core_id = core_id;
@@ -6093,59 +6124,64 @@ void allocate_irq_buffers(void)
                err(-1, "calloc %d", topo.max_cpu_num + 1);
 }
 
-void setup_all_buffers(void)
+int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       topo.allowed_cpus++;
+       if ((int)t->cpu_id == c->base_cpu)
+               topo.allowed_cores++;
+       if ((int)t->cpu_id == p->base_cpu)
+               topo.allowed_packages++;
+
+       return 0;
+}
+
+void topology_update(void)
+{
+       topo.allowed_cpus = 0;
+       topo.allowed_cores = 0;
+       topo.allowed_packages = 0;
+       for_all_cpus(update_topo, ODD_COUNTERS);
+}
+void setup_all_buffers(bool startup)
 {
-       topology_probe();
+       topology_probe(startup);
        allocate_irq_buffers();
        allocate_fd_percpu();
        allocate_counters(&thread_even, &core_even, &package_even);
        allocate_counters(&thread_odd, &core_odd, &package_odd);
        allocate_output_buffer();
        for_all_proc_cpus(initialize_counters);
+       topology_update();
 }
 
 void set_base_cpu(void)
 {
-       base_cpu = sched_getcpu();
-       if (base_cpu < 0)
-               err(-ENODEV, "No valid cpus found");
+       int i;
 
-       if (debug > 1)
-               fprintf(outf, "base_cpu = %d\n", base_cpu);
+       for (i = 0; i < topo.max_cpu_num + 1; ++i) {
+               if (cpu_is_not_allowed(i))
+                       continue;
+               base_cpu = i;
+               if (debug > 1)
+                       fprintf(outf, "base_cpu = %d\n", base_cpu);
+               return;
+       }
+       err(-ENODEV, "No valid cpus found");
 }
 
 void turbostat_init()
 {
-       setup_all_buffers();
+       setup_all_buffers(true);
        set_base_cpu();
        check_dev_msr();
        check_permissions();
        process_cpuid();
+       probe_pm_features();
        linux_perf_init();
 
-       if (!quiet)
-               for_all_cpus(print_hwp, ODD_COUNTERS);
-
-       if (!quiet)
-               for_all_cpus(print_epb, ODD_COUNTERS);
-
-       if (!quiet)
-               for_all_cpus(print_perf_limit, ODD_COUNTERS);
-
-       if (!quiet)
-               for_all_cpus(print_rapl, ODD_COUNTERS);
-
-       for_all_cpus(set_temperature_target, ODD_COUNTERS);
-
        for_all_cpus(get_cpu_type, ODD_COUNTERS);
        for_all_cpus(get_cpu_type, EVEN_COUNTERS);
 
-       if (!quiet)
-               for_all_cpus(print_thermal, ODD_COUNTERS);
-
-       if (!quiet && do_irtl_snb)
-               print_irtl();
-
        if (DO_BIC(BIC_IPC))
                (void)get_instr_count_fd(base_cpu);
 }
@@ -6160,8 +6196,6 @@ int fork_it(char **argv)
        first_counter_read = 0;
        if (status)
                exit(status);
-       /* clear affinity side-effect of get_counters() */
-       sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
        gettimeofday(&tv_even, (struct timezone *)NULL);
 
        child_pid = fork();
@@ -6225,7 +6259,7 @@ int get_and_dump_counters(void)
 
 void print_version()
 {
-       fprintf(outf, "turbostat version 2023.03.17 - Len Brown <lenb@kernel.org>\n");
+       fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n");
 }
 
 #define COMMAND_LINE_SIZE 2048
@@ -6508,9 +6542,6 @@ void probe_sysfs(void)
  */
 void parse_cpu_command(char *optarg)
 {
-       unsigned int start, end;
-       char *next;
-
        if (!strcmp(optarg, "core")) {
                if (cpu_subset)
                        goto error;
@@ -6533,52 +6564,8 @@ void parse_cpu_command(char *optarg)
 
        CPU_ZERO_S(cpu_subset_size, cpu_subset);
 
-       next = optarg;
-
-       while (next && *next) {
-
-               if (*next == '-')       /* no negative cpu numbers */
-                       goto error;
-
-               start = strtoul(next, &next, 10);
-
-               if (start >= CPU_SUBSET_MAXCPUS)
-                       goto error;
-               CPU_SET_S(start, cpu_subset_size, cpu_subset);
-
-               if (*next == '\0')
-                       break;
-
-               if (*next == ',') {
-                       next += 1;
-                       continue;
-               }
-
-               if (*next == '-') {
-                       next += 1;      /* start range */
-               } else if (*next == '.') {
-                       next += 1;
-                       if (*next == '.')
-                               next += 1;      /* start range */
-                       else
-                               goto error;
-               }
-
-               end = strtoul(next, &next, 10);
-               if (end <= start)
-                       goto error;
-
-               while (++start <= end) {
-                       if (start >= CPU_SUBSET_MAXCPUS)
-                               goto error;
-                       CPU_SET_S(start, cpu_subset_size, cpu_subset);
-               }
-
-               if (*next == ',')
-                       next += 1;
-               else if (*next != '\0')
-                       goto error;
-       }
+       if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size))
+               goto error;
 
        return;
 
@@ -6719,6 +6706,19 @@ void cmdline(int argc, char **argv)
 
 int main(int argc, char **argv)
 {
+       int fd, ret;
+
+       fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY);
+       if (fd < 0)
+               goto skip_cgroup_setting;
+
+       ret = write(fd, "0\n", 2);
+       if (ret == -1)
+               perror("Can't update cgroup\n");
+
+       close(fd);
+
+skip_cgroup_setting:
        outf = stderr;
        cmdline(argc, argv);
 
index b86cb1049497f39647566dcf03e39a96a69e2d23..587b9464822261df0482c3157c619b075c315b87 100644 (file)
@@ -85,7 +85,7 @@ int main(int argc, char **argv)
         */
        ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
        if (ret >= 0) {
-               ksft_test_result(fork_test(), "fork_test");
+               ksft_test_result(fork_test(), "fork_test\n");
 
        } else {
                ksft_print_msg("SME not supported\n");
index a934d430c20c130e3efbc47560bf7af64a1d1597..a92807bfcd134987417c0ea4254bf34aec1a5615 100644 (file)
@@ -1337,7 +1337,8 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
 }
 
 static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
-                                    int sock_mapfd, int verd_mapfd, enum redir_mode mode)
+                                    int sock_mapfd, int nop_mapfd,
+                                    int verd_mapfd, enum redir_mode mode)
 {
        const char *log_prefix = redir_mode_str(mode);
        unsigned int pass;
@@ -1351,6 +1352,12 @@ static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
        if (err)
                return;
 
+       if (nop_mapfd >= 0) {
+               err = add_to_sockmap(nop_mapfd, cli0, cli1);
+               if (err)
+                       return;
+       }
+
        n = write(cli1, "a", 1);
        if (n < 0)
                FAIL_ERRNO("%s: write", log_prefix);
@@ -1387,7 +1394,7 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd,
                goto close0;
        c1 = sfd[0], p1 = sfd[1];
 
-       pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
+       pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
 
        xclose(c1);
        xclose(p1);
@@ -1677,7 +1684,7 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
        if (err)
                goto close_cli0;
 
-       pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
+       pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
 
        xclose(c1);
        xclose(p1);
@@ -1735,7 +1742,7 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
        if (err)
                goto close;
 
-       pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
+       pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
 
        xclose(c1);
        xclose(p1);
@@ -1770,8 +1777,10 @@ static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
        xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
 }
 
-static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
-                                       int verd_mapfd, enum redir_mode mode)
+static void unix_inet_redir_to_connected(int family, int type,
+                                       int sock_mapfd, int nop_mapfd,
+                                       int verd_mapfd,
+                                       enum redir_mode mode)
 {
        int c0, c1, p0, p1;
        int sfd[2];
@@ -1785,7 +1794,8 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
                goto close_cli0;
        c1 = sfd[0], p1 = sfd[1];
 
-       pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
+       pairs_redir_to_connected(c0, p0, c1, p1,
+                                sock_mapfd, nop_mapfd, verd_mapfd, mode);
 
        xclose(c1);
        xclose(p1);
@@ -1799,6 +1809,7 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
                                            struct bpf_map *inner_map, int family)
 {
        int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+       int nop_map = bpf_map__fd(skel->maps.nop_map);
        int verdict_map = bpf_map__fd(skel->maps.verdict_map);
        int sock_map = bpf_map__fd(inner_map);
        int err;
@@ -1808,14 +1819,32 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
                return;
 
        skel->bss->test_ingress = false;
-       unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+       unix_inet_redir_to_connected(family, SOCK_DGRAM,
+                                    sock_map, -1, verdict_map,
                                     REDIR_EGRESS);
-       unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+       unix_inet_redir_to_connected(family, SOCK_DGRAM,
+                                    sock_map, -1, verdict_map,
+                                    REDIR_EGRESS);
+
+       unix_inet_redir_to_connected(family, SOCK_DGRAM,
+                                    sock_map, nop_map, verdict_map,
+                                    REDIR_EGRESS);
+       unix_inet_redir_to_connected(family, SOCK_STREAM,
+                                    sock_map, nop_map, verdict_map,
                                     REDIR_EGRESS);
        skel->bss->test_ingress = true;
-       unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+       unix_inet_redir_to_connected(family, SOCK_DGRAM,
+                                    sock_map, -1, verdict_map,
+                                    REDIR_INGRESS);
+       unix_inet_redir_to_connected(family, SOCK_STREAM,
+                                    sock_map, -1, verdict_map,
+                                    REDIR_INGRESS);
+
+       unix_inet_redir_to_connected(family, SOCK_DGRAM,
+                                    sock_map, nop_map, verdict_map,
                                     REDIR_INGRESS);
-       unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+       unix_inet_redir_to_connected(family, SOCK_STREAM,
+                                    sock_map, nop_map, verdict_map,
                                     REDIR_INGRESS);
 
        xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
index 6ee22c3b251ad0d6c9677fda5d46a65820d00d8b..518f143c5b0fec333c33d5c78ecad3cb1b0169dc 100644 (file)
@@ -24,6 +24,7 @@
 
 #include "test_progs.h"
 #include "network_helpers.h"
+#include "netlink_helpers.h"
 #include "test_tc_neigh_fib.skel.h"
 #include "test_tc_neigh.skel.h"
 #include "test_tc_peer.skel.h"
@@ -110,11 +111,17 @@ static void netns_setup_namespaces_nofail(const char *verb)
        }
 }
 
+enum dev_mode {
+       MODE_VETH,
+       MODE_NETKIT,
+};
+
 struct netns_setup_result {
-       int ifindex_veth_src;
-       int ifindex_veth_src_fwd;
-       int ifindex_veth_dst;
-       int ifindex_veth_dst_fwd;
+       enum dev_mode dev_mode;
+       int ifindex_src;
+       int ifindex_src_fwd;
+       int ifindex_dst;
+       int ifindex_dst_fwd;
 };
 
 static int get_ifaddr(const char *name, char *ifaddr)
@@ -137,58 +144,110 @@ static int get_ifaddr(const char *name, char *ifaddr)
        return 0;
 }
 
+static int create_netkit(int mode, char *prim, char *peer)
+{
+       struct rtattr *linkinfo, *data, *peer_info;
+       struct rtnl_handle rth = { .fd = -1 };
+       const char *type = "netkit";
+       struct {
+               struct nlmsghdr n;
+               struct ifinfomsg i;
+               char buf[1024];
+       } req = {};
+       int err;
+
+       err = rtnl_open(&rth, 0);
+       if (!ASSERT_OK(err, "open_rtnetlink"))
+               return err;
+
+       memset(&req, 0, sizeof(req));
+       req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+       req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+       req.n.nlmsg_type = RTM_NEWLINK;
+       req.i.ifi_family = AF_UNSPEC;
+
+       addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim));
+       linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
+       addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
+       data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
+       addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+       peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO);
+       req.n.nlmsg_len += sizeof(struct ifinfomsg);
+       addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer));
+       addattr_nest_end(&req.n, peer_info);
+       addattr_nest_end(&req.n, data);
+       addattr_nest_end(&req.n, linkinfo);
+
+       err = rtnl_talk(&rth, &req.n, NULL);
+       ASSERT_OK(err, "talk_rtnetlink");
+       rtnl_close(&rth);
+       return err;
+}
+
 static int netns_setup_links_and_routes(struct netns_setup_result *result)
 {
        struct nstoken *nstoken = NULL;
-       char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
-
-       SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd");
-       SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd");
+       char src_fwd_addr[IFADDR_STR_LEN+1] = {};
+       int err;
 
-       SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD);
-       SYS(fail, "ip link set veth_dst address " MAC_DST);
+       if (result->dev_mode == MODE_VETH) {
+               SYS(fail, "ip link add src type veth peer name src_fwd");
+               SYS(fail, "ip link add dst type veth peer name dst_fwd");
+
+               SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
+               SYS(fail, "ip link set dst address " MAC_DST);
+       } else if (result->dev_mode == MODE_NETKIT) {
+               err = create_netkit(NETKIT_L3, "src", "src_fwd");
+               if (!ASSERT_OK(err, "create_ifindex_src"))
+                       goto fail;
+               err = create_netkit(NETKIT_L3, "dst", "dst_fwd");
+               if (!ASSERT_OK(err, "create_ifindex_dst"))
+                       goto fail;
+       }
 
-       if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
+       if (get_ifaddr("src_fwd", src_fwd_addr))
                goto fail;
 
-       result->ifindex_veth_src = if_nametoindex("veth_src");
-       if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src"))
+       result->ifindex_src = if_nametoindex("src");
+       if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
                goto fail;
 
-       result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd");
-       if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd"))
+       result->ifindex_src_fwd = if_nametoindex("src_fwd");
+       if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
                goto fail;
 
-       result->ifindex_veth_dst = if_nametoindex("veth_dst");
-       if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst"))
+       result->ifindex_dst = if_nametoindex("dst");
+       if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
                goto fail;
 
-       result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd");
-       if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd"))
+       result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
+       if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
                goto fail;
 
-       SYS(fail, "ip link set veth_src netns " NS_SRC);
-       SYS(fail, "ip link set veth_src_fwd netns " NS_FWD);
-       SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD);
-       SYS(fail, "ip link set veth_dst netns " NS_DST);
+       SYS(fail, "ip link set src netns " NS_SRC);
+       SYS(fail, "ip link set src_fwd netns " NS_FWD);
+       SYS(fail, "ip link set dst_fwd netns " NS_FWD);
+       SYS(fail, "ip link set dst netns " NS_DST);
 
        /** setup in 'src' namespace */
        nstoken = open_netns(NS_SRC);
        if (!ASSERT_OK_PTR(nstoken, "setns src"))
                goto fail;
 
-       SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src");
-       SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad");
-       SYS(fail, "ip link set dev veth_src up");
+       SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
+       SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
+       SYS(fail, "ip link set dev src up");
 
-       SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global");
-       SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global");
-       SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global");
+       SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
+       SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
+       SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");
 
-       SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s",
-           veth_src_fwd_addr);
-       SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s",
-           veth_src_fwd_addr);
+       if (result->dev_mode == MODE_VETH) {
+               SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
+                   src_fwd_addr);
+               SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
+                   src_fwd_addr);
+       }
 
        close_netns(nstoken);
 
@@ -201,15 +260,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
         * needs v4 one in order to start ARP probing. IP4_NET route is added
         * to the endpoints so that the ARP processing will reply.
         */
-       SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd");
-       SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
-       SYS(fail, "ip link set dev veth_src_fwd up");
-       SYS(fail, "ip link set dev veth_dst_fwd up");
+       SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
+       SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
+       SYS(fail, "ip link set dev src_fwd up");
+       SYS(fail, "ip link set dev dst_fwd up");
 
-       SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
-       SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
-       SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
-       SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+       SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
+       SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
+       SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
+       SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");
 
        close_netns(nstoken);
 
@@ -218,16 +277,18 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
        if (!ASSERT_OK_PTR(nstoken, "setns dst"))
                goto fail;
 
-       SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst");
-       SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad");
-       SYS(fail, "ip link set dev veth_dst up");
+       SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
+       SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
+       SYS(fail, "ip link set dev dst up");
 
-       SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global");
-       SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global");
-       SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global");
+       SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
+       SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
+       SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");
 
-       SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
-       SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+       if (result->dev_mode == MODE_VETH) {
+               SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
+               SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
+       }
 
        close_netns(nstoken);
 
@@ -293,23 +354,23 @@ static int netns_load_bpf(const struct bpf_program *src_prog,
                          const struct bpf_program *chk_prog,
                          const struct netns_setup_result *setup_result)
 {
-       LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
-       LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
+       LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+       LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
        int err;
 
-       /* tc qdisc add dev veth_src_fwd clsact */
-       QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
-       /* tc filter add dev veth_src_fwd ingress bpf da src_prog */
-       XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0);
-       /* tc filter add dev veth_src_fwd egress bpf da chk_prog */
-       XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
+       /* tc qdisc add dev src_fwd clsact */
+       QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+       /* tc filter add dev src_fwd ingress bpf da src_prog */
+       XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
+       /* tc filter add dev src_fwd egress bpf da chk_prog */
+       XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
 
-       /* tc qdisc add dev veth_dst_fwd clsact */
-       QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
-       /* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
-       /* tc filter add dev veth_dst_fwd egress bpf da chk_prog */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
+       /* tc qdisc add dev dst_fwd clsact */
+       QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+       /* tc filter add dev dst_fwd ingress bpf da dst_prog */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
+       /* tc filter add dev dst_fwd egress bpf da chk_prog */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
 
        return 0;
 fail:
@@ -539,10 +600,10 @@ done:
 static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
                                const struct netns_setup_result *setup_result)
 {
-       LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
-       LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
-       LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src);
-       LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst);
+       LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+       LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
+       LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
+       LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
        struct nstoken *nstoken;
        int err;
 
@@ -550,58 +611,58 @@ static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
        nstoken = open_netns(NS_SRC);
        if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
                return -1;
-       /* tc qdisc add dev veth_src clsact */
-       QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src);
-       /* tc filter add dev veth_src ingress bpf da ingress_host */
-       XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
-       /* tc filter add dev veth_src egress bpf da egress_host */
-       XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
+       /* tc qdisc add dev src clsact */
+       QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
+       /* tc filter add dev src ingress bpf da ingress_host */
+       XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+       /* tc filter add dev src egress bpf da egress_host */
+       XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
        close_netns(nstoken);
 
        /* setup ns_dst tc progs */
        nstoken = open_netns(NS_DST);
        if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
                return -1;
-       /* tc qdisc add dev veth_dst clsact */
-       QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst);
-       /* tc filter add dev veth_dst ingress bpf da ingress_host */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
-       /* tc filter add dev veth_dst egress bpf da egress_host */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
+       /* tc qdisc add dev dst clsact */
+       QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
+       /* tc filter add dev dst ingress bpf da ingress_host */
+       XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+       /* tc filter add dev dst egress bpf da egress_host */
+       XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
        close_netns(nstoken);
 
        /* setup ns_fwd tc progs */
        nstoken = open_netns(NS_FWD);
        if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
                return -1;
-       /* tc qdisc add dev veth_dst_fwd clsact */
-       QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
-       /* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
+       /* tc qdisc add dev dst_fwd clsact */
+       QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+       /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
                          skel->progs.ingress_fwdns_prio100, 100);
-       /* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
+       /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
                          skel->progs.ingress_fwdns_prio101, 101);
-       /* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
+       /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
                          skel->progs.egress_fwdns_prio100, 100);
-       /* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
+       /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
                          skel->progs.egress_fwdns_prio101, 101);
 
-       /* tc qdisc add dev veth_src_fwd clsact */
-       QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
-       /* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
-       XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
+       /* tc qdisc add dev src_fwd clsact */
+       QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+       /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+       XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
                          skel->progs.ingress_fwdns_prio100, 100);
-       /* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
-       XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
+       /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+       XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
                          skel->progs.ingress_fwdns_prio101, 101);
-       /* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
-       XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
+       /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+       XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
                          skel->progs.egress_fwdns_prio100, 100);
-       /* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
-       XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
+       /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+       XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
                          skel->progs.egress_fwdns_prio101, 101);
        close_netns(nstoken);
        return 0;
@@ -777,8 +838,8 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
        if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
                return;
 
-       skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
-       skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+       skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+       skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
 
        err = test_tc_dtime__load(skel);
        if (!ASSERT_OK(err, "test_tc_dtime__load"))
@@ -868,8 +929,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
        if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
                goto done;
 
-       skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
-       skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+       skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+       skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
 
        err = test_tc_neigh__load(skel);
        if (!ASSERT_OK(err, "test_tc_neigh__load"))
@@ -904,8 +965,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
        if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
                goto done;
 
-       skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
-       skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+       skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+       skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
 
        err = test_tc_peer__load(skel);
        if (!ASSERT_OK(err, "test_tc_peer__load"))
@@ -996,7 +1057,7 @@ static int tun_relay_loop(int src_fd, int target_fd)
 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
 {
        LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
-       LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
+       LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
        struct test_tc_peer *skel = NULL;
        struct nstoken *nstoken = NULL;
        int err;
@@ -1045,7 +1106,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
                goto fail;
 
        skel->rodata->IFINDEX_SRC = ifindex;
-       skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+       skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
 
        err = test_tc_peer__load(skel);
        if (!ASSERT_OK(err, "test_tc_peer__load"))
@@ -1053,19 +1114,19 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
 
        /* Load "tc_src_l3" to the tun_fwd interface to redirect packets
         * towards dst, and "tc_dst" to redirect packets
-        * and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
+        * and "tc_chk" on dst_fwd to drop non-redirected packets.
         */
        /* tc qdisc add dev tun_fwd clsact */
        QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
        /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
        XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
 
-       /* tc qdisc add dev veth_dst_fwd clsact */
-       QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
-       /* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
-       /* tc filter add dev veth_dst_fwd egress bpf da tc_chk */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
+       /* tc qdisc add dev dst_fwd clsact */
+       QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+       /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
+       /* tc filter add dev dst_fwd egress bpf da tc_chk */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
 
        /* Setup route and neigh tables */
        SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
@@ -1074,17 +1135,17 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
        SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
        SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
 
-       SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+       SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
        SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
            " dev tun_src scope global");
-       SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
-       SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+       SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
+       SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
        SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
            " dev tun_src scope global");
-       SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+       SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
 
-       SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
-       SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+       SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
+       SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
 
        if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
                goto fail;
@@ -1106,9 +1167,9 @@ fail:
                close_netns(nstoken);
 }
 
-#define RUN_TEST(name)                                                                      \
+#define RUN_TEST(name, mode)                                                                \
        ({                                                                                  \
-               struct netns_setup_result setup_result;                                     \
+               struct netns_setup_result setup_result = { .dev_mode = mode, };             \
                if (test__start_subtest(#name))                                             \
                        if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
                                if (ASSERT_OK(netns_setup_links_and_routes(&setup_result),  \
@@ -1122,11 +1183,13 @@ static void *test_tc_redirect_run_tests(void *arg)
 {
        netns_setup_namespaces_nofail("delete");
 
-       RUN_TEST(tc_redirect_peer);
-       RUN_TEST(tc_redirect_peer_l3);
-       RUN_TEST(tc_redirect_neigh);
-       RUN_TEST(tc_redirect_neigh_fib);
-       RUN_TEST(tc_redirect_dtime);
+       RUN_TEST(tc_redirect_peer, MODE_VETH);
+       RUN_TEST(tc_redirect_peer, MODE_NETKIT);
+       RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
+       RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
+       RUN_TEST(tc_redirect_neigh, MODE_VETH);
+       RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
+       RUN_TEST(tc_redirect_dtime, MODE_VETH);
        return NULL;
 }
 
index e5c61aa6604ace3fb1b70c22d353a065a4c9162d..5cfa7a6316b63c7ac3e2263ec99f3c9592e038f4 100644 (file)
@@ -31,6 +31,7 @@
 #include "verifier_helper_restricted.skel.h"
 #include "verifier_helper_value_access.skel.h"
 #include "verifier_int_ptr.skel.h"
+#include "verifier_iterating_callbacks.skel.h"
 #include "verifier_jeq_infer_not_null.skel.h"
 #include "verifier_ld_ind.skel.h"
 #include "verifier_ldsx.skel.h"
@@ -139,6 +140,7 @@ void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_acces
 void test_verifier_helper_restricted(void)    { RUN(verifier_helper_restricted); }
 void test_verifier_helper_value_access(void)  { RUN(verifier_helper_value_access); }
 void test_verifier_int_ptr(void)              { RUN(verifier_int_ptr); }
+void test_verifier_iterating_callbacks(void)  { RUN(verifier_iterating_callbacks); }
 void test_verifier_jeq_infer_not_null(void)   { RUN(verifier_jeq_infer_not_null); }
 void test_verifier_ld_ind(void)               { RUN(verifier_ld_ind); }
 void test_verifier_ldsx(void)                  { RUN(verifier_ldsx); }
index 4ce76eb064c41c8b886fe7c1f430c54c4b845dd3..d461746fd3c1e7974b69de7e7621abc172b068fb 100644 (file)
@@ -15,13 +15,16 @@ static int empty_callback(__u32 index, void *data)
        return 0;
 }
 
+static int outer_loop(__u32 index, void *data)
+{
+       bpf_loop(nr_loops, empty_callback, NULL, 0);
+       __sync_add_and_fetch(&hits, nr_loops);
+       return 0;
+}
+
 SEC("fentry/" SYS_PREFIX "sys_getpgid")
 int benchmark(void *ctx)
 {
-       for (int i = 0; i < 1000; i++) {
-               bpf_loop(nr_loops, empty_callback, NULL, 0);
-
-               __sync_add_and_fetch(&hits, nr_loops);
-       }
+       bpf_loop(1000, outer_loop, NULL, 0);
        return 0;
 }
index 76d661b20e87d0db55973ef697e083d5aadf92e0..56c764df8196793155d69967ca1c4a28099d2540 100644 (file)
@@ -33,6 +33,7 @@ int underflow_prog(void *ctx)
        if (!p)
                return 0;
        bpf_for_each_map_elem(&array_map, cb1, &p, 0);
+       bpf_kfunc_call_test_release(p);
        return 0;
 }
 
index 4c39e920dac223c7f2e6e83e80bd083379bbe405..8c0ef2742208ae929293a0aa7f24269d52c7a94a 100644 (file)
@@ -171,6 +171,7 @@ int reject_with_rbtree_add_throw(void *ctx)
                return 0;
        bpf_spin_lock(&lock);
        bpf_rbtree_add(&rbtree, &f->node, rbless);
+       bpf_spin_unlock(&lock);
        return 0;
 }
 
@@ -214,6 +215,7 @@ int reject_with_cb_reference(void *ctx)
        if (!f)
                return 0;
        bpf_loop(5, subprog_cb_ref, NULL, 0);
+       bpf_obj_drop(f);
        return 0;
 }
 
index e02cfd3807469599eef5e1438ed8167e10b8f205..40df2cc26eaf9d83005756fa8c67fd2e20171847 100644 (file)
@@ -24,9 +24,11 @@ struct task_struct {};
 #define STACK_TABLE_EPOCH_SHIFT 20
 #define STROBE_MAX_STR_LEN 1
 #define STROBE_MAX_CFGS 32
+#define READ_MAP_VAR_PAYLOAD_CAP                                       \
+       ((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
 #define STROBE_MAX_PAYLOAD                                             \
        (STROBE_MAX_STRS * STROBE_MAX_STR_LEN +                         \
-       STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
+        STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP)
 
 struct strobe_value_header {
        /*
@@ -355,7 +357,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
                                             size_t idx, void *tls_base,
                                             struct strobe_value_generic *value,
                                             struct strobemeta_payload *data,
-                                            void *payload)
+                                            size_t off)
 {
        void *location;
        uint64_t len;
@@ -366,7 +368,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
                return 0;
 
        bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
-       len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
+       len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr);
        /*
         * if bpf_probe_read_user_str returns error (<0), due to casting to
         * unsinged int, it will become big number, so next check is
@@ -378,14 +380,14 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
                return 0;
 
        data->str_lens[idx] = len;
-       return len;
+       return off + len;
 }
 
-static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
-                                         size_t idx, void *tls_base,
-                                         struct strobe_value_generic *value,
-                                         struct strobemeta_payload *data,
-                                         void *payload)
+static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg,
+                                            size_t idx, void *tls_base,
+                                            struct strobe_value_generic *value,
+                                            struct strobemeta_payload *data,
+                                            size_t off)
 {
        struct strobe_map_descr* descr = &data->map_descrs[idx];
        struct strobe_map_raw map;
@@ -397,11 +399,11 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
 
        location = calc_location(&cfg->map_locs[idx], tls_base);
        if (!location)
-               return payload;
+               return off;
 
        bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
        if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
-               return payload;
+               return off;
 
        descr->id = map.id;
        descr->cnt = map.cnt;
@@ -410,10 +412,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
                data->req_meta_valid = 1;
        }
 
-       len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
+       len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag);
        if (len <= STROBE_MAX_STR_LEN) {
                descr->tag_len = len;
-               payload += len;
+               off += len;
        }
 
 #ifdef NO_UNROLL
@@ -426,22 +428,22 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
                        break;
 
                descr->key_lens[i] = 0;
-               len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+               len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
                                              map.entries[i].key);
                if (len <= STROBE_MAX_STR_LEN) {
                        descr->key_lens[i] = len;
-                       payload += len;
+                       off += len;
                }
                descr->val_lens[i] = 0;
-               len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+               len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
                                              map.entries[i].val);
                if (len <= STROBE_MAX_STR_LEN) {
                        descr->val_lens[i] = len;
-                       payload += len;
+                       off += len;
                }
        }
 
-       return payload;
+       return off;
 }
 
 #ifdef USE_BPF_LOOP
@@ -455,14 +457,20 @@ struct read_var_ctx {
        struct strobemeta_payload *data;
        void *tls_base;
        struct strobemeta_cfg *cfg;
-       void *payload;
+       size_t payload_off;
        /* value gets mutated */
        struct strobe_value_generic *value;
        enum read_type type;
 };
 
-static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
+static int read_var_callback(__u64 index, struct read_var_ctx *ctx)
 {
+       /* lose precision info for ctx->payload_off, verifier won't track
+        * double xor, barrier_var() is needed to force clang keep both xors.
+        */
+       ctx->payload_off ^= index;
+       barrier_var(ctx->payload_off);
+       ctx->payload_off ^= index;
        switch (ctx->type) {
        case READ_INT_VAR:
                if (index >= STROBE_MAX_INTS)
@@ -472,14 +480,18 @@ static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
        case READ_MAP_VAR:
                if (index >= STROBE_MAX_MAPS)
                        return 1;
-               ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
-                                           ctx->value, ctx->data, ctx->payload);
+               if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP)
+                       return 1;
+               ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base,
+                                               ctx->value, ctx->data, ctx->payload_off);
                break;
        case READ_STR_VAR:
                if (index >= STROBE_MAX_STRS)
                        return 1;
-               ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
-                                            ctx->value, ctx->data, ctx->payload);
+               if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN)
+                       return 1;
+               ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base,
+                                               ctx->value, ctx->data, ctx->payload_off);
                break;
        }
        return 0;
@@ -501,7 +513,8 @@ static void *read_strobe_meta(struct task_struct *task,
        pid_t pid = bpf_get_current_pid_tgid() >> 32;
        struct strobe_value_generic value = {0};
        struct strobemeta_cfg *cfg;
-       void *tls_base, *payload;
+       size_t payload_off;
+       void *tls_base;
 
        cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
        if (!cfg)
@@ -509,7 +522,7 @@ static void *read_strobe_meta(struct task_struct *task,
 
        data->int_vals_set_mask = 0;
        data->req_meta_valid = 0;
-       payload = data->payload;
+       payload_off = 0;
        /*
         * we don't have struct task_struct definition, it should be:
         * tls_base = (void *)task->thread.fsbase;
@@ -522,7 +535,7 @@ static void *read_strobe_meta(struct task_struct *task,
                .tls_base = tls_base,
                .value = &value,
                .data = data,
-               .payload = payload,
+               .payload_off = 0,
        };
        int err;
 
@@ -540,6 +553,11 @@ static void *read_strobe_meta(struct task_struct *task,
        err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
        if (err != STROBE_MAX_MAPS)
                return NULL;
+
+       payload_off = ctx.payload_off;
+       /* this should not really happen, here only to satisfy verifer */
+       if (payload_off > sizeof(data->payload))
+               payload_off = sizeof(data->payload);
 #else
 #ifdef NO_UNROLL
 #pragma clang loop unroll(disable)
@@ -555,7 +573,7 @@ static void *read_strobe_meta(struct task_struct *task,
 #pragma unroll
 #endif /* NO_UNROLL */
        for (int i = 0; i < STROBE_MAX_STRS; ++i) {
-               payload += read_str_var(cfg, i, tls_base, &value, data, payload);
+               payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off);
        }
 #ifdef NO_UNROLL
 #pragma clang loop unroll(disable)
@@ -563,7 +581,7 @@ static void *read_strobe_meta(struct task_struct *task,
 #pragma unroll
 #endif /* NO_UNROLL */
        for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
-               payload = read_map_var(cfg, i, tls_base, &value, data, payload);
+               payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off);
        }
 #endif /* USE_BPF_LOOP */
 
@@ -571,7 +589,7 @@ static void *read_strobe_meta(struct task_struct *task,
         * return pointer right after end of payload, so it's possible to
         * calculate exact amount of useful data that needs to be sent
         */
-       return payload;
+       return &data->payload[payload_off];
 }
 
 SEC("raw_tracepoint/kfree_skb")
index 464d35bd57c708a748163d71089ee329a76b56e1..b7250eb9c30cca8f77675e56529e6b780d76ab2e 100644 (file)
@@ -14,6 +14,13 @@ struct {
        __type(value, __u64);
 } sock_map SEC(".maps");
 
+struct {
+       __uint(type, BPF_MAP_TYPE_SOCKMAP);
+       __uint(max_entries, 2);
+       __type(key, __u32);
+       __type(value, __u64);
+} nop_map SEC(".maps");
+
 struct {
        __uint(type, BPF_MAP_TYPE_SOCKHASH);
        __uint(max_entries, 2);
index df7697b94007b12d8958738c1b38d444c10c5eff..c1f55e1d80a426f3b87c2f13a74176f19bb18e28 100644 (file)
@@ -97,4 +97,66 @@ l0_%=:       r2 = r0;                                        \
 "      ::: __clobber_all);
 }
 
+SEC("socket")
+__description("conditional loop (2)")
+__success
+__failure_unpriv __msg_unpriv("back-edge from insn 10 to 11")
+__naked void conditional_loop2(void)
+{
+       asm volatile ("                                 \
+       r9 = 2 ll;                                      \
+       r3 = 0x20 ll;                                   \
+       r4 = 0x35 ll;                                   \
+       r8 = r4;                                        \
+       goto l1_%=;                                     \
+l0_%=: r9 -= r3;                                       \
+       r9 -= r4;                                       \
+       r9 -= r8;                                       \
+l1_%=: r8 += r4;                                       \
+       if r8 < 0x64 goto l0_%=;                        \
+       r0 = r9;                                        \
+       exit;                                           \
+"      ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unconditional loop after conditional jump")
+__failure __msg("infinite loop detected")
+__failure_unpriv __msg_unpriv("back-edge from insn 3 to 2")
+__naked void uncond_loop_after_cond_jmp(void)
+{
+       asm volatile ("                                 \
+       r0 = 0;                                         \
+       if r0 > 0 goto l1_%=;                           \
+l0_%=: r0 = 1;                                         \
+       goto l0_%=;                                     \
+l1_%=: exit;                                           \
+"      ::: __clobber_all);
+}
+
+
+__naked __noinline __used
+static unsigned long never_ending_subprog()
+{
+       asm volatile ("                                 \
+       r0 = r1;                                        \
+       goto -1;                                        \
+"      ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unconditional loop after conditional jump")
+/* infinite loop is detected *after* check_cfg() */
+__failure __msg("infinite loop detected")
+__naked void uncond_loop_in_subprog_after_cond_jmp(void)
+{
+       asm volatile ("                                 \
+       r0 = 0;                                         \
+       if r0 > 0 goto l1_%=;                           \
+l0_%=: r0 += 1;                                        \
+       call never_ending_subprog;                      \
+l1_%=: exit;                                           \
+"      ::: __clobber_all);
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
new file mode 100644 (file)
index 0000000..5905e03
--- /dev/null
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 8);
+       __type(key, __u32);
+       __type(value, __u64);
+} map SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+       __uint(max_entries, 8);
+} ringbuf SEC(".maps");
+
+struct vm_area_struct;
+struct bpf_map;
+
+struct buf_context {
+       char *buf;
+};
+
+struct num_context {
+       __u64 i;
+       __u64 j;
+};
+
+__u8 choice_arr[2] = { 0, 1 };
+
+static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx)
+{
+       if (idx == 0) {
+               ctx->buf = (char *)(0xDEAD);
+               return 0;
+       }
+
+       if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE)))
+               return 1;
+
+       return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("R1 type=scalar expected=fp")
+int unsafe_on_2nd_iter(void *unused)
+{
+       char buf[4];
+       struct buf_context loop_ctx = { .buf = buf };
+
+       bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0);
+       return 0;
+}
+
+static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx)
+{
+       ctx->i = 0;
+       return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_on_zero_iter(void *unused)
+{
+       struct num_context loop_ctx = { .i = 32 };
+
+       bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0);
+       return choice_arr[loop_ctx.i];
+}
+
+static int widening_cb(__u32 idx, struct num_context *ctx)
+{
+       ++ctx->i;
+       return 0;
+}
+
+SEC("?raw_tp")
+__success
+int widening(void *unused)
+{
+       struct num_context loop_ctx = { .i = 0, .j = 1 };
+
+       bpf_loop(100, widening_cb, &loop_ctx, 0);
+       /* loop_ctx.j is not changed during callback iteration,
+        * verifier should not apply widening to it.
+        */
+       return choice_arr[loop_ctx.j];
+}
+
+static int loop_detection_cb(__u32 idx, struct num_context *ctx)
+{
+       for (;;) {}
+       return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("infinite loop detected")
+int loop_detection(void *unused)
+{
+       struct num_context loop_ctx = { .i = 0 };
+
+       bpf_loop(100, loop_detection_cb, &loop_ctx, 0);
+       return 0;
+}
+
+static __always_inline __u64 oob_state_machine(struct num_context *ctx)
+{
+       switch (ctx->i) {
+       case 0:
+               ctx->i = 1;
+               break;
+       case 1:
+               ctx->i = 32;
+               break;
+       }
+       return 0;
+}
+
+static __u64 for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
+{
+       return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_for_each_map_elem(void *unused)
+{
+       struct num_context loop_ctx = { .i = 0 };
+
+       bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0);
+       return choice_arr[loop_ctx.i];
+}
+
+static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void *data)
+{
+       return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_ringbuf_drain(void *unused)
+{
+       struct num_context loop_ctx = { .i = 0 };
+
+       bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0);
+       return choice_arr[loop_ctx.i];
+}
+
+static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data)
+{
+       return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_find_vma(void *unused)
+{
+       struct task_struct *task = bpf_get_current_task_btf();
+       struct num_context loop_ctx = { .i = 0 };
+
+       bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0);
+       return choice_arr[loop_ctx.i];
+}
+
+static int iter_limit_cb(__u32 idx, struct num_context *ctx)
+{
+       ctx->i++;
+       return 0;
+}
+
+SEC("?raw_tp")
+__success
+int bpf_loop_iter_limit_ok(void *unused)
+{
+       struct num_context ctx = { .i = 0 };
+
+       bpf_loop(1, iter_limit_cb, &ctx, 0);
+       return choice_arr[ctx.i];
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=2 size=1")
+int bpf_loop_iter_limit_overflow(void *unused)
+{
+       struct num_context ctx = { .i = 0 };
+
+       bpf_loop(2, iter_limit_cb, &ctx, 0);
+       return choice_arr[ctx.i];
+}
+
+static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx)
+{
+       ctx->i += 100;
+       return 0;
+}
+
+static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx)
+{
+       ctx->i += 10;
+       return 0;
+}
+
+static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx)
+{
+       ctx->i += 1;
+       bpf_loop(1, iter_limit_level2a_cb, ctx, 0);
+       bpf_loop(1, iter_limit_level2b_cb, ctx, 0);
+       return 0;
+}
+
+/* Check that path visiting every callback function once had been
+ * reached by verifier. Variables 'ctx{1,2}i' below serve as flags,
+ * with each decimal digit corresponding to a callback visit marker.
+ */
+SEC("socket")
+__success __retval(111111)
+int bpf_loop_iter_limit_nested(void *unused)
+{
+       struct num_context ctx1 = { .i = 0 };
+       struct num_context ctx2 = { .i = 0 };
+       __u64 a, b, c;
+
+       bpf_loop(1, iter_limit_level1_cb, &ctx1, 0);
+       bpf_loop(1, iter_limit_level1_cb, &ctx2, 0);
+       a = ctx1.i;
+       b = ctx2.i;
+       /* Force 'ctx1.i' and 'ctx2.i' precise. */
+       c = choice_arr[(a + b) % 2];
+       /* This makes 'c' zero, but neither clang nor verifier know it. */
+       c /= 10;
+       /* Make sure that verifier does not visit 'impossible' states:
+        * enumerate all possible callback visit masks.
+        */
+       if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 &&
+           b != 0 && b != 1 && b != 11 && b != 101 && b != 111)
+               asm volatile ("r0 /= 0;" ::: "r0");
+       return 1000 * a + b + c;
+}
+
+char _license[] SEC("license") = "GPL";
index 5bc86af80a9ad4ea9fa238c6c087655d798e49da..71735dbf33d4f868e32197fe5e6e051da9fa87da 100644 (file)
@@ -75,9 +75,10 @@ l0_%=:       r0 += 1;                                        \
 "      ::: __clobber_all);
 }
 
-SEC("tracepoint")
+SEC("socket")
 __description("bounded loop, start in the middle")
-__failure __msg("back-edge")
+__success
+__failure_unpriv __msg_unpriv("back-edge")
 __naked void loop_start_in_the_middle(void)
 {
        asm volatile ("                                 \
@@ -136,7 +137,9 @@ l0_%=:      exit;                                           \
 
 SEC("tracepoint")
 __description("bounded recursion")
-__failure __msg("back-edge")
+__failure
+/* verifier limitation in detecting max stack depth */
+__msg("the call stack of 8 frames is too deep !")
 __naked void bounded_recursion(void)
 {
        asm volatile ("                                 \
index 193c0f8272d056f562266d891cdfd14c47c011e3..6b564d4c09866a0752fb27e8927ecb5aa96f15a1 100644 (file)
@@ -91,3 +91,43 @@ __naked int bpf_end_bswap(void)
 }
 
 #endif /* v4 instruction */
+
+SEC("?raw_tp")
+__success __log_level(2)
+/*
+ * Without the bug fix there will be no history between "last_idx 3 first_idx 3"
+ * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor
+ * expected log messages to the one specific mark_chain_precision operation.
+ *
+ * This is quite fragile: if verifier checkpointing heuristic changes, this
+ * might need adjusting.
+ */
+__msg("2: (07) r0 += 1                       ; R0_w=6")
+__msg("3: (35) if r0 >= 0xa goto pc+1")
+__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1")
+__msg("mark_precise: frame0: parent state regs= stack=:  R0_rw=P4")
+__msg("3: R0_w=6")
+__naked int state_loop_first_last_equal(void)
+{
+       asm volatile (
+               "r0 = 0;"
+       "l0_%=:"
+               "r0 += 1;"
+               "r0 += 1;"
+               /* every few iterations we'll have a checkpoint here with
+                * first_idx == last_idx, potentially confusing precision
+                * backtracking logic
+                */
+               "if r0 >= 10 goto l1_%=;"       /* checkpoint + mark_precise */
+               "goto l0_%=;"
+       "l1_%=:"
+               "exit;"
+               ::: __clobber_common
+       );
+}
+
+char _license[] SEC("license") = "GPL";
index db6b3143338b613c8062ff519068abaa26f2234e..f61d623b1ce8dfe1c4b1355c036e2b74e71d7227 100644 (file)
@@ -119,15 +119,41 @@ __naked int global_subprog_result_precise(void)
 
 SEC("?raw_tp")
 __success __log_level(2)
+/* First simulated path does not include callback body,
+ * r1 and r4 are always precise for bpf_loop() calls.
+ */
+__msg("9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r4 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r1 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* r6 precision propagation */
 __msg("14: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 14 first_idx 10")
+__msg("mark_precise: frame0: last_idx 14 first_idx 9")
 __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
 __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
 __msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4")
 __msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0")
-__msg("mark_precise: frame0: parent state regs=r0 stack=:")
-__msg("mark_precise: frame0: last_idx 18 first_idx 0")
-__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit")
+__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+__msg("frame 0: propagating r1,r4")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit")
+__msg("from 18 to 9: safe")
 __naked int callback_result_precise(void)
 {
        asm volatile (
@@ -233,20 +259,36 @@ __naked int parent_callee_saved_reg_precise_global(void)
 
 SEC("?raw_tp")
 __success __log_level(2)
+/* First simulated path does not include callback body */
 __msg("12: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 12 first_idx 10")
+__msg("mark_precise: frame0: last_idx 12 first_idx 9")
 __msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7")
 __msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop")
 __msg("mark_precise: frame0: parent state regs=r6 stack=:")
-__msg("mark_precise: frame0: last_idx 16 first_idx 0")
-__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit")
-__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
-__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
 __msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0")
 __msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0")
 __msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8")
 __msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1")
 __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 15: frame1:")
+__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("15: (b7) r0 = 0")
+__msg("16: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+/* r1, r4 are always precise for bpf_loop(),
+ * r6 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 16 to 9: safe")
 __naked int parent_callee_saved_reg_precise_with_callback(void)
 {
        asm volatile (
@@ -373,22 +415,38 @@ __naked int parent_stack_slot_precise_global(void)
 
 SEC("?raw_tp")
 __success __log_level(2)
+/* First simulated path does not include callback body */
 __msg("14: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 14 first_idx 11")
+__msg("mark_precise: frame0: last_idx 14 first_idx 10")
 __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
 __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
 __msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop")
 __msg("mark_precise: frame0: parent state regs= stack=-8:")
-__msg("mark_precise: frame0: last_idx 18 first_idx 0")
-__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
-__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
-__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10")
 __msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0")
 __msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0")
 __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8")
 __msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6")
 __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6")
 __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 10 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 10:")
+/* r1, r4 are always precise for bpf_loop(),
+ * fp-8 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,fp-8")
+__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 18 to 10: safe")
 __naked int parent_stack_slot_precise_with_callback(void)
 {
        asm volatile (
index e959336c7a7304be409ffac7d3a34f64538d5f74..80f620602d50ffc1e4598e6c5e45c5dfa4880412 100644 (file)
@@ -53,6 +53,8 @@
 #define DEFAULT_TTL 64
 #define MAX_ALLOWED_PORTS 8
 
+#define MAX_PACKET_OFF 0xffff
+
 #define swap(a, b) \
        do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
 
@@ -183,63 +185,76 @@ static __always_inline __u32 tcp_clock_ms(void)
 }
 
 struct tcpopt_context {
-       __u8 *ptr;
-       __u8 *end;
+       void *data;
        void *data_end;
        __be32 *tsecr;
        __u8 wscale;
        bool option_timestamp;
        bool option_sack;
+       __u32 off;
 };
 
-static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz)
 {
-       __u8 opcode, opsize;
+       __u64 off = ctx->off;
+       __u8 *data;
 
-       if (ctx->ptr >= ctx->end)
-               return 1;
-       if (ctx->ptr >= ctx->data_end)
-               return 1;
+       /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
+       if (off > MAX_PACKET_OFF - sz)
+               return NULL;
 
-       opcode = ctx->ptr[0];
+       data = ctx->data + off;
+       barrier_var(data);
+       if (data + sz >= ctx->data_end)
+               return NULL;
 
-       if (opcode == TCPOPT_EOL)
-               return 1;
-       if (opcode == TCPOPT_NOP) {
-               ++ctx->ptr;
-               return 0;
-       }
+       ctx->off += sz;
+       return data;
+}
 
-       if (ctx->ptr + 1 >= ctx->end)
-               return 1;
-       if (ctx->ptr + 1 >= ctx->data_end)
+static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+{
+       __u8 *opcode, *opsize, *wscale, *tsecr;
+       __u32 off = ctx->off;
+
+       opcode = next(ctx, 1);
+       if (!opcode)
                return 1;
-       opsize = ctx->ptr[1];
-       if (opsize < 2)
+
+       if (*opcode == TCPOPT_EOL)
                return 1;
+       if (*opcode == TCPOPT_NOP)
+               return 0;
 
-       if (ctx->ptr + opsize > ctx->end)
+       opsize = next(ctx, 1);
+       if (!opsize || *opsize < 2)
                return 1;
 
-       switch (opcode) {
+       switch (*opcode) {
        case TCPOPT_WINDOW:
-               if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end)
-                       ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE;
+               wscale = next(ctx, 1);
+               if (!wscale)
+                       return 1;
+               if (*opsize == TCPOLEN_WINDOW)
+                       ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE;
                break;
        case TCPOPT_TIMESTAMP:
-               if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) {
+               tsecr = next(ctx, 4);
+               if (!tsecr)
+                       return 1;
+               if (*opsize == TCPOLEN_TIMESTAMP) {
                        ctx->option_timestamp = true;
                        /* Client's tsval becomes our tsecr. */
-                       *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2));
+                       *ctx->tsecr = get_unaligned((__be32 *)tsecr);
                }
                break;
        case TCPOPT_SACK_PERM:
-               if (opsize == TCPOLEN_SACK_PERM)
+               if (*opsize == TCPOLEN_SACK_PERM)
                        ctx->option_sack = true;
                break;
        }
 
-       ctx->ptr += opsize;
+       ctx->off = off + *opsize;
 
        return 0;
 }
@@ -256,16 +271,21 @@ static int tscookie_tcpopt_parse_batch(__u32 index, void *context)
 
 static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
                                          __u16 tcp_len, __be32 *tsval,
-                                         __be32 *tsecr, void *data_end)
+                                         __be32 *tsecr, void *data, void *data_end)
 {
        struct tcpopt_context loop_ctx = {
-               .ptr = (__u8 *)(tcp_header + 1),
-               .end = (__u8 *)tcp_header + tcp_len,
+               .data = data,
                .data_end = data_end,
                .tsecr = tsecr,
                .wscale = TS_OPT_WSCALE_MASK,
                .option_timestamp = false,
                .option_sack = false,
+               /* Note: currently verifier would track .off as unbound scalar.
+                *       In case if verifier would at some point get smarter and
+                *       compute bounded value for this var, beware that it might
+                *       hinder bpf_loop() convergence validation.
+                */
+               .off = (__u8 *)(tcp_header + 1) - (__u8 *)data,
        };
        u32 cookie;
 
@@ -635,7 +655,7 @@ static __always_inline int syncookie_handle_syn(struct header_pointers *hdr,
        cookie = (__u32)value;
 
        if (tscookie_init((void *)hdr->tcp, hdr->tcp_len,
-                         &tsopt_buf[0], &tsopt_buf[1], data_end))
+                         &tsopt_buf[0], &tsopt_buf[1], data, data_end))
                tsopt = tsopt_buf;
 
        /* Check that there is enough space for a SYNACK. It also covers
index 1bdf2b43e49eaf31395ab93177e77b71be36ec2d..3d5cd51071f04725f4043d5aa83848c58602c9d5 100644 (file)
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-       .errstr = "back-edge from insn 0 to 0",
+       .errstr = "the call stack of 9 frames is too deep",
        .result = REJECT,
 },
 {
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-       .errstr = "back-edge",
+       .errstr = "the call stack of 9 frames is too deep",
        .result = REJECT,
 },
 {
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-       .errstr = "back-edge",
+       .errstr = "the call stack of 9 frames is too deep",
        .result = REJECT,
 },
 {
index f9297900cea6d44ecd64cf00f3f60099bb17a70a..78f19c255f20b466d4f022029217da87260a4800 100644 (file)
@@ -9,8 +9,8 @@
        BPF_MOV64_IMM(BPF_REG_0, 2),
        BPF_EXIT_INSN(),
        },
-       .errstr = "invalid BPF_LD_IMM insn",
-       .errstr_unpriv = "R1 pointer comparison",
+       .errstr = "jump into the middle of ldimm64 insn 1",
+       .errstr_unpriv = "jump into the middle of ldimm64 insn 1",
        .result = REJECT,
 },
 {
@@ -23,8 +23,8 @@
        BPF_LD_IMM64(BPF_REG_0, 1),
        BPF_EXIT_INSN(),
        },
-       .errstr = "invalid BPF_LD_IMM insn",
-       .errstr_unpriv = "R1 pointer comparison",
+       .errstr = "jump into the middle of ldimm64 insn 1",
+       .errstr_unpriv = "jump into the middle of ldimm64 insn 1",
        .result = REJECT,
 },
 {
index 591ca9637b23e883cacea7293070a546824c2d81..b604c570309a7fb41c2edfd2f7f3164ec523f870 100644 (file)
@@ -908,8 +908,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
        struct xdp_info *meta = data - sizeof(struct xdp_info);
 
        if (meta->count != pkt->pkt_nb) {
-               ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n",
-                              __func__, pkt->pkt_nb, meta->count);
+               ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
+                              __func__, pkt->pkt_nb,
+                              (unsigned long long)meta->count);
                return false;
        }
 
@@ -926,11 +927,13 @@ static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 exp
 
        if (addr >= umem->num_frames * umem->frame_size ||
            addr + len > umem->num_frames * umem->frame_size) {
-               ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len);
+               ksft_print_msg("Frag invalid addr: %llx len: %u\n",
+                              (unsigned long long)addr, len);
                return false;
        }
        if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
-               ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len);
+               ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
+                              (unsigned long long)addr, len);
                return false;
        }
 
@@ -1029,7 +1032,8 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
                        u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
 
                        ksft_print_msg("[%s] Too many packets completed\n", __func__);
-                       ksft_print_msg("Last completion address: %llx\n", addr);
+                       ksft_print_msg("Last completion address: %llx\n",
+                                      (unsigned long long)addr);
                        return TEST_FAILURE;
                }
 
@@ -1513,8 +1517,9 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject)
        }
 
        if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
-               ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
-                              __func__, stats.tx_invalid_descs,
+               ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
+                              __func__,
+                              (unsigned long long)stats.tx_invalid_descs,
                               ifobject->xsk->pkt_stream->nb_pkts);
                return TEST_FAILURE;
        }
index a5963ab9215b9493b5da03f8c3656aa1e07a34d6..52c59bad721395f9ad6644dcc3b5b3d7aeb2f62c 100644 (file)
@@ -18,12 +18,13 @@ else
 endif
 
 ifeq ($(ARCH),arm64)
-arm64_tools_dir := $(top_srcdir)/tools/arch/arm64/tools/
+tools_dir := $(top_srcdir)/tools
+arm64_tools_dir := $(tools_dir)/arch/arm64/tools/
 GEN_HDRS := $(top_srcdir)/tools/arch/arm64/include/generated/
 CFLAGS += -I$(GEN_HDRS)
 
 $(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*)
-       $(MAKE) -C $(arm64_tools_dir)
+       $(MAKE) -C $(arm64_tools_dir) O=$(tools_dir)
 endif
 
 LIBKVM += lib/assert.c
index cc920c79ff1c32e9a27a714c29d255a95f189fd0..4ff10ea61461796385159ae8b6fd88ae183bbbcd 100644 (file)
@@ -45,3 +45,4 @@ mdwe_test
 gup_longterm
 mkdirty
 va_high_addr_switch
+hugetlb_fault_after_madv
index 0161fb49fc6ef1dbbc712f472f940ab959d5301d..befab43719badff842fd771eb52f54fbc98b510c 100644 (file)
@@ -94,19 +94,19 @@ int init_uffd(void)
 
        uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
        if (uffd == -1)
-               ksft_exit_fail_msg("uffd syscall failed\n");
+               return uffd;
 
        uffdio_api.api = UFFD_API;
        uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC |
                              UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
        if (ioctl(uffd, UFFDIO_API, &uffdio_api))
-               ksft_exit_fail_msg("UFFDIO_API\n");
+               return -1;
 
        if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) ||
            !(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) ||
            !(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) ||
            !(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM))
-               ksft_exit_fail_msg("UFFDIO_API error %llu\n", uffdio_api.api);
+               return -1;
 
        return 0;
 }
@@ -1151,7 +1151,7 @@ int sanity_tests(void)
        /* 9. Memory mapped file */
        fd = open(__FILE__, O_RDONLY);
        if (fd < 0)
-               ksft_exit_fail_msg("%s Memory mapped file\n");
+               ksft_exit_fail_msg("%s Memory mapped file\n", __func__);
 
        ret = stat(__FILE__, &sbuf);
        if (ret < 0)
@@ -1159,7 +1159,7 @@ int sanity_tests(void)
 
        fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
        if (fmem == MAP_FAILED)
-               ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+               ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
 
        tmp_buf = malloc(sbuf.st_size);
        memcpy(tmp_buf, fmem, sbuf.st_size);
@@ -1189,7 +1189,7 @@ int sanity_tests(void)
 
        fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
        if (fmem == MAP_FAILED)
-               ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+               ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
 
        wp_init(fmem, buf_size);
        wp_addr_range(fmem, buf_size);
@@ -1479,6 +1479,10 @@ int main(void)
        struct stat sbuf;
 
        ksft_print_header();
+
+       if (init_uffd())
+               return ksft_exit_pass();
+
        ksft_set_plan(115);
 
        page_size = getpagesize();
@@ -1488,9 +1492,6 @@ int main(void)
        if (pagemap_fd < 0)
                return -EINVAL;
 
-       if (init_uffd())
-               ksft_exit_fail_msg("uffd init failed\n");
-
        /* 1. Sanity testing */
        sanity_tests_sd();
 
@@ -1595,7 +1596,7 @@ int main(void)
 
        fmem = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
        if (fmem == MAP_FAILED)
-               ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+               ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
 
        wp_init(fmem, sbuf.st_size);
        wp_addr_range(fmem, sbuf.st_size);
@@ -1623,7 +1624,7 @@ int main(void)
 
        fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
        if (fmem == MAP_FAILED)
-               ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+               ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
 
        wp_init(fmem, buf_size);
        wp_addr_range(fmem, buf_size);
index cc16f6ca85333225004f06d5e7083700dda1d8c9..00757445278eda9f4e174aa176aa69b05dbeea5e 100755 (executable)
@@ -223,9 +223,12 @@ CATEGORY="hugetlb" run_test ./hugepage-mremap
 CATEGORY="hugetlb" run_test ./hugepage-vmemmap
 CATEGORY="hugetlb" run_test ./hugetlb-madvise
 
+nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
 # For this test, we need one and just one huge page
 echo 1 > /proc/sys/vm/nr_hugepages
 CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
+# Restore the previous number of huge pages, since further tests rely on it
+echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
 
 if test_selected "hugetlb"; then
        echo "NOTE: These hugetlb tests provide minimal coverage.  Use"
index 5b88f7129fea4428575fbec448b9272854cbfbd6..79a3dd75590e89226b128a85c47f62e672535562 100644 (file)
@@ -148,7 +148,6 @@ void receive_response(struct __test_metadata *_metadata,
                .msg_iov = &iov,
                .msg_iovlen = 1
        };
-       struct unix_diag_req *udr;
        struct nlmsghdr *nlh;
        int ret;
 
index 24b21b15ed3fb05ba677328c4d68a258d9819169..6ff3e732f449f949add1bae24657c31e8dba493f 100644 (file)
@@ -416,9 +416,9 @@ int main(int argc, char *argv[])
 {
        struct addrinfo hints, *ai;
        struct iovec iov[1];
+       unsigned char *buf;
        struct msghdr msg;
        char cbuf[1024];
-       char *buf;
        int err;
        int fd;
 
index 9a8229abfa026a73bc20d586b70bdd1ce5cc4079..be4a30a0d02aef94e2f7dda392a7dc029bec9c68 100644 (file)
@@ -2263,7 +2263,7 @@ static int check_results(void)
 
 int main(int argc, char **argv)
 {
-       unsigned int nr_process = 1;
+       long nr_process = 1;
        int route_sock = -1, ret = KSFT_SKIP;
        int test_desc_fd[2];
        uint32_t route_seq;
@@ -2284,7 +2284,7 @@ int main(int argc, char **argv)
                        exit_usage(argv);
                }
 
-               if (nr_process > MAX_PROCESSES || !nr_process) {
+               if (nr_process > MAX_PROCESSES || nr_process < 1) {
                        printk("nr_process should be between [1; %u]",
                                        MAX_PROCESSES);
                        exit_usage(argv);
index c7f9ebeebc2c5be3b30d6bbdab2313bb2e38fa54..d2043ec3bf6d69d8d3aba159406ec6391b8e401f 100644 (file)
@@ -18,6 +18,7 @@
 
 #include <sys/ioctl.h>
 #include <sys/poll.h>
+#include <sys/random.h>
 #include <sys/sendfile.h>
 #include <sys/stat.h>
 #include <sys/socket.h>
@@ -1125,15 +1126,11 @@ again:
 
 static void init_rng(void)
 {
-       int fd = open("/dev/urandom", O_RDONLY);
        unsigned int foo;
 
-       if (fd > 0) {
-               int ret = read(fd, &foo, sizeof(foo));
-
-               if (ret < 0)
-                       srand(fd + foo);
-               close(fd);
+       if (getrandom(&foo, sizeof(foo), 0) == -1) {
+               perror("getrandom");
+               exit(1);
        }
 
        srand(foo);
index 8672d898f8cdad5dd146460089ab03ed376b41d7..218aac46732125c005212d0d561ae3bce8f91a74 100644 (file)
@@ -18,6 +18,7 @@
 #include <time.h>
 
 #include <sys/ioctl.h>
+#include <sys/random.h>
 #include <sys/socket.h>
 #include <sys/types.h>
 #include <sys/wait.h>
@@ -519,15 +520,11 @@ static int client(int unixfd)
 
 static void init_rng(void)
 {
-       int fd = open("/dev/urandom", O_RDONLY);
        unsigned int foo;
 
-       if (fd > 0) {
-               int ret = read(fd, &foo, sizeof(foo));
-
-               if (ret < 0)
-                       srand(fd + foo);
-               close(fd);
+       if (getrandom(&foo, sizeof(foo), 0) == -1) {
+               perror("getrandom");
+               exit(1);
        }
 
        srand(foo);
index 75a2438efdf3737c038f947075f3705ee6042d42..3c94f2f194d68188d8a0002779ba40c2ab0ef0eb 100755 (executable)
@@ -3240,7 +3240,7 @@ fastclose_tests()
        if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
                test_linkfail=1024 fastclose=server \
                        run_tests $ns1 $ns2 10.0.1.1
-               chk_join_nr 0 0 0
+               chk_join_nr 0 0 0 0 0 0 1
                chk_fclose_nr 1 1 invert
                chk_rst_nr 1 1
        fi
index 5f2b3f6c0d74991372726d3c8b5f1a28c3d9d27e..38be9706c45f18e410d12694a43696c09a47c211 100755 (executable)
@@ -859,7 +859,7 @@ kci_test_gretap()
 
 
        run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
-       run_cmd ip -netns "$testns" link set dev $DEV_NS ups
+       run_cmd ip -netns "$testns" link set dev $DEV_NS up
        run_cmd ip -netns "$testns" link del "$DEV_NS"
 
        # test external mode
index 5b0e93f9996cb18cc390d33645132532ab6bb85c..01fa816868bc4c2bc4659663e0f7e15bc79090c3 100644 (file)
@@ -353,11 +353,12 @@ static void test_stream_msg_peek_server(const struct test_opts *opts)
 }
 
 #define SOCK_BUF_SIZE (2 * 1024 * 1024)
-#define MAX_MSG_SIZE (32 * 1024)
+#define MAX_MSG_PAGES 4
 
 static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
 {
        unsigned long curr_hash;
+       size_t max_msg_size;
        int page_size;
        int msg_count;
        int fd;
@@ -373,7 +374,8 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
 
        curr_hash = 0;
        page_size = getpagesize();
-       msg_count = SOCK_BUF_SIZE / MAX_MSG_SIZE;
+       max_msg_size = MAX_MSG_PAGES * page_size;
+       msg_count = SOCK_BUF_SIZE / max_msg_size;
 
        for (int i = 0; i < msg_count; i++) {
                size_t buf_size;
@@ -383,7 +385,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
                /* Use "small" buffers and "big" buffers. */
                if (i & 1)
                        buf_size = page_size +
-                                       (rand() % (MAX_MSG_SIZE - page_size));
+                                       (rand() % (max_msg_size - page_size));
                else
                        buf_size = 1 + (rand() % page_size);
 
@@ -429,7 +431,6 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
        unsigned long remote_hash;
        unsigned long curr_hash;
        int fd;
-       char buf[MAX_MSG_SIZE];
        struct msghdr msg = {0};
        struct iovec iov = {0};
 
@@ -457,8 +458,13 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
        control_writeln("SRVREADY");
        /* Wait, until peer sends whole data. */
        control_expectln("SENDDONE");
-       iov.iov_base = buf;
-       iov.iov_len = sizeof(buf);
+       iov.iov_len = MAX_MSG_PAGES * getpagesize();
+       iov.iov_base = malloc(iov.iov_len);
+       if (!iov.iov_base) {
+               perror("malloc");
+               exit(EXIT_FAILURE);
+       }
+
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
 
@@ -483,6 +489,7 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
                curr_hash += hash_djb2(msg.msg_iov[0].iov_base, recv_size);
        }
 
+       free(iov.iov_base);
        close(fd);
        remote_hash = control_readulong();